##
# ------------------------------------------------------------------------
#
# best.sub.size.iid(X, func, B=999, PLT=TRUE, qq=0.75,rep=FALSE, ...) 
#
# ------------------------------------------------------------------------
##
#' @aliases best.sub.size.iid
#' @title Optimal Block Subsampling or MOON Bootstrap Sizes for I.I.D. Data
#' @description This function determines the optimal block size for subsampling 
#' or moon bootstrap sizes 
#' using a distance-based method, specifically applying undersampling techniques 
#' for independent and identically distributed (i.i.d.) data. It computes 
#' Kolmogorov distances between consecutive subsampling (or moon bootstrap) distributions to 
#' select the most suitable block size.
#' @param X A numeric vector or data representing i.i.d. observations.
#' @param func A function applied to each subsample (block); it should return a single numeric value.
#' @param B An integer; the number of resampling replications.
#'  Default is \eqn{999}.
#' @param PLT Logical. If \code{TRUE} (default), plots the Kolmogorov distances 
#' versus subsampling sizes and intermediate regression results.
#' @param qq A numeric value in the interval \eqn{(0, 1)}. Determines the scaling 
#' factor for subsampling sizes. Higher values result in more subsampling 
#' distributions being computed. Default is \eqn{0.75}.
#' @param rep Logical. If \code{TRUE}, performs moon bootstrap (subsampling with replacement). 
#' If \code{FALSE} (default), performs subsampling without replacement.
#' @param ... Optional additional arguments passed to the \code{func} function.
#' @details This function implements a procedure based on the method proposed by Götze and Račkauskas (2001) and
#' Bickel and Sakov (2008) for determining optimal subsampling sizes in the i.i.d. case. It computes 
#' a range of subsampling (or moon bootstrap) distributions for sizes
#'  given by the integer parts of successive powers of \code{1/qq}. 
#' The function then evaluates the Kolmogorov distance between consecutive distributions. 
#' The optimal block size is the value which minimises this distance.
#' 
#' Sometimes looking at the plot is more informative, especially when the distance 
#' does not vary very much. In this case, the largest value in a stable zone will
#' be a better choice than the minimiser of the distance.
#' 
#' @return Returns the optimal block size for subsampling or moon bootstrap.
#' @references Bertail, P. and Dudek, A. (2025). \emph{Bootstrap for 
#' Dependent Data, with an R package} (by Bernard Desgraupes and Karolina Marek) - submitted.
#' 
#' Bickel, P., and Sakov, A. (2008). On the choice of m in the m out of n bootstrap and 
#' confidence bounds for extrema. \emph{Statistica Sinica}, \bold{18} 967–985.
#' 
#' Götze, F. and Račkauskas, A. (2001). 
#'  Adaptive choice of bootstrap sample sizes. In \emph{State of the art in probability and statistics.}
#'  Institute of Mathematical Statistics, 
#' pp. 286-310.
#' 
#' @seealso {\code{\link{block.sub}},
#' \code{\link{rate.sub}},
#' \code{\link{rate.block.sub}},
#' \code{\link{best.block.sub.size}}}.
#' @keywords bootstrap
#' @export
#' @examples 
#'  set.seed(12345)
#'  n = 1000 # sample size
#'  ts = rnorm(n)
#'  bopt=best.sub.size.iid(ts,max)
##


best.sub.size.iid <- function(X, func, B = 999, PLT = TRUE, qq = 0.75, rep = FALSE, ...) {
  # Select a subsampling (or moon bootstrap) size for i.i.d. data.
  #
  # For a grid of sizes floor((1/qq)^k), k = 4, 5, ..., the statistic `func`
  # is recomputed on B resamples of each size. The Kolmogorov distance
  # between the resampling distributions of consecutive sizes is computed and
  # the (larger) size of the pair with the smallest distance is returned.
  #
  # X    : vector of observations.
  # func : statistic applied to each resample; must return one number.
  # B    : number of resamples per size.
  # PLT  : if TRUE, plot the distances against the sizes.
  # qq   : value in (0, 1) controlling how dense the grid of sizes is.
  # rep  : TRUE = sample with replacement (moon bootstrap),
  #        FALSE = sample without replacement (subsampling).
  # ...  : extra arguments forwarded to `func`.
  #
  # Stops with an error when fewer than two distinct sizes are available.
  too_few_sizes <- "choose an higher value for qq<1 (default=0.75)"

  N <- length(X)

  # Largest exponent k such that (1/qq)^k does not exceed N.
  nb <- floor(-log(N) / log(qq))

  # The grid starts at exponent 4, so exponents 4 and 5 must both exist to
  # obtain one pair of consecutive sizes; otherwise 4:nb would count down.
  if (is.na(nb) || nb < 5) {
    stop(too_few_sizes)
  }

  # floor() can map neighbouring exponents to the same size when qq is close
  # to 1; comparing a size with itself only measures Monte Carlo noise.
  l_b <- unique(floor((1 / qq)^(4:nb)))
  n_b <- length(l_b)
  if (n_b < 2) {
    stop(too_few_sizes)
  }

  # Row i holds the B resampled values of the statistic for size l_b[i].
  T_table <- matrix(0, n_b, B)
  n_rep <- ncol(T_table)

  for (i in seq_len(n_b)) {
    size_i <- l_b[i]
    T_table[i, ] <- vapply(
      seq_len(n_rep),
      # `...` belongs to the statistic, not to sample().
      function(b) func(sample(X, size = size_i, replace = rep), ...),
      numeric(1)
    )
  }

  # Column 1: larger size of each consecutive pair; column 2: distance.
  dist2 <- matrix(0, nrow = n_b - 1, ncol = 2)

  for (i in 2:n_b) {
    pooled <- c(T_table[i, ], T_table[i - 1, ])
    ord <- order(pooled)

    # Difference of the two empirical distribution functions along the
    # sorted pooled sample: the first n_rep entries belong to size l_b[i].
    ecdf_diff <- cumsum(ifelse(ord <= n_rep, -1, 1)) / n_rep

    # With ties the difference is only meaningful at the last position of
    # each run of equal values (same convention as stats::ks.test).
    sorted <- pooled[ord]
    run_ends <- c(which(diff(sorted) != 0), length(sorted))

    dist2[i - 1, ] <- c(l_b[i], max(abs(ecdf_diff[run_ends])))
  }

  # Plot distance as a function of subsampling size
  if (isTRUE(PLT)) {
    plot(dist2[, 1], dist2[, 2],
         xlab = "subsampling size", ylab = "Kolmogorov distance")
  }

  dist2[which.min(dist2[, 2]), 1]
}
