## 
# ------------------------------------------------------------------------
# 
# "GetPseudoBlocks(x,s,eps_opt, delta_opt, p_XiXip1,m=min(X),M=max(X),func=sum,...)" 
# 
# Compute the division into the blocks for the general Markov chains.
#
# ------------------------------------------------------------------------
##
#' @aliases GetPseudoBlocks
#' @title Computing Pseudo-regenerative Blocks
#' @description The function computes pseudo-regenerative 
#' blocks for general Markov chains. 
#' @param x A numeric vector representing a Markov chain.
#' @param s A real number specifying the center of the small set. 
#' @param eps_opt A numeric value for the size of the small set.
#' @param delta_opt A numeric value for the lower bound in the minorization condition.
#' @param p_XiXip1 A numeric vector with the estimated transition density
#' evaluated at the consecutive pairs \eqn{(X_i, X_{i+1})} of \code{x}.
#' @param m A numeric value; the lower truncation threshold.
#' Default is the minimum of \code{x}.
#' @param M A numeric value; the upper truncation threshold.
#' Default is the maximum of \code{x}.
#' @param func A function to apply to each block. Default is \code{sum}.
#' @param ... Additional arguments passed to the function \code{func}.
#' @details The function begins by determining which elements of \code{x} are
#'  within the interval \eqn{[s-eps_opt,s+eps_opt]}. Then an estimated Nummelin
#'  splitting trick is performed using the estimators \eqn{p_n(X_i,X_{i+1})}.
#' @return Returns a list containing:
#' \enumerate{
#'   \item A matrix with the following columns:
#'   \itemize{
#'     \item \code{Time} - the index of each observation,
#'     \item \code{x} - values of the process,
#'     \item \code{Bnumber} - block number assigned to each observation,
#'     \item \code{regen} - indicator (1 or 0) of regeneration times. 1 corresponds to
#'     the regeneration time.
#'   }
#'   \item A matrix summarizing block characteristics with the following columns:
#'   \itemize{
#'     \item \code{Block number} - the block index,
#'     \item \code{Block length} - number of observations in the block,
#'     \item \code{Truncated sum} - the value of \code{func} applied to truncated 
#'     observations in the block,
#'     \item \code{Valid points} - number of observations within the truncation thresholds,
#'     \item \code{Winsorized value} - the Winsorized value of \code{func} applied to the block,
#'     \item \code{Start index} - the starting index of the block,
#'     \item \code{End index} -  the ending index of the block.
#'   }
#'   \item \code{Total blocks} - the total number of regeneration blocks.
#' }
#' @references Bertail, P. and Dudek, A. (2025). \emph{Bootstrap for 
#' Dependent Data, with an R package} (by Bernard Desgraupes and Karolina Marek) - submitted. 
#' 
#' Bertail, P. and Clémençon, S. (2006). Regenerative block bootstrap for Markov
#'  chains. \emph{Bernoulli}, \bold{12}, 689-712. 
#'  
#' @seealso \code{\link{findBestEpsilon}}, \code{\link{ftrunc}},
#' \code{\link{regenboot}}, \code{\link{smallEnsemble}}.
#' @keywords "Regenerative Block Bootstrap" "Markov chains" "Small set" "Nummelin splitting trick"
#' @export
#' @examples 
#' \donttest{
#' n=200# the length of the process
#' # Generating the AR(1) process
#' coeff=0.75
#' X = arima.sim(n=n, list(ar = c(coeff)))
#' # Find the small ensemble with the largest number of regeneration
#' sm <- findBestEpsilon(X,s=0,plotIt=FALSE)
#' f =sm$trans
#' eps = sm$epsilon
#' delta = sm$delta
#' m = sm$s
#' Pseudo_blocks=GetPseudoBlocks(X, m, eps_opt = eps, delta_opt = delta, p_XiXip1 = f,func=sum)
#' }

GetPseudoBlocks <- function(x, s, eps_opt, delta_opt, p_XiXip1,
                            m = min(x), M = max(x), func = sum, ...) {
  # Split the chain `x` into pseudo-regenerative blocks and summarise each
  # complete block with `func`.
  #
  # Arguments:
  #   x         numeric chain (at least two observations, no missing values).
  #   s         centre of the small set [s - eps_opt, s + eps_opt].
  #   eps_opt   half-width of the small set.
  #   delta_opt constant multiplying the small-set indicator in the
  #             regeneration probability.
  #   p_XiXip1  numeric vector dividing that probability elementwise, one
  #             entry per consecutive pair (x[i], x[i + 1]).
  #   m, M      lower / upper truncation thresholds (default: range of x).
  #   func      summary function applied to each block.
  #   ...       further arguments forwarded to `func`.
  #
  # Returns an unnamed list of three elements:
  #   [[1]] matrix: index, x, block number, regeneration indicator (0/1);
  #   [[2]] matrix with one row per complete block: block number, block
  #         length, `func` of the in-range values, number of in-range values,
  #         `func` of the Winsorized values, start index, end index;
  #   [[3]] the largest block number.

  # ---- Argument checks ----------------------------------------------------
  if (!is.numeric(x) || NROW(x) < 2) {
    stop("Error: 'x' must be a numeric vector with at least two observations.")
  }
  if (anyNA(x)) {
    stop("Error: 'x' must not contain missing values.")
  }

  if (!is.numeric(s) || length(s) != 1) {
    stop("Error: 's' must be a single numeric value.")
  }

  if (!is.numeric(eps_opt) || length(eps_opt) != 1) {
    stop("Error: 'eps_opt' must be a single numeric value.")
  }

  if (!is.numeric(delta_opt) || length(delta_opt) != 1) {
    stop("Error: 'delta_opt' must be a single numeric value.")
  }

  # The density estimate is used elementwise (one value per pair), so it is
  # a vector, not a scalar.
  if (!is.numeric(p_XiXip1) || length(p_XiXip1) == 0) {
    stop("Error: 'p_XiXip1' must be a numeric vector.")
  }

  # ---- Regeneration times -------------------------------------------------
  n <- NROW(x) - 1
  lower <- s - eps_opt
  upper <- s + eps_opt
  x_now <- x[1:n]
  x_next <- x[2:(n + 1)]

  # A pair can regenerate only if both of its points lie in the small set.
  in_small <- (x_now >= lower) & (x_now <= upper) &
    (x_next >= lower) & (x_next <= upper)

  pregen_opt <- delta_opt * in_small / p_XiXip1
  # Outside the small set the probability is 0 by construction; without this
  # a zero or missing density estimate there would give 0/0 = NaN.
  pregen_opt[!in_small] <- 0
  if (anyNA(pregen_opt)) {
    stop("Error: 'p_XiXip1' yields undefined regeneration probabilities inside the small set.")
  }

  # NOTE: the names `Bnumber` and `regen` are kept because cbind() below uses
  # them as column names of the returned matrix.
  regen <- c(pregen_opt > runif(n), 0)
  Bnumber <- cumsum(c(0, regen[1:n]))

  # Block 0 (before the first regeneration) is never summarised. The last
  # block is counted only when a regeneration occurs at time n.
  l_n <- max(Bnumber)
  NN <- if (regen[n] == 1) l_n else l_n - 1
  if (NN<=0) stop(" Not enough regeneration blocks : check stationarity or the choice of the small-set")

  dataset <- cbind(1:(n + 1), x, Bnumber, regen)

  # ---- Per-block summaries ------------------------------------------------
  values <- as.numeric(x)
  lB <- numeric(NN)        # block lengths
  Submax <- numeric(NN)    # func on values inside [m, M]
  First <- numeric(NN)     # number of values inside [m, M]
  Submax_1 <- numeric(NN)  # func on Winsorized values
  debX <- numeric(NN)      # first index of the block
  finX <- numeric(NN)      # last index of the block

  for (i in seq_len(NN)) {
    idx <- which(Bnumber == i)
    block <- values[idx]
    inside <- (block >= m) & (block <= M)

    First[i] <- sum(inside)
    Submax[i] <- func(block[inside], ...)
    # Winsorize: values above M are replaced by M, values below m by m.
    # For func = sum this equals the sum of the in-range values plus M (resp.
    # m) times the number of values above (resp. below) the thresholds.
    Submax_1[i] <- func(pmin(pmax(block, m), M), ...)
    lB[i] <- length(idx)
    debX[i] <- idx[1]
    finX[i] <- idx[length(idx)]
  }

  # deparse.level = 0 keeps the block matrix free of column names.
  Block <- cbind(seq_len(NN), lB, Submax, First, Submax_1, debX, finX,
                 deparse.level = 0)

  list(dataset, Block, l_n)
}
