#' Multi-stage MCMC Bayesian Method for DMR Detection
#'
#' @description
#' This function implements a multi-stage MCMC Bayesian method for detecting differentially methylated regions (DMRs)
#' between cancer and normal groups. It uses the ASGN model for parameter estimation and provides both Bayes Factor
#' and p-value based testing.
#'
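#' @param cancer_data A data frame of methylation data for the cancer group (rows: CpG sites).
#'   Must contain the metadata columns \code{CpG_ID} and \code{Chromosome}; every other column is
#'   treated as a numeric methylation column (columns: samples).
#' @param normal_data A data frame of methylation data for the normal group, with the same layout
#'   as \code{cancer_data} (rows: CpG sites; \code{CpG_ID} and \code{Chromosome} columns plus one
#'   numeric column per sample).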
#' @param stage The starting stage for multi-stage analysis (default: 1)
#' @param max_stages Maximum number of stages (default: 3)
#' @param num_splits Number of splits for the data in each stage (default: 10)
#' @param test Type of test to use: "BF" for Bayes Factor, "AD" for the Anderson-Darling test p-value
#'   (requires the \pkg{kSamples} package) (default: "BF")
#' @param mcmc A list of MCMC parameters (default: NULL, uses function defaults)
#' @param priors_cancer Prior parameters for the cancer group (default: NULL, uses function defaults)
#' @param priors_normal Prior parameters for the normal group (default: NULL, uses function defaults)
#' @param bf_thresholds Bayes Factor thresholds for each stage (default: NULL, uses function defaults)
#' @param pvalue_thresholds p-value thresholds for each stage (default: NULL, uses function defaults)
#' @param return_mcmc Logical indicating whether to return MCMC samples for diagnostic purposes (default: FALSE)
#'
#' @details
#' This function implements a multistage MCMC Bayesian approach for
#' DMR detection. It recursively splits genomic regions and applies Bayesian
#' testing. This function supports both Bayes Factor and Anderson-Darling tests
#' for significance assessment. The algorithm begins by analyzing entire chromosomal
#' regions, then recursively splits significant regions into smaller sub-regions
#' for analysis, stopping when either maximum stages reached or no significant 
#' differences are detected. 
#'
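#' @return If \code{return_mcmc = FALSE}, a data frame of detected DMRs with the columns
#' \code{Chromosome}, \code{Start_CpG}, \code{End_CpG}, \code{CpG_Count}, \code{Decision_Value},
#' and \code{Stage}. If \code{return_mcmc = TRUE}, a list containing the DMR data frame
#' (element \code{dmrs}; named \code{dmr} when the call itself runs at the final stage) and
#' the element \code{mcmc_samples} holding the MCMC samples.
#'
#' Returns NULL if no significant DMRs are detected or if an intermediate computation fails.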
#'
#' @examples  
#' \donttest{
#' # Load the datasets
#' data(cancer_demo)
#' data(normal_demo)
#'
#' priors=list(alpha = 1,mu = 1,sigma2 = 1)
#' 
#' mcmc = list(nburn = 5000, niter = 10000, thin = 5) 
#' 
#' set.seed(2021)
#' rst <- mmcmcBayes(cancer_demo, normal_demo, 
#'                  stage = 1,max_stages = 2,num_splits = 5,
#'                  test = "BF", priors_cancer = NULL, priors_normal = NULL,
#'                  bf_thresholds = list(stage1 = 10, stage2 = 10.3, stage3 = 10.3),
#'                  return_mcmc = TRUE)
#' print(rst$dmrs)
#' }
#'
#' @author 
#' Zhexuan Yang, Duchwan Ryu, and Feng Luan
#' 
#' @seealso
#' Helper functions in this package:
#' \code{\link{asgn_func}} for parameter estimation,
#' \code{\link{traceplot_asgn}} for MCMC diagnostics,
#' \code{\link{compare_dmrs}} for result comparison
#'
#' @export
mmcmcBayes <- function(cancer_data, normal_data,
                       stage = 1, max_stages = 3,
                       num_splits = 10, 
                       test = "BF", 
                       mcmc = NULL, 
                       priors_cancer = NULL, 
                       priors_normal = NULL, 
                       bf_thresholds = NULL, 
                       pvalue_thresholds = NULL,
                       return_mcmc = FALSE) { 
  # Overview: summarise both groups, fit the ASGN model to each via
  # asgn_func() (defined elsewhere in the package), test the region, and, if it
  # is significant and stage < max_stages, split the region and recurse on each
  # sub-segment using the current posteriors as the next stage's priors.
  
  ####### Input validation #######
  if (!is.data.frame(cancer_data) || !is.data.frame(normal_data)) {
    stop("Both cancer_data and normal_data must be data frames")
  }
  
  required_cols <- c("CpG_ID", "Chromosome")
  if (!all(required_cols %in% colnames(cancer_data)) ||
      !all(required_cols %in% colnames(normal_data))) {
    stop("Both datasets must contain 'CpG_ID' and 'Chromosome' columns")
  }
  
  if (stage < 1 || stage > max_stages) {
    stop("Invalid stage parameter. Must be between 1 and max_stages")
  }
  
  if (length(test) != 1L || !test %in% c("BF", "AD")) {
    stop("Test parameter must be either 'BF' or 'AD'")
  }
  
  # Check the optional dependency here rather than inside calAD(): calAD() runs
  # under a tryCatch() that converts every error into a NULL result, which
  # would hide this message from the user.
  if (test == "AD" && !requireNamespace("kSamples", quietly = TRUE)) {
    stop("The 'kSamples' package is required but not installed. Install it using install.packages('kSamples')")
  }
  
  ####### Compute Mean Methylation #######
  # Returns a one-column matrix holding the mean of every non-metadata column
  # (NA values ignored). Errors if there is no such column or if a column is
  # not numeric (colMeans() fails).
  calMean <- function(data) {
    data <- as.data.frame(data)
    
    if (!all(required_cols %in% colnames(data))) {
      stop("Missing metadata columns in methylation data")
    }
    
    data_numeric <- data[, !(colnames(data) %in% required_cols), drop = FALSE]
    if (ncol(data_numeric) == 0) {
      stop("No methylation data columns found")
    }
    
    matrix(colMeans(data_numeric, na.rm = TRUE), ncol = 1)
  }
  
  ####### Compute ASGN Density #######
  # Evaluates the ASGN density expression at x (vectorised over x) for scalar
  # alpha, mu and sigma2.
  asgn_density <- function(x, alpha, mu, sigma2) {
    if (sigma2 <= 0) {
      stop("sigma2 must be positive")
    }
    
    numer <- sqrt(2) * ((1 - alpha * x)^2 + 1)
    denom <- 4 * gamma(3/2) * (alpha^2) + 4 * gamma(1/2)
    e <- exp(-((x - mu)^2) / (2 * sigma2))
    (numer / denom) * e
  }
  
  # Density of every group mean under that group's posterior (alpha, mu, sigma2).
  group_density <- function(ybar, posterior) {
    asgn_density(as.numeric(ybar),
                 alpha = posterior[1],
                 mu = posterior[2],
                 sigma2 = posterior[3])
  }
  
  ####### Compute Bayes Factor #######
  # Ratio of the summed cancer densities to the summed normal densities;
  # Inf when the normal sum is exactly zero.
  calBF <- function(ybar_cancer, ybar_normal, posterior_cancer, posterior_normal) {
    if (length(posterior_cancer) != 3 || length(posterior_normal) != 3) {
      stop("Posterior parameters must have length 3")
    }
    
    likelihood_cancer <- group_density(ybar_cancer, posterior_cancer)
    likelihood_normal <- group_density(ybar_normal, posterior_normal)
    
    # Avoid division by zero
    if (sum(likelihood_normal) == 0) {
      return(Inf)
    }
    
    sum(likelihood_cancer) / sum(likelihood_normal)
  }
  
  ####### Compute Anderson-Darling Test #######
  # Two-sample Anderson-Darling test on the two sets of density values; returns
  # the entry in row 2, column 3 of the `ad` table produced by kSamples::ad.test().
  calAD <- function(ybar_cancer, ybar_normal, posterior_cancer, posterior_normal) {
    density_cancer <- group_density(ybar_cancer, posterior_cancer)
    density_normal <- group_density(ybar_normal, posterior_normal)
    
    ad_test_result <- kSamples::ad.test(density_cancer, density_normal)
    ad_test_result$ad[2, 3]
  }
  
  # Repeat the last threshold so that every stage up to `n_stages` has one.
  pad_thresholds <- function(thresholds, n_stages) {
    n_given <- length(thresholds)
    if (n_given < n_stages) {
      thresholds <- c(thresholds, rep(thresholds[n_given], n_stages - n_given))
    }
    thresholds
  }
  
  ####### Main Function Logic #######
  
  # Set default parameters
  if (is.null(mcmc)) {
    mcmc <- list(nburn = 5000, niter = 10000, thin = 1)  
  }
  
  if (is.null(bf_thresholds)) {
    bf_thresholds <- c(10, 15, 20)
  }
  
  if (is.null(pvalue_thresholds)) {
    pvalue_thresholds <- c(1e-4, 1e-6, 1e-8)
  }
  
  bf_thresholds <- pad_thresholds(bf_thresholds, max_stages)
  pvalue_thresholds <- pad_thresholds(pvalue_thresholds, max_stages)
  
  total_cpgs <- nrow(cancer_data)
  if (total_cpgs == 0) {
    return(NULL)
  }
  
  # Any failure while summarising, fitting or testing a region is treated as
  # "no DMR here" (NULL) so that one bad segment does not abort the recursion.
  ybar_cancer <- tryCatch(calMean(cancer_data), error = function(e) NULL)
  ybar_normal <- tryCatch(calMean(normal_data), error = function(e) NULL)
  
  if (is.null(ybar_cancer) || is.null(ybar_normal)) {
    return(NULL)
  }
  
  ####### Run ASGN Function for Both Groups #######
  # asgn_func() is used here as returning a list with `posteriors`
  # (alpha, mu, sigma2) and, when requested, `mcmc_samples`.
  posterior_cancer <- tryCatch(
    asgn_func(ybar_cancer, priors_cancer, mcmc, return_mcmc = return_mcmc),
    error = function(e) NULL
  )
  
  posterior_normal <- tryCatch(
    asgn_func(ybar_normal, priors_normal, mcmc, return_mcmc = return_mcmc),
    error = function(e) NULL
  )
  
  if (is.null(posterior_cancer) || is.null(posterior_normal)) {
    return(NULL)
  }
  
  ####### Select Statistical Test #######
  decision_value <- NULL
  decision_criteria <- FALSE
  
  if (test == "BF") {
    BF <- tryCatch(
      calBF(ybar_cancer, ybar_normal, 
            posterior_cancer$posteriors, posterior_normal$posteriors),
      error = function(e) NA
    )
    
    if (is.na(BF)) {
      return(NULL)
    }
    
    # `[[` so that thresholds supplied as a list yield a scalar, not a list
    decision_value <- BF
    decision_criteria <- (BF >= bf_thresholds[[stage]])
    
  } else if (test == "AD") {
    p_val <- tryCatch(
      calAD(ybar_cancer, ybar_normal, 
            posterior_cancer$posteriors, posterior_normal$posteriors),
      error = function(e) NA
    )
    
    if (is.na(p_val)) {
      return(NULL)
    }
    
    decision_value <- p_val
    decision_criteria <- (p_val < pvalue_thresholds[[stage]])
  }
  
  ####### Stopping Condition #######
  # Not significant: the region is dropped at this stage.
  if (!decision_criteria) {
    return(NULL)
  }
  
  current_mcmc <- list(
    cancer = posterior_cancer$mcmc_samples,
    normal = posterior_normal$mcmc_samples
  )
  
  # Significant at the final stage: report the whole region as one DMR.
  if (stage == max_stages) {
    detected_DMR <- data.frame(
      Chromosome = cancer_data$Chromosome[1],
      Start_CpG = cancer_data$CpG_ID[1],
      End_CpG = cancer_data$CpG_ID[nrow(cancer_data)],
      CpG_Count = total_cpgs,
      Decision_Value = decision_value,
      Stage = stage,
      stringsAsFactors = FALSE
    )
    
    if (return_mcmc) {
      return(list(dmr = detected_DMR, mcmc_samples = current_mcmc))
    }
    return(detected_DMR)
  }
  
  ####### Split Data Into Sub-Segments #######
  # Ensure we don't split into more segments than available CpGs
  actual_splits <- min(num_splits, total_cpgs)
  row_ids <- seq_len(total_cpgs)
  
  if (actual_splits < 2) {
    # cut() rejects fewer than two intervals, so a region that cannot be split
    # (single CpG or num_splits = 1) is carried forward as one segment.
    split_indices <- list(row_ids)
  } else {
    split_indices <- split(row_ids, 
                           cut(row_ids, breaks = actual_splits, labels = FALSE))
  }
  
  ####### Process Each Sub-Segment with Updated Priors #######
  # The current posteriors become the priors of every sub-segment.
  new_priors_cancer <- list(
    alpha = posterior_cancer$posteriors[1],
    mu = posterior_cancer$posteriors[2],
    sigma2 = posterior_cancer$posteriors[3]
  )
  
  new_priors_normal <- list(
    alpha = posterior_normal$posteriors[1],
    mu = posterior_normal$posteriors[2],
    sigma2 = posterior_normal$posteriors[3]
  )
  
  results <- list()
  mcmc_results <- list()
  
  for (idx in split_indices) {
    # NOTE(review): normal_data is indexed with row positions derived from
    # cancer_data, i.e. both inputs are assumed to have matching rows.
    segment_result <- mmcmcBayes(
      cancer_data[idx, , drop = FALSE], 
      normal_data[idx, , drop = FALSE], 
      stage = stage + 1, 
      max_stages = max_stages, 
      num_splits = num_splits, 
      test = test, 
      mcmc = mcmc, 
      priors_cancer = new_priors_cancer, 
      priors_normal = new_priors_normal, 
      bf_thresholds = bf_thresholds, 
      pvalue_thresholds = pvalue_thresholds,
      return_mcmc = return_mcmc
    )
    
    if (is.null(segment_result)) {
      next
    }
    
    if (return_mcmc && !is.data.frame(segment_result) && is.list(segment_result)) {
      # A final-stage child returns `dmr`; an intermediate-stage child returns
      # `dmrs`. Both must be unpacked, otherwise rbind() below would receive a
      # list instead of a data frame.
      segment_dmrs <- segment_result[["dmrs"]]
      if (is.null(segment_dmrs)) {
        segment_dmrs <- segment_result[["dmr"]]
      }
      results[[length(results) + 1]] <- segment_dmrs
      mcmc_results[[length(mcmc_results) + 1]] <- segment_result$mcmc_samples
    } else {
      results[[length(results) + 1]] <- segment_result
    }
  }
  
  if (length(results) == 0) {
    return(NULL)
  }
  
  # Combine all results
  final_dmrs <- do.call(rbind, results)
  
  if (return_mcmc) {
    return(list(
      dmrs = final_dmrs,
      mcmc_samples = list(
        current_stage = current_mcmc,
        sub_segments = mcmc_results
      )
    ))
  }
  
  final_dmrs
}