#' CSEM, CSSEM, and Reliability under the Compound Binomial Model
#'
#' @description
#' Compute the CSEM, CSSEM, and reliability coefficients for raw scores
#' and scaled scores using the full compound binomial error model.
#'
#' @param x Examinee-by-item matrix/data frame of item responses, ordered by stratum.
#' @param s Numeric vector of number of items in each stratum. Sum(s) must equal ncol(x).
#' @param ct Optional conversion table with maxZ + 1 rows. The second column
#'   is the scale score corresponding to composite score Z = 0, 1, ..., maxZ.
#' @param w Optional numeric vector of weights for each stratum. Defaults to 1 per stratum.
#'
#' @return A list containing:
#' \describe{
#'   \item{x}{Raw total scores (row sums of x).}
#'   \item{total_scale}{If \code{ct} is provided, the composite scale score for each examinee.}
#'   \item{csem}{CSEM on the raw-score metric for each examinee.}
#'   \item{cssem}{If \code{ct} is provided, CSSEM on the scale-score metric.}
#'   \item{reliability_raw}{Reliability coefficient for raw scores.}
#'   \item{reliability_scale}{If \code{ct} is provided, reliability coefficient
#'     for scale scores.}
#' }
#'
#' @examples
#' data(data.m)
#' data(ct.m)
#' csem_compound_binomial(data.m, c(13, 12, 6))
#' \donttest{
#' csem_compound_binomial(data.m, c(13, 12, 6), ct.m)
#' }
#'
#' @export
csem_compound_binomial <- function(x, s, ct = NULL, w = NULL) {

  x <- as.matrix(stats::na.exclude(x))
  ni <- ncol(x)
  n  <- nrow(x)

  if (sum(s) != ni) stop("Sum of `s` must equal the number of items (ncol(x)).")
  M <- length(s)

  if (is.null(w)) w <- rep(1, M)
  if (length(w) != M) stop("Length of `w` must match length of `s` (number of strata).")

  has_scale <- !is.null(ct)
  if (has_scale) {
    ct <- as.data.frame(ct)
  }

  # stratum index boundaries
  ends   <- cumsum(s)
  starts <- c(1, ends[-M] + 1)

  # storage
  raw_csem    <- numeric(n)
  scale_csem  <- if (has_scale) numeric(n) else NULL
  total_raw   <- rowSums(x)
  total_scale <- if (has_scale) numeric(n) else NULL

  # maximum composite score Z = sum_m w_m * s_m
  maxZ <- sum(w * s)

  if (has_scale) {
    if (nrow(ct) != (maxZ + 1L)) {
      stop("`ct` must have maxZ + 1 rows (for composite scores Z = 0:maxZ).")
    }
    # tZ[z + 1] is the scale score for composite score z
    tZ <- ct[, 2]
  }

  for (i in seq_len(n)) {

    # part-test scores and true-score estimates
    Xt     <- numeric(M)
    pi_hat <- numeric(M)

    for (m in seq_len(M)) {
      idx <- starts[m]:ends[m]
      Xt[m] <- sum(x[i, idx])
      pi_hat[m] <- Xt[m] / s[m]
    }

    # ---- raw-score CSEM (compound binomial) ---------------------------------
    vk <- Xt * (s - Xt) / (s - 1)
    raw_csem[i] <- sqrt(sum(vk))

    if (!has_scale) next

    # ---- composite-score distribution for this examinee ---------------------
    # first stratum
    dist_z <- stats::dbinom(0:s[1], s[1], pi_hat[1])
    names(dist_z) <- w[1] * (0:s[1])

    # convolve remaining strata
    for (m in 2:M) {
      dist_next <- stats::dbinom(0:s[m], s[m], pi_hat[m])
      names(dist_next) <- w[m] * (0:s[m])

      z1 <- as.numeric(names(dist_z))
      z2 <- as.numeric(names(dist_next))

      new_vals  <- outer(z1, z2, "+")
      new_probs <- outer(dist_z, dist_next, "*")

      dist_z <- tapply(new_probs, new_vals, sum)
    }

    z_scores <- as.numeric(names(dist_z))
    pz       <- as.numeric(dist_z)

    # expected Z and Z^2
    EZ  <- sum(z_scores * pz)
    EZ2 <- sum(z_scores^2 * pz)

    # ---- Feldt correction constant (eq. 0.84), with 0/0 protection ----------
    A_top <- sum(w^2 * Xt * (s - Xt) / (s - 1))
    A_bot <- sum(w^2 * Xt * (s - Xt) /  s    )

    if (A_top <= .Machine$double.eps && A_bot <= .Machine$double.eps) {
      # no error variance (e.g., perfect or all-zero scores), set C = 1
      C_corr <- 1
    } else {
      C_corr <- sqrt(A_top / A_bot)
    }

    # ---- transform composite distribution to scale metric -------------------
    # scale score for each possible composite score z
    t_vec <- tZ[z_scores + 1]

    Et  <- sum(t_vec * pz)
    Et2 <- sum(t_vec^2 * pz)

    var_t <- Et2 - Et^2
    if (var_t < 0) var_t <- 0 # numerical guard

    scale_csem[i] <- C_corr * sqrt(var_t)

    # total composite raw score for this examinee
    Zi <- sum(w * Xt)
    # corresponding scale score (exact from table)
    total_scale[i] <- tZ[Zi + 1]
  }

  # ---- reliability for raw scores -------------------------------------------
  err_var_raw <- mean(raw_csem^2)
  obs_var_raw <- stats::var(total_raw)

  if (isTRUE(all.equal(obs_var_raw, 0))) {
    reliability_raw <- NA_real_
    warning("Observed variance of raw scores is 0; reliability_raw set to NA.")
  } else {
    reliability_raw <- 1 - err_var_raw / obs_var_raw
  }

  # ---- reliability for scale scores (if available) --------------------------
  if (has_scale) {
    err_var_scale <- mean(scale_csem^2)
    obs_var_scale <- stats::var(total_scale)

    if (isTRUE(all.equal(obs_var_scale, 0))) {
      reliability_scale <- NA_real_
      warning("Observed variance of scale scores is 0; reliability_scale set to NA.")
    } else {
      reliability_scale <- 1 - err_var_scale / obs_var_scale
    }

    return(list(
      x                = total_raw,
      total_scale      = total_scale,
      csem             = raw_csem,
      cssem            = scale_csem,
      reliability_raw  = reliability_raw,
      reliability_scale = reliability_scale
    ))
  }

  # ---- return (raw only) ----------------------------------------------------
  return(list(
    x               = total_raw,
    csem            = raw_csem,
    reliability_raw = reliability_raw
  ))
}
