#' @title A General Length and Information Criterion (LIC) Function
#' @description This function applies the LIC method to find an optimal data subset, supporting various error term distributions like T-distribution and skewed distributions.
#'
#' @details
#' The function iteratively samples subsets from the data, calculates a length criterion (L1) and an information criterion (N), and finds an optimal subset based on the intersection of the best samples from both criteria. It is a general implementation combining the logic of TLIC and SLIC.
#'
#' Each of the \code{K} subsets is drawn without replacement from the rows of
#' \code{X}. The subset minimising the length criterion and the subset
#' maximising \code{det(t(X1) \%*\% X1)} are intersected, and an ordinary least
#' squares fit is computed on the intersection. An error is raised when the
#' intersection holds fewer than \code{ncol(X)} observations, because the
#' coefficients cannot be estimated in that case.
#'
#' @param X A numeric design matrix with n rows and p columns.
#' @param Y A numeric response vector (or one-column matrix) of length n.
#' @param alpha The significance level for criterion calculation, default is 0.05.
#'        Must lie strictly between 0 and 1.
#' @param K The number of subsets to sample, default is 10.
#' @param nk The sample size of each subset. If NULL (default), it is set to
#'        \code{floor(n / K)}. Non-integer values are rounded down. Must satisfy
#'        \code{p < nk <= n}.
#' @param dist_type A character string specifying the assumed error distribution.
#'        Accepts T-distribution types (e.g., "student_t") from the original TLIC,
#'        and skewed types ("skew_normal", "skew_t", "skew_laplace") from SLIC.
#'        Note: In this implementation, the core calculation is robust and does not change based on dist_type. The parameter is kept for consistency with the original functions.
#'
#' @return A list containing the optimal model components:
#' \item{MUopt}{The predicted values for the optimal subset.}
#' \item{Bopt}{The estimated coefficients for the optimal model.}
#' \item{MAEMUopt}{The Mean Absolute Error of the optimal model.}
#' \item{MSEMUopt}{The Mean Squared Error of the optimal model (a 1 x 1 matrix).}
#' \item{opt}{The indices of the optimal data subset.}
#' \item{Yopt}{The response values of the optimal subset.}
#' @export
#'
#' @importFrom stats qt runif
#' @references
#' Guo, G., Song, H. & Zhu, L. The COR criterion for optimal subset selection in distributed estimation. \emph{Statistics and Computing}, 34, 163 (2024). \doi{10.1007/s11222-024-10471-z}
#'
#' Guo, G., Sun, Y., Qian, G., & Wang, Q. (2022). LIC criterion for optimal subset selection in distributed interval estimation. \emph{Journal of Applied Statistics}, 50(9), 1900-1920. \doi{10.1080/02664763.2022.2053949}.
#'
#' Chang, D., Guo, G. (2024). LIC: An R package for optimal subset selection for distributed data. \emph{SoftwareX}, 28, 101909.
#'
#' Jing, G., & Guo, G. (2025). TLIC: An R package for the LIC for T distribution regression analysis. \emph{SoftwareX}, 30, 102132.
#'
#' Chang, D., & Guo, G. Research on Distributed Redundant Data Estimation Based on LIC. \emph{IAENG International Journal of Applied Mathematics}, 55(1), 1-6 (2025).
#'
#' Gao, H., & Guo, G. LIC for Distributed Skewed Regression. \emph{IAENG International Journal of Applied Mathematics}, 55(9), 2925-2930 (2025).
#'
#' Zhang, C., & Guo, G. (2025). The optimal subset estimation of distributed redundant data. \emph{IAENG International Journal of Applied Mathematics}, 55(2), 270-277.
#'
#' Jing, G., & Guo, G. (2025). Student LIC for distributed estimation. \emph{IAENG International Journal of Applied Mathematics}, 55(3), 575-581.
#'
#' Liu, Q., & Guo, G. (2025). Distributed estimation of redundant data. \emph{IAENG International Journal of Applied Mathematics}, 55(2), 332-337.
#' @examples
#' # Example with T-distributed error data (like TLIC)
#' set.seed(12)
#' n <- 200
#' p <- 5
#' X_t <- matrix(stats::runif(n * p), ncol = p)
#' beta_t <- sort(stats::runif(p, 1, 5))
#' e_t <- stats::rt(n, df = 5)
#' Y_t <- X_t %*% beta_t + e_t
#' result_t <- ELIC(X_t, Y_t, dist_type = "student_t")
#' str(result_t)
#'
#' # Example with Skew-Normal error data (like SLIC)
#' if (requireNamespace("sn", quietly = TRUE)) {
#'   set.seed(123)
#'   n <- 200
#'   p <- 5
#'   X_s <- matrix(stats::rnorm(n * p), ncol = p)
#'   beta_s <- stats::runif(p, 1, 2)
#'   e_s <- sn::rsn(n = n, xi = 0, omega = 1, alpha = 5)
#'   Y_s <- X_s %*% beta_s + e_s
#'   result_s <- ELIC(X_s, Y_s, K = 5, dist_type = "skew_normal")
#'   str(result_s)
#' }
ELIC <- function(X, Y, alpha = 0.05, K = 10, nk = NULL, dist_type = "student_t") {
  # `dist_type` is accepted for interface compatibility only; nothing below
  # reads it.

  # ---- Input validation ----------------------------------------------------
  X <- as.matrix(X)
  if (!is.numeric(X)) {
    stop("`X` must be a numeric matrix.", call. = FALSE)
  }
  n <- nrow(X)
  p <- ncol(X)
  if (length(Y) != n) {
    stop("`Y` must have one value per row of `X`.", call. = FALSE)
  }
  if (!is.numeric(alpha) || length(alpha) != 1L || is.na(alpha) ||
        alpha <= 0 || alpha >= 1) {
    stop("`alpha` must be a single number in (0, 1).", call. = FALSE)
  }
  if (!is.numeric(K) || length(K) != 1L || is.na(K) || K < 1) {
    stop("`K` must be a single number >= 1.", call. = FALSE)
  }
  K <- as.integer(floor(K))

  if (is.null(nk)) {
    nk <- n / K
  }
  if (!is.numeric(nk) || length(nk) != 1L || is.na(nk)) {
    stop("`nk` must be NULL or a single number.", call. = FALSE)
  }
  # Subset sizes must be whole numbers; n / K is fractional whenever K does
  # not divide n, which previously broke the index bookkeeping.
  nk <- as.integer(floor(nk))
  if (nk <= p || nk > n) {
    stop(
      sprintf("`nk` (%d) must satisfy ncol(X) < nk <= nrow(X) (%d < nk <= %d).",
              nk, p, n),
      call. = FALSE
    )
  }

  # ---- Criteria for each random subset -------------------------------------
  len_crit <- info_crit <- rep(NA_real_, K)
  # Column i holds the row indices of subset i. Stored as doubles so that the
  # returned `opt` keeps the numeric type callers have always received.
  subsets <- matrix(0, nrow = nk, ncol = K)
  # Loop invariants.
  identity_nk <- diag(nk)
  t_quantile <- stats::qt(1 - alpha / 2, nk - p)

  for (i in seq_len(K)) {
    # sample.int(n, nk) draws the same stream as sample(1:n, nk).
    idx <- sample.int(n, nk, replace = FALSE)
    subsets[, i] <- idx

    # Direct row selection; equivalent to multiplying by a 0/1 selection
    # matrix but without allocating an nk x n matrix per iteration.
    X1 <- X[idx, , drop = FALSE]
    Y1 <- matrix(Y[idx], ncol = 1)

    XtX <- crossprod(X1)
    Hr <- X1 %*% solve(XtX) %*% t(X1)  # hat matrix of the subset fit

    # NOTE(review): this is sqrt(RSS) / (nk - p), not sqrt(RSS / (nk - p)).
    # Kept as in the original; nk and p are the same for every subset, so the
    # difference is a constant factor and cannot change which.min() below.
    SY <- sqrt(t(Y1) %*% (identity_nk - Hr) %*% Y1) / (nk - p)
    C1 <- sum(diag(Hr)) / nk

    len_crit[i] <- drop(2 * SY * C1 * t_quantile)
    info_crit[i] <- det(XtX)
  }

  # ---- Optimal subset: intersection of the two winners ---------------------
  opt1 <- subsets[, which.min(len_crit)]
  opt2 <- subsets[, which.max(info_crit)]
  opt <- intersect(opt1, opt2)

  if (length(opt) < p) {
    stop(
      sprintf(
        paste0(
          "The two selected subsets share only %d observation(s), fewer than ",
          "the %d needed to estimate the coefficients; increase `nk` or ",
          "decrease `K`."
        ),
        length(opt), p
      ),
      call. = FALSE
    )
  }

  # ---- OLS fit on the optimal subset ---------------------------------------
  Yopt <- Y[opt]
  Xopt <- X[opt, , drop = FALSE]

  # Solve the normal equations directly instead of forming the inverse.
  Bopt <- solve(crossprod(Xopt), crossprod(Xopt, Yopt))
  MUopt <- Xopt %*% Bopt
  Nopt <- length(Yopt)

  resid_opt <- Yopt - MUopt
  E5 <- crossprod(resid_opt) / Nopt  # 1 x 1 matrix, as before
  A5 <- sum(abs(resid_opt)) / Nopt

  list(
    MUopt = MUopt,
    Bopt = Bopt,
    MAEMUopt = A5,
    MSEMUopt = E5,
    opt = opt,
    Yopt = Yopt
  )
}
