#' Descriptive Statistics
#'
#' This function computes summary statistics for one or more than one variable,
#' optionally by a grouping and/or split variable. By default, the function prints
#' the number of observations (\code{n}), number of missing values (\code{nNA}),
#' percentage of missing values (\code{%NA}), number of unique elements after omitting
#' missing values (\code{nUQ}), arithmetic mean (\code{M}), standard deviation
#' (\code{SD}), minimum (\code{Min}), percentage of observations at the minimum
#' (\code{%Min}), maximum (\code{Max}), percentage of observations at the maximum
#' (\code{%Max}), skewness (\code{Skew}), and kurtosis (\code{Kurt}).
#'
#' @param data     a numeric vector or data frame with numeric variables, i.e.,
#'                 factors and character variables are excluded from \code{data}
#'                 before conducting the analysis.
#' @param ...      an expression indicating the variable names in \code{data},
#'                 e.g., \code{descript(dat, x1, x2, x3)}. Note that the operators
#'                 \code{+}, \code{-}, \code{~}, \code{:}, \code{::},
#'                 and \code{!} can also be used to select variables, see 'Details'
#'                 in the \code{\link{df.subset}} function.
#' @param print    a character vector indicating which statistical measures to be
#'                 printed on the console, i.e., \code{n} (number of observations),
#'                 \code{nNA} (number of missing values), \code{pNA} (percentage of
#'                 missing values), \code{nUQ} (number of unique elements after
#'                 omitting missing values), \code{m} (arithmetic mean), \code{se.m}
#'                 (standard error of the arithmetic mean), \code{var} (variance),
#'                 \code{sd} (standard deviation), \code{med} (median),\code{min}
#'                 (minimum), \code{p.min} (percentage of observations at the minimum),
#'                 \code{p25} (25th percentile, first quartile), \code{p75} (75th
#'                 percentile, third quartile), \code{max} (maximum), \code{p.max}
#'                 (percentage of observations at the maximum),\code{range} (range),
#'                 \code{iqr} (interquartile range), \code{skew} (skewness), and
#'                 \code{kurt} (excess kurtosis). The default setting is
#'                 \code{print = c("n", "nNA", "pNA", "nUQ", "m", "sd", "min", "p.min", "max", "p.max", "skew", "kurt")}.
#' @param group    a numeric vector, character vector or factor as grouping variable.
#'                 Alternatively, a character string indicating the variable name
#'                 of the grouping variable in \code{data} can be specified.
#' @param split    a numeric vector, character vector or factor as split variable.
#'                 Alternatively, a character string indicating the variable name
#'                 of the split variable in \code{data} can be specified.
#' @param sample   logical: if \code{TRUE} (default), the univariate sample skewness
#'                 or kurtosis is computed, while the population skewness or kurtosis
#'                 is computed when \code{sample = FALSE}.
#' @param sort.var logical: if \code{TRUE}, output table is sorted by variables when
#'                 specifying \code{group}.
#' @param na.omit  logical: if \code{TRUE}, incomplete cases are removed before
#'                 conducting the analysis (i.e., listwise deletion).
#' @param digits   an integer value indicating the number of decimal places to be
#'                 used.
#' @param as.na    a numeric vector indicating user-defined missing values,
#'                 i.e. these values are converted to \code{NA} before conducting
#'                 the analysis. Note that \code{as.na()} function is only applied
#'                 to \code{data}, but not to \code{group} or \code{split}.
#' @param write    a character string naming a file for writing the output into
#'                 either a text file with file extension \code{".txt"} (e.g.,
#'                 \code{"Output.txt"}) or Excel file with file extension
#'                 \code{".xlsx"}  (e.g., \code{"Output.xlsx"}). If the file
#'                 name does not contain any file extension, an Excel file will
#'                 be written.
#' @param append   logical: if \code{TRUE} (default), output will be appended
#'                 to an existing text file with extension \code{.txt} specified
#'                 in \code{write}, if \code{FALSE} existing text file will be
#'                 overwritten.
#' @param check    logical: if \code{TRUE} (default), argument specification is checked.
#' @param output   logical: if \code{TRUE} (default), output is shown on the console.
#'
#' @details
#' \describe{
#' \item{\strong{Floor and Ceiling Effects}}{This function computes the percentage
#' of observations at both the minimum and maximum to evaluate floor and ceiling
#' effects in continuous variables. Historically, floor or ceiling effects are
#' considered to be present if more than 15% of observations are at the lowest
#' or highest possible score (McHorney & Tarlov, 1995; Terwee et al., 2007).
#' Muthen (2023, see video at 7:58) noted the rule of thumb that linear models
#' should be avoided when the floor or ceiling effect of the outcome variable
#' exceeds 25%.}
#' }
#'
#' @author
#' Takuya Yanagida \email{takuya.yanagida@@univie.ac.at}
#'
#' @seealso
#' \code{\link{ci.mean}}, \code{\link{ci.mean.diff}}, \code{\link{ci.median}},
#' \code{\link{ci.prop}}, \code{\link{ci.prop.diff}}, \code{\link{ci.var}},
#' \code{\link{ci.sd}}, \code{\link{ci.cor}}, \code{\link{freq}},
#' \code{\link{crosstab}}, \code{\link{multilevel.descript}}, \code{\link{na.descript}}.
#'
#' @references
#' McHorney, C. A., & Tarlov, A. R. (1995). Individual-patient monitoring in clinical
#' practice: are available health status surveys adequate?.
#' \emph{Quality of Life Research, 4}(4), 293-307. https://doi.org/10.1007/BF01593882
#'
#' Muthen, B. (2023, Feb. 28). \emph{Mplus Web Talk No. 6 - Using Mplus To Do Dynamic Structural
#' Equation Modeling: Segment 3, Descriptive Analyses} [Video]. YouTube.
#' https://www.statmodel.com/Webtalk6.shtml
#'
#' Rasch, D., Kubinger, K. D., & Yanagida, T. (2011). \emph{Statistics in psychology
#' - Using R and SPSS}. John Wiley & Sons.
#'
#' Terwee, C. B., Bot, S. D., de Boer, M. R., van der Windt, D. A., Knol, D. L.,
#' Dekker, J., Bouter, L. M., & de Vet, H. C. (2007). Quality criteria were proposed
#' for measurement properties of health status questionnaires.
#' \emph{Journal of Clinical Epidemiology, 60}(1), 34-42.
#' https://doi.org/10.1016/j.jclinepi.2006.03.012
#'
#' @return
#' Returns an object of class \code{misty.object}, which is a list with following
#' entries:
#'
#' \item{\code{call}}{function call}
#' \item{\code{type}}{type of analysis}
#' \item{\code{data}}{list with the input specified in \code{data}, \code{group}, and \code{split}}
#' \item{\code{args}}{specification of function arguments}
#' \item{\code{result}}{result table}
#'
#' @export
#'
#' @examples
#' #----------------------------------------------------------------------------
#' # Descriptive statistics
#'
#' # Example 1a: Descriptive statistics for 'mpg', 'cyl', and 'hp'
#' descript(mtcars, mpg, cyl, hp)
#'
#' # Alternative specification without using the '...' argument
#' descript(mtcars[, c("mpg", "cyl", "hp")])
#'
#' # Example 1b: Print all available statistical measures
#' descript(mtcars, mpg, cyl, hp, print = "all")
#'
#' # Example 1c: Print default statistical measures plus median
#' descript(mtcars, mpg, cyl, hp, print = c("default", "med"))
#'
#' #----------------------------------------------------------------------------
#' # Grouping and Split Variable
#'
#' # Example 2a: Grouping variable
#' descript(mtcars, mpg, cyl, hp, group = "vs")
#'
#' # Alternative specification without using the '...' argument
#' descript(mtcars[, c("mpg", "cyl", "hp")], group = mtcars$vs)
#'
#' # Another alternative specification without using the '...' argument
#' descript(mtcars[, c("mpg", "cyl", "hp", "vs")], group = "vs")
#'
#' # Example 2b: Split variable
#' descript(mtcars, mpg, cyl, hp, split = "am")
#'
#' # Alternative specification without using the '...' argument
#' descript(mtcars[, c("mpg", "cyl", "hp")], split = mtcars$am)
#'
#' # Another alternative specification without using the '...' argument
#' descript(mtcars[, c("mpg", "cyl", "hp", "am")], split = "am")
#'
#' # Example 2c: Grouping and split variable
#' descript(mtcars, mpg, cyl, hp, group = "vs", split = "am")
#'
#' # Alternative specification without using the '...' argument
#' descript(mtcars[, c("mpg", "cyl", "hp")], group = mtcars$vs, split = mtcars$am)
#'
#' # Another alternative specification without using the '...' argument
#' descript(mtcars[, c("mpg", "cyl", "hp", "vs", "am")], group = "vs", split = "am")
#'
#' \dontrun{
#' #----------------------------------------------------------------------------
#' # Write Output
#'
#' # Example 3a: Text file
#' descript(mtcars, write = "Descript_Text.txt")
#'
#' # Example 3b: Excel file
#' descript(mtcars, write = "Descript_Excel.xlsx")
#' }
descript <- function(data, ...,
                     print = c("all", "default", "n", "nNA", "pNA", "nUQ", "m", "se.m", "var", "sd", "min", "p.min", "p25", "med", "p75", "max", "p.max", "range", "iqr", "skew", "kurt"),
                     group = NULL, split = NULL, sample = FALSE, sort.var = FALSE, na.omit = FALSE,
                     digits = 2, as.na = NULL, write = NULL, append = TRUE,
                     check = TRUE, output = TRUE) {

  #_____________________________________________________________________________
  #
  # Initial Check --------------------------------------------------------------

  # Check if input 'data' is missing or NULL
  if (isTRUE(missing(data) || is.null(data))) { stop("Please specify a numeric vector or data frame for the argument 'data'", call. = FALSE) }

  #_____________________________________________________________________________
  #
  # Data -----------------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Data using the argument '...' ####

  if (isTRUE(!missing(...))) {

    # Extract data and convert tibble into data frame or vector
    x <- data[, .var.names(data = data, ..., group = group, split = split), drop = FALSE] |> (\(p) if (isTRUE("tbl" %in% substr(class(p), 1L, 3L))) { if (isTRUE(ncol(as.data.frame(p)) == 1L)) { unname(unlist(p)) } else { as.data.frame(p) } } else { p })()

    # Extract grouping variable and convert tibble into a vector
    if (isTRUE(!is.null(group))) { group <- data[, group] |> (\(y) if (isTRUE("tbl" %in% substr(class(y), 1L, 3L))) { unname(unlist(y)) } else { return(y) })() }

    # Extract splitting variable and convert tibble into a vector
    if (isTRUE(!is.null(split))) { split <- data[, split] |> (\(y) if (isTRUE("tbl" %in% substr(class(y), 1L, 3L))) { unname(unlist(y)) } else { return(y) })() }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Data without using the argument '...' ####

  } else {

    # Data frame
    x <- as.data.frame(data)

    # Remove group and split variable in 'x'
    var.group <- .var.group(data = x, group = group, split = split)

    # Data
    if (isTRUE(!is.null(var.group$data)))  { x <- var.group$data }

    # Grouping variable
    if (isTRUE(!is.null(var.group$group))) { group <- var.group$group }

    # Split variable
    if (isTRUE(!is.null(var.group$split))) { split <- var.group$split }

  }

  # Variables in 'x'
  if (isTRUE(ncol(as.data.frame(x)) == 0L)) { stop("No variable left for analysis after excluding the grouping and/or split variable.", call. = FALSE) }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Exclude Non-Numeric Variables ####

  (!vapply(as.data.frame(x), is.numeric, FUN.VALUE = logical(1L))) |> (\(p) if (isTRUE(any(p))) {

    x <<- as.data.frame(x)[, -which(p), drop = FALSE]

    warning(paste0("Non-numeric variables were excluded from the analysis: ", paste(names(which(p)), collapse = ", ")), call. = FALSE)

    if (isTRUE(ncol(as.data.frame(x)) == 0L)) { stop("No variable left for analysis after excluding non-numeric variables.", call. = FALSE) }

  })()

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Missing Data ####

  #...................
  ### Convert User-Missing Values into NA ####

  if (isTRUE(!is.null(as.na))) { x <- .as.na(x, na = as.na) }

  #...................
  ### Listwise Deletion ####

  # Check input 'na.omit'
  .check.input(logical = "na.omit", envir = environment(), input.check = check)

  if (isTRUE(na.omit && any(is.na(x)))) {

    na.omit(x) |> (\(p) {

        # Listwise deletion
        x <<- p

        # Grouping variable
        if (isTRUE(!is.null(group))) { group <<- group[-attributes(p)$na.action] }

        # Split variable
        if (isTRUE(!is.null(split))) { split <<- split[-attributes(p)$na.action] }

        warning(paste0("Listwise deletion of incomplete data, number of cases removed from the analysis: ", length(attributes(p)$na.action)), call. = FALSE)

      })()

  }

  #_____________________________________________________________________________
  #
  # Input Check ----------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Check Inputs ####

  .check.input(logical = c("sample", "sort.var", "na.omit", "append", "output"),
               m.character = list(print = c("all", "default", "n", "nNA", "pNA", "m", "nUQ", "se.m", "var", "sd", "min", "p.min", "p25", "med", "p75", "max", "p.max", "range", "iqr", "skew", "kurt")),
               args = c("digits", "write2"), envir = environment(), input.check = check)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Additional Checks ####

  if (isTRUE(check)) {

    #...................
    ### Check input 'group' ####

    if (isTRUE(!is.null(group))) {

      # Input 'group' completely missing
      if (isTRUE(all(is.na(group)))) { stop("The grouping variable specified in 'group' is completely missing.", call. = FALSE) }

      # Grouping variable identical to variable in 'x'
      if (isTRUE(!is.null(group))) { which(sapply(names(x), function(y) identical(group, x[, y]))) |> (\(p) if (isTRUE(length(p) != 0L)) { stop(paste0("Grouping variable is identical to the variable '", names(p)[1L]), "'.") })() }

      # Only one group in 'group'
      if (isTRUE(length(na.omit(unique(group))) == 1L)) { warning("There is only one group represented in the grouping variable specified in 'group'.", call. = FALSE) }

    }

    #...................
    ### Check input 'split' ####

    if (isTRUE(!is.null(split))) {

      # Input 'split' completely missing
      if (isTRUE(all(is.na(split)))) { stop("The split variable specified in 'split' is completely missing.", call. = FALSE) }

      # Split variable identical to variable in 'x'
      if (isTRUE(!is.null(split))) { which(sapply(names(x), function(y) identical(split, x[, y]))) |> (\(p) if (isTRUE(length(p) != 0L)) { stop(paste0("Split variable is identical to the variable '", names(p)[1L]), "'.") })() }

      # Only one group in 'split'
      if (isTRUE(length(na.omit(unique(split))) == 1L)) { warning("There is only one group represented in the split variable specified in 'split'.", call. = FALSE) }

    }

    # Grouping and split variable are identical
    if (isTRUE(!is.null(group) && !is.null(split) && identical(group, split))) { stop("Grouping and split variables are identical.", call. = FALSE) }

  }

  #_____________________________________________________________________________
  #
  # Arguments ------------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Statistical measures ####

  print.all <- c("n", "nNA", "pNA", "nUQ", "m", "se.m", "var", "sd", "min", "p.min", "p25", "med", "p75", "max", "p.max", "range", "iqr", "skew", "kurt")

  # Default setting
  if (isTRUE(all(c("all", "default", "n", "nNA", "pNA", "nUQ", "m", "se.m", "var", "sd", "min", "p.min", "p25", "med", "p75", "max", "p.max", "range", "iqr", "skew", "kurt") %in% print))) {

    print <- c("n", "nNA", "pNA", "nUQ", "m", "sd", "min", "p.min", "max", "p.max", "skew", "kurt")

  # All statistical measures
  } else if (isTRUE("all" %in% print)) {

    print <- print.all

  # Default setting with additional statistical measures
  } else if (isTRUE("default" %in% print && length(print > 1L))) {

    print <- print.all[print.all %in% misty::chr.omit(union(c("n", "nNA", "pNA", "nUQ", "m", "sd", "min", "p.min", "max", "p.max", "skew", "kurt"), print), "default", check = FALSE)]

  # Manual default setting
  } else if (isTRUE(all(print == "default"))) {

    print <- c("n", "nNA", "pNA", "nUQ", "m", "sd", "min", "p.min", "max", "p.max", "skew", "kurt")

  }

  #_____________________________________________________________________________
  #
  # Main Function --------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## No Grouping, No Split ####

  if (isTRUE(is.null(group) && is.null(split))) {

    result <- data.frame(variable = colnames(x),
                         n     = vapply(x, function(y) length(y[!is.na(y)]), FUN.VALUE = integer(1L)),
                         nNA   = vapply(x, function(y) sum(is.na(y)), FUN.VALUE = integer(1L)),
                         pNA   = vapply(x, function(y) sum(is.na(y)) / length(y) * 100L, FUN.VALUE = double(1L)),
                         nUQ   = vapply(x, function(y) misty::uniq.n(y), FUN.VALUE = integer(1L)),
                         m     = vapply(x, function(y) y[!is.na(y)] |> (\(p) ifelse(length(p) <  1L, NA, mean(p, na.rm = FALSE)))(), FUN.VALUE = double(1L)),
                         se.m  = vapply(x, function(y) y[!is.na(y)] |> (\(p) ifelse(length(p) <= 1L, NA, sd(p, na.rm = FALSE) / sqrt(length(p))))(), FUN.VALUE = double(1L)),
                         var   = vapply(x, function(y) y[!is.na(y)] |> (\(p) ifelse(length(p) <= 1L, NA, var(p, na.rm = FALSE)))(), FUN.VALUE = double(1L)),
                         sd    = vapply(x, function(y) y[!is.na(y)] |> (\(p) ifelse(length(p) <= 1L, NA, sd(p, na.rm = FALSE)))(), FUN.VALUE = double(1L)),
                         min   = vapply(x, function(y) y[!is.na(y)] |> (\(p) ifelse(length(p) <  1L, NA, min(p, na.rm = FALSE)))(), FUN.VALUE = double(1L)),
                         p.min = vapply(x, function(y) y[!is.na(y)] |> (\(p) ifelse(length(p) <  1L, NA, sum(p == min(p, na.rm = FALSE)) / length(p) * 100L))(), FUN.VALUE = double(1L)),
                         p25   = vapply(x, function(y) y[!is.na(y)] |> (\(p) ifelse(length(p) <  1L, NA, quantile(p, probs = 0.25, na.rm = FALSE)))(), FUN.VALUE = double(1L)),
                         med   = vapply(x, function(y) y[!is.na(y)] |> (\(p) ifelse(length(p) <  1L, NA, median(p, na.rm = FALSE)))(), FUN.VALUE = double(1L)),
                         p75   = vapply(x, function(y) y[!is.na(y)] |> (\(p) ifelse(length(p) <  1L, NA, quantile(p, probs = 0.75, na.rm = FALSE)))(), FUN.VALUE = double(1L)),
                         max   = vapply(x, function(y) y[!is.na(y)] |> (\(p) ifelse(length(p) <  1L, NA, max(p, na.rm = FALSE)))(), FUN.VALUE = double(1L)),
                         p.max = vapply(x, function(y) y[!is.na(y)] |> (\(p) ifelse(length(p) <  1L, NA, sum(p == max(p, na.rm = FALSE)) / length(p) * 100L))(), FUN.VALUE = double(1L)),
                         range = vapply(x, function(y) y[!is.na(y)] |> (\(p) ifelse(length(p) <  1L, NA, diff(range(p, na.rm = FALSE))))(), FUN.VALUE = double(1L)),
                         iqr   = vapply(x, function(y) y[!is.na(y)] |> (\(p) ifelse(length(p) <  1L, NA, IQR(p, na.rm = FALSE)))(), FUN.VALUE = double(1L)),
                         skew  = suppressWarnings(vapply(x, misty::skewness, sample = sample, check = FALSE, FUN.VALUE = double(1L))),
                         kurt  = suppressWarnings(vapply(x, misty::kurtosis, sample = sample, check = FALSE, FUN.VALUE = double(1L))),
                         row.names = NULL, check.names = FALSE)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Grouping, No Split ####

  } else if (isTRUE(!is.null(group) && is.null(split))) {

    result <- lapply(split(x, f = group), function(y) misty::descript(y, group = NULL, split = NULL, sort.var = sort.var, check = FALSE, output = FALSE)$result) |> (\(y) data.frame(group = rep(names(y), each = ncol(x)), eval(parse(text = paste0("rbind(", paste0("y[[", seq_len(length(y)), "]]", collapse = ", "), ")")))) )()

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## No Grouping, Split ####

  } else if (isTRUE(is.null(group) && !is.null(split))) {

    result <- lapply(split(data.frame(x), f = split), function(y) misty::descript(y, group = NULL, split = NULL, sort.var = sort.var, check = FALSE, output = FALSE)$result)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Grouping, Split ####

  } else if (isTRUE(!is.null(group) && !is.null(split))) {

    result <- lapply(split(data.frame(x, group = group), f = split), function(y) misty::descript(y[, -grep("group", names(y))], group = y$group, split = NULL, sort.var = sort.var, check = FALSE, output = FALSE)$result)

  }

  #_____________________________________________________________________________
  #
  # Return Object --------------------------------------------------------------

  object <- list(call = match.call(),
                 type = "descript",
                 data = list(x = x, group = group, split = split),
                 args = list(print = print, sample = sample, sort.var = sort.var, na.omit = na.omit, digits = digits, as.na = as.na, write = write, append = append, check = check, output = output),
                 result = result)

  class(object) <- "misty.object"

  #_____________________________________________________________________________
  #
  # Write Results --------------------------------------------------------------

  if (isTRUE(!is.null(write))) { .write.result(object = object, write = write, append = append) }

  #_____________________________________________________________________________
  #
  # Output ---------------------------------------------------------------------

  if (isTRUE(output)) { print(object, check = FALSE) }

  return(invisible(object))

}

#_______________________________________________________________________________
