#' @title Antimicrobial Data Analysis
#' @name antimicrobial_analysis
#'
#' @description
#' Analyzes and visualizes antimicrobial test data through a combination of descriptive statistics, inferential analyses
#' (linear model, ANOVA, Tukey HSD, and a Chi-square or Fisher's exact test), Multidimensional Scaling (MDS), and Random Forest classification.
#' This function provides insights into group differences, underlying data structure, and variable importance in antimicrobial activity.
#'
#' @param zone_data A data frame containing the results of antimicrobial testing, with the following columns:
#' 'bacterial_strain', 'antimicrobial_agent', 'zone_inhibition', and 'contact_area'.
#' Values of 'contact_area' are expected to be one of 'No Growth', 'Partial Growth', or 'Growth'.
#' @param seed Optional. A numeric value to set the seed for reproducibility. If NULL (default), no seed is set.
#'
# Multivariate & stats
#' @importFrom effectsize eta_squared
#' @importFrom FactoMineR PCA
#' @importFrom factoextra fviz_pca_ind
#' @importFrom stats aov TukeyHSD chisq.test lm resid fitted na.omit dist cmdscale
#'
# Random Forest
#' @importFrom randomForest randomForest importance
#'
# dplyr
#' @importFrom dplyr group_by summarise %>% ungroup mutate
#'
# ggplot2
#' @importFrom ggplot2 ggplot aes geom_bar geom_errorbar labs theme_minimal facet_wrap theme
#' @importFrom ggplot2 element_text element_rect geom_tile scale_fill_gradient scale_color_brewer geom_point geom_col coord_flip
#'
#' @return A list containing the results of the antimicrobial analysis, including:
#' \itemize{
#'   \item \strong{Summary statistics} of the zone of inhibition per bacterial strain and antimicrobial agent.
#'   \item \strong{Chi-square (or Fisher's exact) test results} for the association between contact area and antimicrobial agent, together with the name of the test that was used.
#'   \item \strong{Bar plot} visualizing the mean zone of inhibition (with standard-deviation error bars) across antimicrobial treatments and bacterial strains.
#'   \item \strong{Heatmap} displaying the frequency of each contact area category across bacterial strains and antimicrobial agents.
#'   \item \strong{ANOVA and Tukey HSD test results} for differences in zone of inhibition across treatments.
#'   \item \strong{Linear model summary and diagnostics} including residuals, fitted values, R-squared, and adjusted R-squared.
#'   \item \strong{MDS plot} representing similarity of samples in reduced dimensions based on zone of inhibition.
#'   \item \strong{Random forest model} used to classify contact area from zone of inhibition, antimicrobial agent, and bacterial strain.
#'   \item \strong{Variable importance results} identifying key predictors from the random forest model.
#'   \item \strong{Variable importance plot} visualizing the most influential variables in the classification task.
#' }
#'
#' @examples
#' # EXAMPLE 1: Two Bacterial Strains
#' bacterial_strain = rep(c("S. aureus", "K. pneumoniae"), each = 12)
#' antimicrobial_agent = rep(rep(c("Control", "HGS1 (30% wet pick-up)", "HGS2 (50% wet pick-up)",
#'                                 "HGS3 (80% wet pick-up)"), each = 3), times = 2)
#' zone_inhibition = c(0, 0, 0, 10.2, 11.0, 9.8, 14.1, 13.8, 15.0, 18.5, 17.9, 19.2,
#'                     0, 0, 0, 8.5, 9.0, 8.8, 12.7, 13.1, 12.9, 16.4, 15.8, 16.9)
#' contact_area = c("Growth", "Growth", "Growth", "Partial Growth",
#'                  "Growth", "Partial Growth", "No Growth", "No Growth",
#'                  "Partial Growth", "No Growth", "No Growth", "No Growth",
#'                  "Growth", "Growth", "Growth", "Growth",
#'                  "Partial Growth", "Growth", "Partial Growth", "No Growth",
#'                  "No Growth", "No Growth", "No Growth", "No Growth")
#' zone_data <- data.frame(bacterial_strain, antimicrobial_agent, zone_inhibition, contact_area)
#' antimicrobial_analysis(zone_data)                 # seed = NULL, no seed is set
#' antimicrobial_analysis(zone_data, seed = 123)     # seed is set to ensure reproducible results
#'
#' # EXAMPLE 2: Four Bacterial Strains
#' bacterial_strain = rep(c("S. aureus", "K. pneumoniae", "E. coli", "S. pneumoniae"), each = 12)
#' antimicrobial_agent = rep(rep(c("Control", "HGS1 (30% wet pick-up)", "HGS2 (50% wet pick-up)",
#'                                 "HGS3 (80% wet pick-up)"), each = 3), times = 4)
#' zone_inhibition = c(0, 0, 0, 10.2, 11.0, 9.8, 14.1, 13.8, 15.0, 18.5, 17.9, 19.2,
#'                     0, 0, 0, 8.5, 9.0, 8.8, 12.7, 13.1, 12.9, 16.4, 15.8, 16.9,
#'                     0, 0, 0, 9.3, 8.9, 9.1, 13.0, 13.5, 13.2, 16.7, 17.0, 16.9,
#'                     0, 0, 0, 11.5, 11.2, 11.0, 15.2, 14.9, 15.0, 19.0, 18.7, 19.3)
#' contact_area = c("Growth", "Growth", "Growth", "Partial Growth",
#'                  "Growth", "Partial Growth", "No Growth", "No Growth",
#'                  "Partial Growth", "No Growth", "No Growth", "No Growth",
#'                  "Growth", "Growth", "Growth", "Growth",
#'                  "Partial Growth", "Growth", "Partial Growth", "No Growth",
#'                  "No Growth", "No Growth", "No Growth", "No Growth",
#'                  "Growth", "Growth", "Growth", "Partial Growth",
#'                  "Partial Growth", "Growth", "Partial Growth", "No Growth",
#'                  "Partial Growth", "No Growth", "No Growth", "No Growth",
#'                  "Growth", "Growth", "Growth", "Partial Growth",
#'                  "Partial Growth", "Growth", "No Growth", "No Growth",
#'                  "Partial Growth", "No Growth", "No Growth", "No Growth")
#' zone_data <- data.frame(bacterial_strain, antimicrobial_agent, zone_inhibition, contact_area)
#' antimicrobial_analysis(zone_data)                 # seed = NULL, no seed is set
#' antimicrobial_analysis(zone_data, seed = 123)     # seed is set to ensure reproducible results
#'
#' @author Paul Angelo C. Manlapaz
#' @export

# Register the column names that are referenced through non-standard
# evaluation (dplyr / ggplot2) so that R CMD check does not report them as
# undefined global variables.
utils::globalVariables(
  c(
    "bacterial_strain", "antimicrobial_agent", "zone_inhibition",
    "contact_area", "Freq", "Dim1", "Dim2", "mean_zone", "sd_zone",
    "Variable", "Importance"
  )
)

antimicrobial_analysis <- function(zone_data, seed = NULL) {
  # Runs the complete antimicrobial analysis on `zone_data` and returns a named
  # list with summary statistics, model results, test results and ggplot
  # objects.
  #
  # Arguments:
  #   zone_data  data frame with the columns bacterial_strain,
  #              antimicrobial_agent, zone_inhibition (numeric) and
  #              contact_area ("No Growth", "Partial Growth" or "Growth").
  #   seed       optional value passed to set.seed(); NULL leaves the RNG
  #              state untouched.
  #
  # Errors:
  #   stops when the input is not a data frame, a required column is missing,
  #   zone_inhibition is not numeric, or no complete rows remain.

  # Early check for data type
  if (!is.data.frame(zone_data)) {
    stop("Input must be a data frame.")
  }

  # Check required columns
  required_cols <- c(
    "bacterial_strain", "antimicrobial_agent",
    "zone_inhibition", "contact_area"
  )
  if (!all(required_cols %in% colnames(zone_data))) {
    stop("Input data must contain: bacterial_strain, antimicrobial_agent, zone_inhibition, contact_area")
  }

  # Every downstream step (means, lm, aov, dist) needs a numeric response;
  # fail here with a clear message instead of deep inside a model call.
  if (!is.numeric(zone_data[["zone_inhibition"]])) {
    stop("Column 'zone_inhibition' must be numeric.")
  }

  # Setting the seed
  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Convert contact_area to a factor BEFORE removing incomplete rows, so that
  # labels outside the expected set (which become NA) are removed together
  # with the genuinely missing values rather than lingering as NA.
  contact_levels <- c("No Growth", "Partial Growth", "Growth")
  observed_labels <- unique(as.character(zone_data[["contact_area"]]))
  unknown_labels <- observed_labels[
    !is.na(observed_labels) & !(observed_labels %in% contact_levels)
  ]
  if (length(unknown_labels) > 0) {
    warning(
      "Rows with unrecognized contact_area values were removed: ",
      paste(unknown_labels, collapse = ", "),
      call. = FALSE
    )
  }
  zone_data[["contact_area"]] <- factor(
    zone_data[["contact_area"]],
    levels = contact_levels
  )

  # Remove missing values, looking only at the columns that are analysed so
  # that an NA in an unrelated extra column does not discard a usable row.
  complete_rows <- stats::complete.cases(
    zone_data[, required_cols, drop = FALSE]
  )
  zone_data <- zone_data[complete_rows, , drop = FALSE]

  if (nrow(zone_data) == 0) {
    stop("No complete observations remain after removing missing values.")
  }

  # Summary statistics per agent x strain combination
  grouped_data <- dplyr::group_by(
    zone_data, antimicrobial_agent, bacterial_strain
  )
  summary_stats <- dplyr::summarise(
    grouped_data,
    mean_zone = mean(zone_inhibition),
    sd_zone = stats::sd(zone_inhibition),
    min_zone = min(zone_inhibition),
    max_zone = max(zone_inhibition),
    .groups = "drop"
  )

  # Zone of inhibition plot (mean +/- SD bars, one facet per strain)
  zone_plot <- ggplot2::ggplot(
    summary_stats,
    ggplot2::aes(
      x = antimicrobial_agent, y = mean_zone, fill = bacterial_strain
    )
  ) +
    ggplot2::geom_bar(stat = "identity", position = "dodge") +
    ggplot2::geom_errorbar(
      ggplot2::aes(ymin = mean_zone - sd_zone, ymax = mean_zone + sd_zone),
      width = 0.2
    ) +
    ggplot2::labs(
      title = "Zone of Inhibition by Antimicrobial Agent",
      x = "Antimicrobial Agent", y = "Mean Zone of Inhibition (mm)"
    ) +
    ggplot2::theme_minimal() +
    ggplot2::facet_wrap(~ bacterial_strain, scales = "free_y") +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
      panel.grid = ggplot2::element_blank(),
      panel.border = ggplot2::element_rect(
        color = "black", fill = NA, linewidth = 1
      ),
      plot.title = ggplot2::element_text(hjust = 0.5)
    )

  # Contact area heatmap (counts of each category per agent and strain)
  contact_area_table <- table(
    zone_data[["contact_area"]],
    zone_data[["antimicrobial_agent"]],
    zone_data[["bacterial_strain"]]
  )
  heatmap_df <- as.data.frame(contact_area_table)
  colnames(heatmap_df) <- c(
    "contact_area", "antimicrobial_agent", "bacterial_strain", "Freq"
  )

  contact_area_plot <- ggplot2::ggplot(
    heatmap_df,
    ggplot2::aes(x = antimicrobial_agent, y = contact_area, fill = Freq)
  ) +
    ggplot2::geom_tile() +
    ggplot2::labs(
      title = "Contact Area by Antimicrobial Agent",
      x = "Antimicrobial Agent", y = "Contact Area"
    ) +
    ggplot2::scale_fill_gradient(low = "white", high = "#800000") +
    ggplot2::theme_minimal() +
    ggplot2::facet_wrap(~ bacterial_strain) +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
      panel.border = ggplot2::element_rect(
        color = "black", fill = NA, linewidth = 1
      ),
      plot.title = ggplot2::element_text(hjust = 0.5)
    )

  # Linear model and diagnostics
  zone_model <- stats::lm(
    zone_inhibition ~ contact_area + bacterial_strain + antimicrobial_agent,
    data = zone_data
  )
  model_summary <- summary(zone_model)

  diagnostics <- list(
    residuals = stats::resid(zone_model),
    fitted = stats::fitted(zone_model),
    r_squared = model_summary$r.squared,
    adj_r_squared = model_summary$adj.r.squared
  )

  # ANOVA and post-hoc
  anova_result <- stats::aov(
    zone_inhibition ~ antimicrobial_agent * bacterial_strain,
    data = zone_data
  )
  anova_summary <- summary(anova_result)
  tukey_result <- stats::TukeyHSD(anova_result)

  # Effect sizes
  effect_sizes <- effectsize::eta_squared(anova_result)

  # Chi-square / Fisher's test
  contact_table <- table(
    zone_data[["contact_area"]],
    zone_data[["antimicrobial_agent"]]
  )

  # Run the chi-squared test once. Its "approximation may be incorrect"
  # warning is muffled here because small expected counts are precisely the
  # situation in which the result is discarded in favour of Fisher's test.
  chisq_result <- suppressWarnings(stats::chisq.test(contact_table))

  if (any(chisq_result$expected < 5)) {
    contact_test <- stats::fisher.test(contact_table)
    test_method <- "Fisher's Exact Test"
  } else {
    contact_test <- chisq_result
    test_method <- "Chi-squared Test"
  }

  # MDS (Multidimensional Scaling)
  # Distances are built from zone_inhibition only. cmdscale() returns *up to*
  # k columns (dimensions without a positive eigenvalue are dropped), so the
  # coordinates are padded with zeros rather than indexing a missing column.
  mds_data <- stats::dist(zone_data[["zone_inhibition"]])
  mds_result <- stats::cmdscale(mds_data, k = 2)

  mds_coords <- matrix(0, nrow = nrow(zone_data), ncol = 2)
  n_dims <- min(ncol(mds_result), 2)
  if (n_dims > 0) {
    mds_coords[, seq_len(n_dims)] <-
      mds_result[, seq_len(n_dims), drop = FALSE]
  }

  mds_df <- data.frame(
    Dim1 = mds_coords[, 1],
    Dim2 = mds_coords[, 2],
    contact_area = zone_data[["contact_area"]]
  )

  mds_plot <- ggplot2::ggplot(
    mds_df,
    ggplot2::aes(x = Dim1, y = Dim2, color = contact_area)
  ) +
    ggplot2::geom_point(size = 3) +
    ggplot2::theme_minimal() +
    ggplot2::scale_color_brewer(palette = "Dark2") +
    ggplot2::labs(
      title = "Multidimensional Scaling (MDS)",
      x = "Dimension 1", y = "Dimension 2"
    ) +
    ggplot2::theme(
      plot.title = ggplot2::element_text(hjust = 0.5),
      panel.border = ggplot2::element_rect(
        color = "black", fill = NA, linewidth = 1
      ),
      panel.grid = ggplot2::element_blank()
    )

  # Random Forest (Classification and Variable Importance)
  if (!requireNamespace("randomForest", quietly = TRUE)) {
    warning("Package 'randomForest' not installed. Skipping Random Forest analysis.")
    rf_model <- NULL
    variable_importance <- NULL
    variable_importance_plot <- NULL
  } else {
    # NOTE(review): predictors are passed exactly as supplied; confirm that
    # character columns are treated as intended by randomForest.
    rf_model <- randomForest::randomForest(
      contact_area ~ zone_inhibition + antimicrobial_agent + bacterial_strain,
      data = zone_data, importance = TRUE
    )

    variable_importance <- randomForest::importance(rf_model)

    var_imp_df <- data.frame(
      Variable = rownames(variable_importance),
      Importance = variable_importance[, "MeanDecreaseAccuracy"],
      stringsAsFactors = FALSE
    )

    variable_importance_plot <- ggplot2::ggplot(
      var_imp_df,
      ggplot2::aes(x = stats::reorder(Variable, Importance), y = Importance)
    ) +
      ggplot2::geom_col(fill = "#1b9e77") +
      ggplot2::coord_flip() +
      ggplot2::labs(
        title = "Variable Importance (Random Forest)",
        x = "Predictor", y = "Mean Decrease in Accuracy"
      ) +
      ggplot2::theme_minimal() +
      ggplot2::theme(
        plot.title = ggplot2::element_text(hjust = 0.5),
        panel.border = ggplot2::element_rect(
          color = "black", fill = NA, linewidth = 1
        ),
        panel.grid = ggplot2::element_blank()
      )
  }

  # Return
  list(
    summary_stats = summary_stats,
    model_summary = model_summary,
    anova_summary = anova_summary,
    tukey_hsd = tukey_result,
    contact_test = contact_test,
    contact_test_method = test_method,
    effect_sizes = effect_sizes,
    model_diagnostics = diagnostics,
    zone_plot = zone_plot,
    contact_area_plot = contact_area_plot,
    mds_plot = mds_plot,
    random_forest_model = rf_model,
    variable_importance = variable_importance,
    variable_importance_plot = variable_importance_plot
  )
}
