#' Impute Data
#'
#' Add imputed data columns to existing data.frame
#'
#' For each of the specified variables, a copy named `<variable>_I` is created.
#' Observed values are kept as they are and missing values are filled in,
#' either by predictive mean matching via [mice::mice()] or with a fixed
#' constant. Only the `_I` copies of `vars` are passed to `mice()`, so the
#' variables in `vars` are the predictors for one another; `keep_vars` are
#' carried through untouched and are not used for prediction.
#'
#' All `_I` columns in the result are converted to factors.
#'
#' If you choose to return the mice object with return_mice, the function
#' output will be a list that includes the final data.frame and the mice output.
#'
#' @param input data.frame with calibration variables
#' @param vars character vector with names of variables to be imputed
#'   (must be non-empty and present in `input`; duplicates are ignored)
#' @param keep_vars character vector with names of additional variables that should be retained
#' @param return_mice boolean for whether to return mice object (for looking at logged events)
#' @param impute_constant numeric if not NULL will impute with provided constant
#'   instead of running mice. For factor columns the constant is added as a
#'   new level so that missing values are actually replaced.
#'
#' @return If `return_mice` is `FALSE`, a data.frame holding the `_I` columns
#'   (as factors) followed by `keep_vars`. If `return_mice` is `TRUE`, an
#'   unnamed list of length two: the data.frame described above, and the
#'   object returned by `mice()` (or, when `impute_constant` is supplied, the
#'   data.frame of constant-filled `_I` columns before factor conversion).
#' 
#' @examples
#' calVars <- c(
#'   "SEX_A_R", "AGEP_A_R", "HISPALLP_A_R", "ORIENT_A_R", "HICOV_A_R", "EDUCP_A_R", "REGION_R",
#'   "EMPLASTWK_A_R", "HOUTENURE_A_R", "MARITAL_A_R"
#' )
#' stuVars <- "DIBTYPE_A_R"
#' nhis_keep_vars <- c("PPSU","PSTRAT","WTFA_A")
#' 
#' nhis_imputed <- impute_data(nhis_processed, c(calVars, stuVars), nhis_keep_vars)
#' 
#' @import dplyr
#' @importFrom mice mice complete
#' @export
impute_data <- function(input, vars, keep_vars = c(), return_mice = FALSE,
                        impute_constant = NULL) {

  # Input validation -----------------------------------------------------------
  if (!is.data.frame(input)) {
    stop("`input` must be a data.frame.", call. = FALSE)
  }
  # paste0(character(0), "_I") yields "_I", so an empty `vars` must be rejected
  # up front rather than producing a bogus column name.
  if (!is.character(vars) || length(vars) == 0L) {
    stop("`vars` must be a non-empty character vector.", call. = FALSE)
  }

  all_vars <- unique(vars)
  absent_vars <- setdiff(all_vars, names(input))
  if (length(absent_vars) > 0L) {
    stop(
      "`vars` not found in `input`: ", paste(absent_vars, collapse = ", "),
      call. = FALSE
    )
  }
  all_vars_imp <- paste0(all_vars, "_I")

  # Data imputation ------------------------------------------------------------
  # Copy original values (not-imputed) to _I variables
  df <- input
  df[all_vars_imp] <- input[all_vars]

  absent_keep <- setdiff(keep_vars, names(df))
  if (length(absent_keep) > 0L) {
    stop(
      "`keep_vars` not found in `input`: ", paste(absent_keep, collapse = ", "),
      call. = FALSE
    )
  }

  # List-style subsetting (`df[cols]`) never drops to a vector, so a single
  # variable still reaches the imputation step as a one-column data.frame.
  if (is.null(impute_constant)) {
    df_imp <- mice(
      data = df[all_vars_imp],
      m = 1,                       # Generate 1 imputed dataset
      method = "pmm",              # Predictive mean matching
      maxit = 20                   # 20 iterations for FCS
    )
    imputed_cols <- complete(df_imp, 1)
  } else {
    df_imp <- df[all_vars_imp]

    # Assigning a value that is not an existing level into a factor leaves NA
    # (with a warning), so register the constant as a level first.
    for (col in all_vars_imp) {
      if (is.factor(df_imp[[col]])) {
        levels(df_imp[[col]]) <- union(
          levels(df_imp[[col]]),
          as.character(impute_constant)
        )
      }
    }

    df_imp[is.na(df_imp)] <- impute_constant
    imputed_cols <- df_imp
  }

  # Merge imputed data back to original data frame
  final_df_imp <- df
  final_df_imp[all_vars_imp] <- imputed_cols

  # Keep only the imputed columns and the explicitly retained ones; unique()
  # guards against a name listed both as an _I column and in keep_vars.
  final <- final_df_imp[unique(c(all_vars_imp, keep_vars))]
  final[all_vars_imp] <- lapply(final[all_vars_imp], factor)

  if (return_mice) {
    list(final, df_imp)
  } else {
    final
  }
}
