% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/treemodels.R
\name{impute_cart}
\alias{impute_cart}
\alias{impute_rf}
\title{Decision Tree Imputation}
\usage{
impute_cart(dat, formula, add_residual = c("none", "observed", "normal"),
  cp, na_action = na.rpart, ...)

impute_rf(dat, formula, add_residual = c("none", "observed", "normal"),
  na_action = na.omit, ...)
}
\arguments{
\item{dat}{\code{[data.frame]}, with variables to be imputed and their
predictors.}

\item{formula}{\code{[formula]} imputation model description (see Model specification below).}

\item{add_residual}{\code{[character]} Type of residual to add to the predicted
value. With \code{"none"}, no residual is added. With \code{"normal"}, the added
residual is drawn from \code{N(mu,sd)} where \code{mu} and \code{sd} are
estimated from the model's residuals (\code{mu} should equal zero in most
cases). With \code{"observed"}, the added residual is drawn (with replacement)
from the model's residuals. Ignored for non-numeric predicted variables.}

\item{cp}{The complexity parameter used to \code{\link[rpart]{prune}} the CART model. If
omitted, no pruning takes place. If a single number, the same complexity parameter is
used for each imputed variable. If a vector with one element per imputed variable, the
complexity parameters must be in the same order as the predicted variables in
\code{formula}.}

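\item{na_action}{\code{[function]} what to do with missings in training data.
The default is \code{na.rpart} for \code{impute_cart} and \code{na.omit} for
\code{impute_rf}; with \code{na.omit}, cases with missing values in predicted
or predictors are omitted (see \sQuote{Missings in training data}).}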

\item{...}{further arguments passed to 
\itemize{
\item{\code{\link[rpart]{rpart}} for \code{impute_cart}}
\item{\code{\link[randomForest]{randomForest}} for \code{impute_rf}}
}}
}
\description{
Imputation based on CART models or Random Forests.
}
\section{Model specification}{


Formulas are of the form

\code{IMPUTED_VARIABLES ~ MODEL_SPECIFICATION [ | GROUPING_VARIABLES ] }

The left-hand side of the formula object lists the variable or variables to 
be imputed. Variables on the right-hand side are used as predictors in the
CART or random forest model.

If grouping variables are specified, the data set is split according to the
values of those variables, and model estimation and imputation occur
independently for each group.

Grouping using \code{dplyr::group_by} is also supported. If groups are 
defined in both the formula and using \code{dplyr::group_by}, the data is 
grouped by the union of grouping variables. Any missing value in one of the 
grouping variables results in an error.
}

\section{Methodology}{


\bold{CART imputation} by \code{impute_cart} can be used for numerical,
categorical, or mixed data. Missing values are estimated using a 
Classification and Regression Tree as specified by Breiman, Friedman, Stone
and Olshen (1984). This means that prediction is fairly robust against
missingness in predictors.

\bold{Random Forest imputation} with \code{impute_rf} can be used for numerical,
categorical, or mixed data. Missing values are estimated using a Random Forest
model as specified by Breiman (2001).
}

\references{
Breiman, L., Friedman, J., Stone, C.J. and Olshen, R.A., 1984. Classification
and regression trees. CRC press.
  
Breiman, L., 2001. Random forests. Machine learning, 45(1), pp.5-32.
}
\seealso{
Other imputation: \code{\link{impute_hotdeck}},
  \code{\link{impute_lm}}, \code{\link{impute}}
}
\concept{imputation}
