% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/meanimpute.R
\name{step_meanimpute}
\alias{step_meanimpute}
\alias{tidy.step_meanimpute}
\title{Impute Numeric Data Using the Mean}
\usage{
step_meanimpute(recipe, ..., role = NA, trained = FALSE, means = NULL,
  trim = 0, skip = FALSE)

\method{tidy}{step_meanimpute}(x, ...)
}
\arguments{
\item{recipe}{A recipe object. The step will be added to the
sequence of operations for this recipe.}

\item{...}{One or more selector functions to choose which
variables are affected by the step. See \code{\link[=selections]{selections()}}
for more details. For the \code{tidy} method, these are not
currently used.}

\item{role}{Not used by this step since no new variables are
created.}

\item{trained}{A logical to indicate if the quantities for
preprocessing have been estimated.}

\item{means}{A named numeric vector of means. This is
\code{NULL} until computed by \code{\link[=prep.recipe]{prep.recipe()}}.}

\item{trim}{The fraction (0 to 0.5) of observations to be
trimmed from each end of the variables before the mean is
computed. Values of \code{trim} outside that range are taken as the
nearest endpoint.}

\item{skip}{A logical. Should the step be skipped when the
recipe is baked by \code{\link[=bake.recipe]{bake.recipe()}}? While all operations are baked
when \code{\link[=prep.recipe]{prep.recipe()}} is run, some operations may not be able to be
conducted on new data (e.g. processing the outcome variable(s)).
Care should be taken when using \code{skip = TRUE} as it may affect
the computations for subsequent operations.}

\item{x}{A \code{step_meanimpute} object.}
}
\value{
An updated version of \code{recipe} with the new step
added to the sequence of existing steps (if any). For the
\code{tidy} method, a tibble with columns \code{terms} (the
selectors or variables selected) and \code{model} (the mean
value).
}
\description{
\code{step_meanimpute} creates a \emph{specification} of a
recipe step that will substitute missing values of numeric
variables by the training set mean of those variables.
}
\details{
\code{step_meanimpute} estimates the variable means
from the data used in the \code{training} argument of
\code{prep.recipe}. \code{bake.recipe} then replaces missing
values in new data sets with these averages.
}
\examples{
data("credit_data")

## missing data per column
vapply(credit_data, function(x) mean(is.na(x)), c(num = 0))

set.seed(342)
in_training <- sample(1:nrow(credit_data), 2000)

credit_tr <- credit_data[ in_training, ]
credit_te <- credit_data[-in_training, ]
missing_examples <- c(14, 394, 565)

rec <- recipe(Price ~ ., data = credit_tr)

impute_rec <- rec \%>\%
  step_meanimpute(Income, Assets, Debt)

imp_models <- prep(impute_rec, training = credit_tr)

imputed_te <- bake(imp_models, newdata = credit_te, everything())

credit_te[missing_examples,]
imputed_te[missing_examples, names(credit_te)]

tidy(impute_rec, number = 1)
tidy(imp_models, number = 1)
}
\concept{
preprocessing imputation
}
\keyword{datagen}
