% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/residualize.R
\name{residualize}
\alias{residualize}
\title{Arbitrary residualization of outcomes}
\usage{
residualize(data, y, vars, nfold = 5, fun.rf = "ranger")
}
\arguments{
\item{data}{input data used for training and estimation, where each
row corresponds to an individual and columns contain information on treatments,
covariates, probabilities of treatment assignment, and observed outcomes.}

\item{y}{a character string denoting the column name of outcomes.}

\item{vars}{a vector of character strings denoting the column names of covariates.}

\item{nfold}{number of folds in cross-validation. The default value is 5.}

\item{fun.rf}{a character string specifying which random forest package to use.
The two options are \code{"ranger"} and \code{"randomForest"}, with \code{"ranger"} being the default.}
}
\value{
data for training and estimation with residualized outcomes.
}
\description{
This function employs random forests and cross-validation to residualize
outcomes following Section 3.3 of Ladhania et al. (2023).
That is, predicted outcomes resulting from random forests are
subtracted from the original outcomes. Doing so helps in adjusting for small imbalances
in baseline covariates and removing part of the variation in
outcomes common across treatment arms.
}
\examples{
data(Example_data)
library(dplyr)
library(magrittr)
Example_trainest <- Example_data \%>\% slice_sample(n = floor(0.5 * nrow(Example_data)))
y <- "Y"
vars <- paste0("X", 1:3)
Example_resid <- residualize(Example_trainest, y, vars, nfold = 5, fun.rf = "ranger")

}
\references{
Ladhania, R., Spiess, J., Ungar, L., and Wu, W. (2023). Personalized Assignment
to One of Many Treatment Arms via Regularized and Clustered Joint Assignment Forests.
\url{https://doi.org/10.48550/arXiv.2311.00577}.
\cr
}
