% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/single_mab_simulation.R
\name{single_mab_simulation}
\alias{single_mab_simulation}
\title{Run One Adaptive Simulation With Inference.}
\usage{
single_mab_simulation(
  data,
  assignment_method,
  algorithm,
  prior_periods,
  perfect_assignment,
  whole_experiment,
  blocking,
  data_cols,
  control_augment = 0,
  random_assign_prop = 0,
  ndraws = 5000,
  control_condition = NULL,
  time_unit = NULL,
  period_length = NULL,
  block_cols = NULL,
  verbose = FALSE,
  check_args = TRUE
)
}
\arguments{
\item{data}{A data.frame, data.table, or tibble containing input data from the trial. This should be the results
of a traditional Randomized Controlled Trial (RCT). Any data.frames will be converted to tibbles internally.}

\item{assignment_method}{A character string; one of "date", "batch", or "individual", to define the assignment into treatment waves. When using
"batch" or "individual", ensure your dataset is pre-arranged in the order in which observations should be considered so that
groups are assigned correctly. For "date", observations will be considered in chronological order.
"individual" assignment can be computationally intensive for larger datasets.}

\item{algorithm}{A character string specifying the MAB algorithm to use. Options are "thompson" or "ucb1". Algorithm
defines the adaptive assignment process. Mathematical details on these algorithms
can be found in \href{https://arxiv.org/abs/1402.6028}{Kuleshov and Precup 2014} and
\href{https://arxiv.org/abs/1904.07272}{Slivkins 2024}.}

\item{prior_periods}{A numeric value of length 1, or the character string "All"; number of previous periods to use
in the treatment assignment model. This is used to implement the stationary/non-stationary bandit.
For example, a non-stationary bandit assumes the true probability of success for each treatment changes over time, so to
account for that, not all prior data should be used when making decisions because it could be "out of date".}

\item{perfect_assignment}{Logical; if TRUE, assumes perfect information for treatment assignment
(i.e., all outcomes are observed regardless of the date).
If FALSE, hides outcomes not yet theoretically observed, based
on the dates treatments would have been assigned for each wave.
This is useful when simulating batch-based assignment where treatments were assigned
on a given day whether or not all the information from a prior batch was available and
you have exact dates treatments were assigned.}

\item{whole_experiment}{Logical; if TRUE, uses all past experimental data for imputing outcomes.
If FALSE, uses only data available up to the current period. In large datasets or with a high number
of periods, setting this to FALSE can be more computationally intensive, though not a significant
contributor to total run time.}

\item{blocking}{Logical; whether or not to use treatment blocking. Treatment blocking is used to ensure an even-enough
distribution of treatment conditions across blocks. For example, blocking by gender would mean the randomized assignment should
split treatments evenly not just throughout the sample (so for 4 arms, 25-25-25-25), but also within each block, so 25\% of men
would receive each treatment and 25\% of women the same.}

\item{data_cols}{A named character vector containing the names of columns in \code{data} as strings:
\itemize{
\item \code{id_col}: Column in \code{data}; contains unique ID as a key.
\item \code{success_col}: Column in \code{data}; binary successes from the original experiment.
\item \code{condition_col}: Column in \code{data}; original treatment condition for each observation.
\item \code{date_col}: Column in \code{data}; contains original date of event/trial. Only necessary when assigning by "Date". Must be of type \code{Date}, not a character string.
\item \code{month_col}: Column in \code{data}; contains month of treatment. Only necessary when \code{time_unit = "Month"}, and when periods should be determined directly by
the calendar months instead of month based time periods. This column can be a string/factor variable with the month names or numeric with the month number. It can easily
be created from your \code{date_col} via \code{lubridate::month(data[[date_col]])} or \code{format(data[[date_col]], "\%m")}.
\item \code{success_date_col}: Column in \code{data}; contains original dates each success occurred. Only necessary when \code{perfect_assignment = FALSE}. Must be of type \code{Date}, not a character string.
\item \code{assignment_date_col}: Column in \code{data}; contains original dates treatments were assigned to observations. Only necessary when \code{perfect_assignment = FALSE}.
Used to simulate imperfect information on the part of researchers conducting an adaptive trial. Must be of type \code{Date}, not a character string.
}}

\item{control_augment}{A numeric value ranging from 0 to 1; proportion of each wave guaranteed to receive the "Control" treatment.
Default is 0. It is not recommended to use this in conjunction with \code{random_assign_prop}.}

\item{random_assign_prop}{A numeric value ranging from 0 to 1; proportion of each wave to be assigned new treatments randomly,
1 - \code{random_assign_prop} is the proportion assigned through the bandit procedure. For example if this is set to 0.1, then
for each wave 10\% of the observations will be randomly assigned to a new treatment, while the remaining 90\% will be assigned according
to the UCB1 or Thompson result. It is not recommended to use this in conjunction with \code{control_augment}. If batch sizes are small,
and the number of rows is calculated to be less than 1, a probability sampling approach is used where each row in the batch
will have a \code{random_assign_prop} probability of being selected for random assignment. Otherwise the number is rounded to
a whole number, and that many rows are selected for random assignment.}

\item{ndraws}{A numeric value; when direct Thompson sampling calculations fail, draws from a simulated posterior
will be used to approximate the Thompson sampling probabilities. This is the number of simulations to use; the default
is 5000 to match the default of \code{\link[bandit:best_binomial_bandit_sim]{bandit::best_binomial_bandit_sim()}}, but it might need to be raised or lowered depending on performance and accuracy
concerns.}

\item{control_condition}{Value of the control condition. Only necessary when \code{control_augment} is greater than 0. Internally this value
is coerced to a string, so it should be passed as a string, or a type that can easily be converted to a string.}

\item{time_unit}{A character string specifying the unit of time for assigning periods when \code{assignment_method} is "date".
Acceptable values are "day", "week", or "month". "month" does not require an additional column with the months of each observation,
but it can accept a separate \code{month_col}. If \code{month_col} is specified, the periods follow the calendar months strictly, and when it is not
specified months are simply used as the time interval. For example, if a dataset has dates starting on July 26th, under month-based assignment with
a specified \code{month_col}, the dates July 26th and August 3rd would be in different periods, but if the \code{month_col} was not specified, they would be
in the same period because the dates are less than one month apart.}

\item{period_length}{A numeric value of length 1; represents the length of each treatment period.
If assignment method is "date", this length refers to the number of units specified in \code{time_unit}
(i.e., if "day", 10 would be 10 days). If assignment method is "batch", this refers to the number of people in each batch.}

\item{block_cols}{A character vector of variables to block by. This vector should not be named.}

\item{verbose}{Logical; whether or not to print intermediate messages. Default is FALSE.}

\item{check_args}{Logical; Whether or not to robustly check whether arguments are valid. Default is TRUE, and recommended
not to be changed.}
}
\value{
An object of class \code{mab}, containing:
\itemize{
\item \code{final_data}: The processed tibble or data.table, containing new columns pertaining to the results of the trial. Specifically, it contains:
\itemize{
\item \code{period_number}: Assigned period for simulation.
\item \verb{mab_*}: New treatment conditions and outcomes under the simulation.
\item \code{impute_req}: Whether observation required an imputed outcome.
\item \verb{*block}: Variables relating to the block specified for treatment blocking, and the concatenation
of that block with an observation's original treatment, and new treatment.
\item \verb{aipw_*}: Columns containing individual Augmented Inverse Probability Weighted estimates for each observation and treatment arm.
\item \verb{prior_rate_*}: Columns containing success rate for each treatment arm, from all periods before the observation's period of the simulation.
\item \verb{*_assign_prob}: Columns containing probability of being assigned each treatment at the given period.
}
\item \code{bandits}: A tibble or data.table containing the UCB1 values or Thompson sampling posterior distributions for each period. Wide format,
each row is a period, and each column is a treatment. Each row in this table represents the calculation from the given period
after its values were imputed, so row 2 represents the calculations made in period 3, but represents the impact of period 2's new assignments.
\item \code{assignment_probs}: A tibble or data.table containing the probability of being assigned each treatment arm at a given period. Wide format,
each row is a period, and each column is a treatment. Each row represents the probability of being assigned each treatment at each period, these have not
been shifted like the bandits table.
\item \code{estimates}: A tibble or data.table containing the
AIPW (Augmented Inverse Probability Weighting) treatment effect estimates and variances, and traditional
sample means and variances, for each treatment arm. Long format, treatment arm, and estimate type are columns along with the mean
and variance.
\item \code{settings}: A named list of the configuration settings used in the trial.
}
}
\description{
Performs a single Multi-Armed Bandit (MAB) trial using experimental data from
an original randomized controlled trial, and adaptive inference strategies as described in
\href{https://www.pnas.org/doi/pdf/10.1073/pnas.2014602118}{Hadad et al. (2021)}.
Wraps around the internal implementation functions, and performs the full
MAB pipeline: preparing inputs, assigning treatments and imputing successes, and adaptively weighted
estimation. See the details and vignettes to learn more.
}
\details{
For all the items labelled as a tibble or data.table, data.tables will be used if the user-passed \code{data} is a
data.table, tibbles used otherwise.
\subsection{Implementation}{

At each period, either the Thompson sampling probabilities or UCB1 values are calculated based on
the outcomes from the number of \code{prior_periods} specified. New treatments are then assigned randomly using the Thompson
sampling probabilities via the \href{https://cran.r-project.org/package=randomizr}{randomizr}
package, or as the treatment with the highest UCB1 values, while implementing the specific
treatment blocking and control augmentation specified. More details on bandit algorithms can be found in
\href{https://arxiv.org/abs/1402.6028}{Kuleshov and Precup 2014} and
\href{https://arxiv.org/abs/1904.07272}{Slivkins 2024}.

If a hybrid assignment is specified, here is where it is implemented in the simulation.
\code{control_augment} is a threshold probability for the control group, and the assignment probabilities
are changed to ensure that threshold is met. The other hybrid assignment is \code{random_assign_prop}. Here, the specified
proportion of the data is set aside to assign treatments randomly, while the rest of the data is assigned through the bandit procedure.

After assigning treatments, observations with new treatments have their outcomes imputed, with any
specified treatment blocking. The probabilities of success used to impute,
are estimated via the grouped means of successes from the original data either from the whole trial, or
up to that period, defined by \code{whole_experiment}.

If \code{perfect_assignment} is FALSE, new dates of success will be imputed using averages
of those dates in the period, grouped by treatment block. Observations whose
treatment changed, but whose outcome was a success in both the original and the simulation, do not have their date changed.
When the next period starts, the success dates are checked against the maximum/latest \code{assignment_date} for the period, and
if any success occurs after that, it is treated as a failure for the purpose of the bandit decision algorithms.

At the end of the simulation the results are aggregated together to calculate the Adaptively Weighted
Augmented Inverse Probability Weighted (AIPW) Estimator (Hadad et al. 2021) using the mean and variance formulas provided, under
the constant allocation rate adaptive schema. These estimators are unbiased and asymptotically normal under the adaptive
conditions which is why they are used. For a complete view of their properties, reading the paper is recommended.
}

\subsection{Performance Concerns}{

This procedure has the potential to be computationally expensive and time-consuming. Performance
depends on the relative size of each period, number of periods, and overall size of the dataset. This function has
separate support for data.frames and data.tables. If a data.frame is passed, the function uses a combination of dplyr, tidyr
and base R to shape data, and run the simulation. However, if a data.table is passed the function exclusively uses the data.table
code for all the same operations.

In general, smaller batches run faster under base R, while larger ones could benefit from the performance
and memory efficiencies provided by data.table. However, we've observed larger datasets can cause numerical
instability with some calculations in the Thompson sampling procedure. Internal safeguards exist to prevent this, but
the best way to preempt any issues is to set \code{prior_periods} to a low number.

For more information about how to use the function, please view the vignette.
}
}
\examples{
# Loading Example Data and defining conditions
data(tanf)

## Running Thompson sampling with 500 person large batches,
## with no blocks and imperfect assignment

single_mab_simulation(
  data = tanf,
  assignment_method = "Batch",
  algorithm = "Thompson",
  period_length = 500,
  prior_periods = "All",
  blocking = FALSE,
  whole_experiment = TRUE,
  perfect_assignment = FALSE,
  data_cols = c(
    condition_col = "condition",
    id_col = "ic_case_id",
    success_col = "success",
    success_date_col = "date_of_recert",
    assignment_date_col = "letter_sent_date"
  )
)

## Running UCB1 Sampling with 1 Month based batches and
## control augmentation set to 0.25, with perfect_assignment.
## When using control_augment > 0, conditions need to have proper names
# no_letter is control, the others are treatments

single_mab_simulation(
  data = tanf,
  assignment_method = "Date",
  time_unit = "Month",
  algorithm = "UCB1",
  period_length = 1,
  prior_periods = "All",
  blocking = FALSE,
  whole_experiment = TRUE,
  perfect_assignment = TRUE,
  control_condition = "no_letter",
  control_augment = 0.25,
  data_cols = c(
    condition_col = "condition",
    id_col = "ic_case_id",
    success_col = "success",
    date_col = "appt_date",
    month_col = "recert_month"
  )
)

## 5 Day Periods with Thompson, Treatment Blocking by Service Center,
## Whole experiment TRUE, and hybrid assignment 10\% random, 90\% bandit.
single_mab_simulation(
  data = tanf,
  assignment_method = "Date",
  time_unit = "Day",
  algorithm = "Thompson",
  period_length = 5,
  prior_periods = "All",
  blocking = TRUE,
  block_cols = c("service_center"),
  whole_experiment = TRUE,
  perfect_assignment = TRUE,
  random_assign_prop = 0.1,
  data_cols = c(
    condition_col = "condition",
    id_col = "ic_case_id",
    success_col = "success",
    date_col = "appt_date"
  )
)
}
\references{
Hadad, Vitor, David A. Hirshberg, Ruohan Zhan, Stefan Wager, and Susan Athey. 2021.
"Confidence Intervals for Policy Evaluation in Adaptive Experiments." \emph{Proceedings of the National Academy of Sciences of the United States of America} 118
(15): e2014602118. \doi{10.1073/pnas.2014602118}.

Kuleshov, Volodymyr, and Doina Precup. 2014. "Algorithms for Multi-Armed Bandit Problems."
\emph{arXiv}. \doi{10.48550/arXiv.1402.6028}.

Lotze, Thomas, and Markus Loecher. 2022. "Bandit: Functions for Simple A/B Split Test and Multi-Armed Bandit Analysis."
\url{https://cran.r-project.org/package=bandit}.

Offer-Westort, Molly, Alexander Coppock, and Donald P. Green. 2021.
"Adaptive Experimental Design: Prospects and Applications in Political Science."
\emph{American Journal of Political Science} 65 (4): 826-44. \doi{10.1111/ajps.12597}.

Slivkins, Aleksandrs. 2024. "Introduction to Multi-Armed Bandits." \emph{arXiv}. \doi{10.48550/arXiv.1904.07272}.
}
\seealso{
\code{\link[=multiple_mab_simulation]{multiple_mab_simulation()}}, \code{\link[=summary.mab]{summary.mab()}}, \code{\link[=plot.mab]{plot.mab()}}.
}
