% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cv_class_nested.R
\name{MLNestedCV}
\alias{MLNestedCV}
\title{R6 Class to perform nested cross-validation experiments}
\description{
The \code{MLNestedCV} class is used to construct a nested cross validation object
and to perform a nested cross validation for a specified machine learning
algorithm by performing a hyperparameter optimization with the in-sample
observations of each of the k outer folds and validating the optimized
hyperparameters directly on the out-of-sample observations of the respective
fold.
}
\details{
The \code{MLNestedCV} class requires a named list of predefined
row indices for the outer cross validation folds, e.g., created with the
function \code{\link[splitTools:create_folds]{splitTools::create_folds()}}. This list also defines the \code{k} of
the k-fold cross-validation. Furthermore, a strategy needs to be chosen
(\code{"grid"} or \code{"bayesian"}) for the hyperparameter optimization as well as the
parameter \code{k_tuning} to define the number of inner cross validation folds.
}
\examples{
dataset <- do.call(
  cbind,
  c(sapply(paste0("col", 1:6), function(x) {
    rnorm(n = 500)
    },
    USE.NAMES = TRUE,
    simplify = FALSE
   ),
   list(target = sample(0:1, 500, TRUE))
))

fold_list <- splitTools::create_folds(
  y = dataset[, 7],
  k = 3,
  type = "stratified",
  seed = 123
)

cv <- MLNestedCV$new(
  learner = LearnerKnn$new(),
  strategy = "grid",
  fold_list = fold_list,
  k_tuning = 3L,
  seed = 123,
  ncores = 2
)

# learner args (not optimized)
cv$learner_args <- list(
  l = 0,
  test = parse(text = "fold_test$x")
)

# parameters for hyperparameter tuning
cv$parameter_grid <- expand.grid(
  k = seq(4, 16, 8)
)
cv$split_type <- "stratified"

# performance parameters
cv$predict_args <- list(type = "response")
cv$performance_metric <- metric("bacc")

# set data
cv$set_data(
  x = data.matrix(dataset[, -7]),
  y = dataset[, 7]
)

cv$execute()


## ------------------------------------------------
## Method `MLNestedCV$new`
## ------------------------------------------------

dataset <- do.call(
  cbind,
  c(sapply(paste0("col", 1:6), function(x) {
    rnorm(n = 500)
    },
    USE.NAMES = TRUE,
    simplify = FALSE
   ),
   list(target = sample(0:1, 500, TRUE))
))

fold_list <- splitTools::create_folds(
  y = dataset[, 7],
  k = 3,
  type = "stratified",
  seed = 123
)

cv <- MLNestedCV$new(
  learner = LearnerKnn$new(),
  strategy = "grid",
  fold_list = fold_list,
  k_tuning = 3L,
  seed = 123,
  ncores = 2
)


## ------------------------------------------------
## Method `MLNestedCV$execute`
## ------------------------------------------------

dataset <- do.call(
  cbind,
  c(sapply(paste0("col", 1:6), function(x) {
    rnorm(n = 500)
    },
    USE.NAMES = TRUE,
    simplify = FALSE
   ),
   list(target = sample(0:1, 500, TRUE))
))

fold_list <- splitTools::create_folds(
  y = dataset[, 7],
  k = 3,
  type = "stratified",
  seed = 123
)

cv <- MLNestedCV$new(
  learner = LearnerKnn$new(),
  strategy = "grid",
  fold_list = fold_list,
  k_tuning = 3L,
  seed = 123,
  ncores = 2
)

# learner args (not optimized)
cv$learner_args <- list(
  l = 0,
  test = parse(text = "fold_test$x")
)

# parameters for hyperparameter tuning
cv$parameter_grid <- expand.grid(
  k = seq(4, 68, 8)
)
cv$split_type <- "stratified"

# performance parameters
cv$predict_args <- list(type = "response")
cv$performance_metric <- metric("bacc")

# set data
cv$set_data(
  x = data.matrix(dataset[, -7]),
  y = dataset[, 7]
)

cv$execute()

}
\seealso{
\code{\link[splitTools:create_folds]{splitTools::create_folds()}}
}
\section{Super classes}{
\code{\link[mlexperiments:MLBase]{mlexperiments::MLBase}} -> \code{\link[mlexperiments:MLExperimentsBase]{mlexperiments::MLExperimentsBase}} -> \code{\link[mlexperiments:MLCrossValidation]{mlexperiments::MLCrossValidation}} -> \code{MLNestedCV}
}
\section{Public fields}{
\if{html}{\out{<div class="r6-fields">}}
\describe{
\item{\code{strategy}}{A character. The strategy to optimize the hyperparameters
(either \code{"grid"} or \code{"bayesian"}).}

\item{\code{parameter_bounds}}{A named list of tuples to define the parameter
bounds of the Bayesian hyperparameter optimization. For further details
please see the documentation of the \code{ParBayesianOptimization} package.}

\item{\code{parameter_grid}}{A matrix with named columns in which each column
represents a parameter that should be optimized and each row represents
a specific hyperparameter setting that should be tested throughout the
procedure. For \code{strategy = "grid"}, each row of the \code{parameter_grid} is
considered as a setting that is evaluated. For \code{strategy = "bayesian"},
the \code{parameter_grid} is passed further on to the \code{initGrid} argument of
the function \code{\link[ParBayesianOptimization:bayesOpt]{ParBayesianOptimization::bayesOpt()}} in order to
initialize the Bayesian process. The maximum number of rows considered for
initializing the Bayesian process can be specified with the R option
\code{options("mlexperiments.bayesian.max_init")}, which is set to \code{50L} by
default.}

\item{\code{optim_args}}{A named list of arguments that configure the
Bayesian hyperparameter optimization. For further details
please see the documentation of the \code{ParBayesianOptimization} package.}

\item{\code{split_type}}{A character. The splitting strategy to construct the
k cross-validation folds. This parameter is passed further on to the
function \code{\link[splitTools:create_folds]{splitTools::create_folds()}} and defaults to \code{"stratified"}.}

\item{\code{split_vector}}{A vector. If a criterion other than the provided \code{y}
should be considered for generating the cross-validation folds, it can
be defined here. It is important that a vector of the same length as
\code{x} is provided here.}

\item{\code{k_tuning}}{An integer to define the number of cross-validation folds
used to tune the hyperparameters.}
}
\if{html}{\out{</div>}}
}
\section{Methods}{
\subsection{Public methods}{
\itemize{
\item \href{#method-MLNestedCV-new}{\code{MLNestedCV$new()}}
\item \href{#method-MLNestedCV-execute}{\code{MLNestedCV$execute()}}
\item \href{#method-MLNestedCV-clone}{\code{MLNestedCV$clone()}}
}
}
\if{html}{\out{
<details open><summary>Inherited methods</summary>
<ul>
<li><span class="pkg-link" data-pkg="mlexperiments" data-topic="MLExperimentsBase" data-id="set_data"><a href='../../mlexperiments/html/MLExperimentsBase.html#method-MLExperimentsBase-set_data'><code>mlexperiments::MLExperimentsBase$set_data()</code></a></span></li>
</ul>
</details>
}}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-MLNestedCV-new"></a>}}
\if{latex}{\out{\hypertarget{method-MLNestedCV-new}{}}}
\subsection{Method \code{new()}}{
Create a new \code{MLNestedCV} object.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{MLNestedCV$new(
  learner,
  strategy = c("grid", "bayesian"),
  k_tuning,
  fold_list,
  seed,
  ncores = -1L,
  return_models = FALSE
)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{learner}}{An initialized learner object that inherits from class
\code{"MLLearnerBase"}.}

\item{\code{strategy}}{A character. The strategy to optimize the hyperparameters
(either \code{"grid"} or \code{"bayesian"}).}

\item{\code{k_tuning}}{An integer to define the number of cross-validation folds
used to tune the hyperparameters.}

\item{\code{fold_list}}{A named list of predefined row indices for the cross
validation folds, e.g., created with the function
\code{\link[splitTools:create_folds]{splitTools::create_folds()}}.}

\item{\code{seed}}{An integer. Needs to be set for reproducibility purposes.}

\item{\code{ncores}}{An integer to specify the number of cores used for
parallelization (default: \code{-1L}).}

\item{\code{return_models}}{A logical. If the fitted models should be returned
with the results (default: \code{FALSE}).}
}
\if{html}{\out{</div>}}
}
\subsection{Details}{
The \code{MLNestedCV} class requires a named list of predefined
row indices for the outer cross validation folds, e.g., created with
the function \code{\link[splitTools:create_folds]{splitTools::create_folds()}}. This list also defines the
\code{k} of the k-fold cross-validation. Furthermore, a strategy needs to
be chosen (\code{"grid"} or \code{"bayesian"}) for the hyperparameter optimization
as well as the parameter \code{k_tuning} to define the number of inner
cross validation folds.
}

\subsection{Examples}{
\if{html}{\out{<div class="r example copy">}}
\preformatted{dataset <- do.call(
  cbind,
  c(sapply(paste0("col", 1:6), function(x) {
    rnorm(n = 500)
    },
    USE.NAMES = TRUE,
    simplify = FALSE
   ),
   list(target = sample(0:1, 500, TRUE))
))

fold_list <- splitTools::create_folds(
  y = dataset[, 7],
  k = 3,
  type = "stratified",
  seed = 123
)

cv <- MLNestedCV$new(
  learner = LearnerKnn$new(),
  strategy = "grid",
  fold_list = fold_list,
  k_tuning = 3L,
  seed = 123,
  ncores = 2
)

}
\if{html}{\out{</div>}}

}

}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-MLNestedCV-execute"></a>}}
\if{latex}{\out{\hypertarget{method-MLNestedCV-execute}{}}}
\subsection{Method \code{execute()}}{
Execute the nested cross validation.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{MLNestedCV$execute()}\if{html}{\out{</div>}}
}

\subsection{Details}{
All results of the cross validation are saved in the field \verb{$results} of
the \code{MLNestedCV} class. After successful execution of the nested cross
validation, \verb{$results} contains a list with the items:
\itemize{
\item "results.optimization" A list with the results of the hyperparameter
optimization.
\item "fold" A list of folds containing the following items for each
cross validation fold:
\itemize{
\item "fold_ids" A vector with the utilized in-sample row indices.
\item "ground_truth" A vector with the ground truth.
\item "predictions" A vector with the predictions.
\item "learner.args" A list with the arguments provided to the learner.
\item "model" If \code{return_models = TRUE}, the fitted model.
}
\item "summary" A data.table with the summarized results (same as
the returned value of the \code{execute} method).
\item "performance" A list with the value of the performance metric
calculated for each of the cross validation folds.
}
}

\subsection{Returns}{
The function returns a data.table with the results of the nested
cross validation. More results are accessible from the field \verb{$results}
of the \code{MLNestedCV} class.
}
\subsection{Examples}{
\if{html}{\out{<div class="r example copy">}}
\preformatted{dataset <- do.call(
  cbind,
  c(sapply(paste0("col", 1:6), function(x) {
    rnorm(n = 500)
    },
    USE.NAMES = TRUE,
    simplify = FALSE
   ),
   list(target = sample(0:1, 500, TRUE))
))

fold_list <- splitTools::create_folds(
  y = dataset[, 7],
  k = 3,
  type = "stratified",
  seed = 123
)

cv <- MLNestedCV$new(
  learner = LearnerKnn$new(),
  strategy = "grid",
  fold_list = fold_list,
  k_tuning = 3L,
  seed = 123,
  ncores = 2
)

# learner args (not optimized)
cv$learner_args <- list(
  l = 0,
  test = parse(text = "fold_test$x")
)

# parameters for hyperparameter tuning
cv$parameter_grid <- expand.grid(
  k = seq(4, 68, 8)
)
cv$split_type <- "stratified"

# performance parameters
cv$predict_args <- list(type = "response")
cv$performance_metric <- metric("bacc")

# set data
cv$set_data(
  x = data.matrix(dataset[, -7]),
  y = dataset[, 7]
)

cv$execute()

}
\if{html}{\out{</div>}}

}

}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-MLNestedCV-clone"></a>}}
\if{latex}{\out{\hypertarget{method-MLNestedCV-clone}{}}}
\subsection{Method \code{clone()}}{
The objects of this class are cloneable with this method.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{MLNestedCV$clone(deep = FALSE)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{deep}}{Whether to make a deep clone.}
}
\if{html}{\out{</div>}}
}
}
}
