% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cvEPX.R
\name{cv.epx}
\alias{cv.epx}
\title{Balanced K-fold cross-validation for an "\code{epx}" object}
\usage{
cv.epx(
  epx,
  folds = NULL,
  K = 10,
  folds.out = FALSE,
  classifier.args = list(),
  performance.args = list(),
  ...
)
}
\arguments{
\item{epx}{Object of class "\code{\link{epx}}".}

\item{folds}{Optional vector specifying to which fold each observation belongs. Must be an \eqn{n}-length vector (\eqn{n} being the number of
observations) with integer values only in the range from 1 to \eqn{K}.}

\item{K}{Number of folds; default is 10.}

\item{folds.out}{Whether to also return the vector recording the fold
membership of each observation; default is \code{FALSE}.}

\item{classifier.args}{Arguments for the base classifier specified by
\code{epx}; the defaults are those used in \code{epx} formation.}

\item{performance.args}{Arguments for the performance measure specified by
\code{epx}; the defaults are those used in \code{epx} formation.}

\item{...}{Further arguments passed to or from other methods.}
}
\value{
An \eqn{(n + 1)} by \eqn{(p + 1)} matrix, where \eqn{n} is the number
of observations used to train \code{epx} and \eqn{p} is the number of
(final) phalanxes. Columns 1 to \eqn{p} correspond to the individual
phalanxes, and column \eqn{p + 1} contains the predicted
probabilities of relevance from the ensemble of phalanxes.
Row \eqn{n + 1} holds the performance of the corresponding column (the
performance measure is determined by the "\code{\link{epx}}" object).

Setting \code{folds.out} to \code{TRUE} changes the output of
\code{cv.epx} into a list of two elements:
\item{EPX.CV}{The \eqn{(n + 1)} by \eqn{(p + 1)} matrix returned by
default when \code{folds.out = FALSE}.}
\item{FOLDS.USED}{A vector of length \eqn{n} with integer values only
in the range from 1 to \code{K} indicating to which fold
each observation was randomly assigned for cross-validation.}
}
\description{
Balanced K-fold cross-validation based on an "\code{\link{epx}}" object.
Because the phalanx-formation algorithm is not re-run for each fold,
the resulting cross-validation estimates are biased.
}
\examples{
# Example with data(harvest)

## Phalanx-formation using a base classifier with 50 trees (default = 500)
\donttest{ 
set.seed(761)
model <- epx(x = harvest[, -4], y = harvest[, 4],
            classifier.args = list(ntree = 50))

## 10-fold balanced cross-validation (different base classifier settings)
\dontrun{
set.seed(761)
cv.100 <- cv.epx(model, classifier.args = list(ntree = 100))
tail(cv.100) # see performance (here, AHR) for all phalanxes and the ensemble


## Option to output the vector assigning observations to the K folds
## (Not run, for speed.)
set.seed(761)
cv.folds <- cv.epx(model, folds.out = TRUE)
tail(cv.folds[[1]])  # the cross-validation matrix (element EPX.CV)
table(cv.folds[[2]])  # number of observations in each of the 10 folds

## 10 runs of 10-fold balanced cross-validation (using default settings)
set.seed(761)
cv.ahr <- NULL  # store AHR of each ensemble
for (i in 1:10) {
  cv.i <- cv.epx(model)
  cv.ahr <- c(cv.ahr, cv.i[nrow(cv.i), ncol(cv.i)])
}
boxplot(cv.ahr)  # to see variation in AHR
}
}
}
