% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/scut.R
\name{SCUT}
\alias{SCUT}
\alias{SCUT_parallel}
\title{SMOTE and cluster-based undersampling technique}
\usage{
SCUT(
  data,
  cls_col,
  oversample = oversample_smote,
  undersample = undersample_mclust,
  osamp_opts = list(),
  usamp_opts = list()
)

SCUT_parallel(
  data,
  cls_col,
  ncores = detectCores()\%/\%2,
  oversample = oversample_smote,
  undersample = undersample_mclust,
  osamp_opts = list(),
  usamp_opts = list()
)
}
\arguments{
\item{data}{Numeric data frame.}

\item{cls_col}{The column in \code{data} with class membership.}

\item{oversample}{Oversampling method. Must be one of the \verb{oversample_*} functions, \code{\link{resample_random}()}, or a custom function with the signature \code{foo(data, cls, cls_col, m, ...)} that returns a data frame.}

\item{undersample}{Undersampling method. Must be one of the \verb{undersample_*} functions, \code{\link{resample_random}()}, or a custom function with the signature \code{foo(data, cls, cls_col, m, ...)} that returns a data frame.}

\item{osamp_opts}{List of options passed to the oversampling function.}

\item{usamp_opts}{List of options passed to the undersampling function.}

\item{ncores}{Number of cores to use with \code{\link{SCUT_parallel}()}.}
}
\value{
A dataframe with equal class distribution.
}
\description{
This function balances multiclass training datasets. In a dataframe with \code{n} classes and \code{m} rows, the resulting dataframe will have \code{m / n} rows per class. \code{\link{SCUT_parallel}()} distributes each over/undersampling task across multiple cores. Speedup usually occurs only if there are many classes using one of the slower resampling techniques (e.g. \code{\link{undersample_mclust}()}). Note that \code{\link{SCUT_parallel}()} will always run on one core on Windows.
}
\details{
Custom functions can be used to perform under/oversampling (see the required signature in the descriptions of the \code{oversample} and \code{undersample} arguments). Parameters represented by \code{...} should be passed via \code{osamp_opts} or \code{usamp_opts} as a list.
}
\examples{
ret <- SCUT(iris, "Species", undersample = undersample_hclust,
            usamp_opts = list(dist_calc="manhattan"))
ret2 <- SCUT(chickwts, "feed", undersample = undersample_kmeans)
table(ret$Species)
table(ret2$feed)
# SCUT_parallel fires a warning if ncores > 1 on Windows and will run on
# one core only.
ret <- SCUT_parallel(wine, "type", ncores = 1, undersample = undersample_kmeans)
table(ret$type)
}
\references{
Agrawal A, Viktor HL, Paquet E (2015). 'SCUT: Multi-class imbalanced data classification using SMOTE and cluster-based undersampling.' In \emph{2015 7th International Joint Conference on Knowledge Discovery, Knowledge Engineering and Knowledge Management (IC3K)}, volume 01, 226-234.

Chawla NV, Bowyer KW, Hall LO, Kegelmeyer WP (2002). 'SMOTE: Synthetic Minority Over-sampling Technique.' \emph{Journal of Artificial Intelligence Research}, 16, 321-357. ISSN 1076-9757, \doi{10.1613/jair.953}, \url{https://www.jair.org/index.php/jair/article/view/10302}.
}
