% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simSingleCell.R
\name{estimateZinbwaveParams}
\alias{estimateZinbwaveParams}
\title{Estimate the parameters of the ZINB-WaVE model to simulate new single-cell
RNA-Seq expression profiles}
\usage{
estimateZinbwaveParams(
  object,
  cell.type.column,
  cell.ID.column,
  gene.ID.column,
  cell.cov.columns,
  gene.cov.columns,
  subset.cells = NULL,
  proportional = TRUE,
  set.type = "All",
  threads = 1,
  verbose = TRUE
)
}
\arguments{
\item{object}{\code{\linkS4class{DigitalDLSorter}} object with a
\code{single.cell.real} slot.}

\item{cell.type.column}{Name or column number corresponding to the cell type
of each cell in cells metadata.}

\item{cell.ID.column}{Name or column number corresponding to the cell names
of the expression matrix in cells metadata.}

\item{gene.ID.column}{Name or column number corresponding to the notation
used for features/genes in genes metadata.}

\item{cell.cov.columns}{Name or column number(s) in cells metadata to be used
as covariates during model fitting (if no covariates are used, set to empty
or \code{NULL}).}

\item{gene.cov.columns}{Name or column number(s) in genes metadata that will
be used as covariates during model fitting (if no covariates are used, set
to empty or \code{NULL}).}

\item{subset.cells}{Number of cells to fit the ZINB-WaVE model. Useful when
the original data set is too large to fit the model. Set a value according
to the original data set and the resources available on your computer. If
\code{NULL} (by default), all cells will be used. Must be an integer
greater than or equal to the number of cell types considered and less than
or equal to the total number of cells.}

\item{proportional}{If \code{TRUE}, the original cell type proportions in the
subset of cells generated by \code{subset.cells} will not be altered as far
as possible. If \code{FALSE}, all cell types will have the same number of
cells as far as possible (\code{TRUE} by default).}

\item{set.type}{Cell type(s) to evaluate (\code{'All'} by default). It is
recommended to fit the model to all cell types rather than using only a
subset of them to capture the total variability present in the original
experiment even if only a subset of cell types is simulated.}

\item{threads}{Number of threads used for estimation (1 by default). To set
up the parallel environment, the \pkg{BiocParallel} package must be
installed.}

\item{verbose}{Show informative messages during the execution (\code{TRUE} by
default).}
}
\value{
A \code{\linkS4class{DigitalDLSorter}} object with a \code{zinb.params}
slot containing a \code{\linkS4class{ZinbParametersModel}} object. This
object contains a slot with the estimated ZINB-WaVE parameters from the
real single-cell RNA-Seq data.
}
\description{
Estimate the parameters of the ZINB-WaVE model using a real single-cell
RNA-Seq data set as reference to simulate new single-cell profiles and
increase the signal of underrepresented cell types. This step is optional:
it is only needed if the size of your data set is too small or there are
underrepresented cell types, in order to train the Deep Neural Network model
in a more balanced way. After this step, the \code{\link{simSCProfiles}}
function will use the estimated parameters to simulate new single-cell
profiles. See \code{?\link{simSCProfiles}} for more information.
}
\details{
ZINB-WaVE is a flexible model for zero-inflated count data. This function
carries out the model fit to real single-cell data modeling \eqn{Y_{ij}} (the
count of feature \eqn{j} for sample \eqn{i}) as a random variable following a
zero-inflated negative binomial (ZINB) distribution. The estimated parameters
will be used for the simulation of new single-cell expression profiles by
sampling a negative binomial distribution and inserting dropouts from a
binomial distribution. To do so, \pkg{digitalDLSorteR} uses the
\code{\link[zinbwave]{zinbFit}} function from the \pkg{zinbwave} package
(Risso et al., 2018). For more details about the model, see Risso et al.,
2018.
}
\examples{
set.seed(123) # reproducibility
# Toy single-cell data set: 15 genes x 10 cells of Poisson counts.
# One value is drawn per matrix entry (15 * 10) so that matrix() does not
# silently recycle a shorter vector into repeated, identical cell profiles.
sce <- SingleCellExperiment::SingleCellExperiment(
  assays = list(
    counts = matrix(
      rpois(15 * 10, lambda = 5), nrow = 15, ncol = 10,
      dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10)))
    )
  ),
  # cells metadata: IDs match the column names of the counts matrix
  colData = data.frame(
    Cell_ID = paste0("RHC", seq(10)),
    Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10,
                       replace = TRUE)
  ),
  # genes metadata: IDs match the row names of the counts matrix
  rowData = data.frame(
    Gene_ID = paste0("Gene", seq(15))
  )
)
# load the data into a DigitalDLSorter object
DDLS <- loadSCProfiles(
  single.cell.data = sce,
  cell.ID.column = "Cell_ID",
  gene.ID.column = "Gene_ID"
)
# fit the ZINB-WaVE model on a subset of 2 cells, without covariates
DDLS <- estimateZinbwaveParams(
  object = DDLS,
  cell.type.column = "Cell_Type",
  cell.ID.column = "Cell_ID",
  gene.ID.column = "Gene_ID",
  subset.cells = 2,
  verbose = TRUE
)

}
\references{
Risso, D., Perraudeau, F., Gribkova, S. et al. (2018). A general
and flexible method for signal extraction from single-cell RNA-seq data.
Nat Commun 9, 284. doi: \doi{10.1038/s41467-017-02554-5}.

Torroja, C. and Sánchez-Cabo, F. (2019). digitalDLSorter: A Deep Learning
algorithm to quantify immune cell populations based on scRNA-Seq data.
Frontiers in Genetics 10, 978. doi: \doi{10.3389/fgene.2019.00978}.
}
\seealso{
\code{\link{simSCProfiles}}
}
