% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/integration.R
\name{IntegrateData}
\alias{IntegrateData}
\title{Integrate data}
\usage{
IntegrateData(
  anchorset,
  new.assay.name = "integrated",
  normalization.method = c("LogNormalize", "SCT"),
  features = NULL,
  features.to.integrate = NULL,
  dims = 1:30,
  k.weight = 100,
  weight.reduction = NULL,
  sd.weight = 1,
  sample.tree = NULL,
  preserve.order = FALSE,
  do.cpp = TRUE,
  eps = 0,
  verbose = TRUE
)
}
\arguments{
\item{anchorset}{An \code{\link{AnchorSet}} object generated by
\code{\link{FindIntegrationAnchors}}}

\item{new.assay.name}{Name for the new assay containing the integrated data}

\item{normalization.method}{Name of normalization method used: LogNormalize
or SCT}

\item{features}{Vector of features to use when computing the PCA to determine
the weights. Only set if you want a different set from those used in the
anchor finding process}

\item{features.to.integrate}{Vector of features to integrate. By default,
will use the features used in anchor finding.}

\item{dims}{Dimensions to use in the anchor weighting procedure, given as a
vector of dimension indices (default \code{1:30})}

\item{k.weight}{Number of neighbors to consider when weighting anchors}

\item{weight.reduction}{Dimension reduction to use when calculating anchor
weights. This can be one of:
\itemize{
   \item{A string, specifying the name of a dimension reduction present in
   all objects to be integrated}
   \item{A vector of strings, specifying the name of a dimension reduction to
   use for each object to be integrated}
   \item{A vector of \code{\link{DimReduc}} objects, specifying the object to
   use for each object in the integration}
   \item{NULL, in which case a new PCA will be calculated and used to
   calculate anchor weights}
}
Note that, if specified, the requested dimension reduction will only be used
for calculating anchor weights in the first merge between reference and
query, as the merged object will subsequently contain more cells than were in
the query, and weights will need to be calculated for all cells in the object.}

\item{sd.weight}{Controls the bandwidth of the Gaussian kernel for weighting}

\item{sample.tree}{Specify the order of integration. If NULL, will compute
automatically.}

\item{preserve.order}{Do not reorder objects based on size for each pairwise
integration.}

\item{do.cpp}{Run cpp code where applicable. This argument is being
deprecated and will be set to TRUE by default.}

\item{eps}{Error bound on the neighbor finding algorithm (from
\code{\link[RANN]{nn2}} in the \pkg{RANN} package)}

\item{verbose}{Print progress bars and output}
}
\value{
Returns a \code{\link{Seurat}} object with a new integrated
\code{\link{Assay}}. If \code{normalization.method = "LogNormalize"}, the
integrated data is returned to the \code{data} slot and can be treated as
log-normalized, corrected data. If \code{normalization.method = "SCT"}, the
integrated data is returned to the \code{scale.data} slot and can be treated
as centered, corrected Pearson residuals.
}
\description{
Perform dataset integration using a pre-computed \code{\link{AnchorSet}}.
}
\details{
The main steps of this procedure are outlined below. For a more detailed
description of the methodology, please see Stuart, Butler, et al., Cell 2019:
\url{https://doi.org/10.1016/j.cell.2019.05.031};
\url{https://doi.org/10.1101/460147}

For pairwise integration:

\itemize{
  \item{Construct a weights matrix that defines the association between each
  query cell and each anchor. These weights are computed as 1 - the distance
  between the query cell and the anchor divided by the distance of the query
  cell to the \code{k.weight}th anchor multiplied by the anchor score
  computed in \code{\link{FindIntegrationAnchors}}. We then apply a Gaussian
  kernel with a bandwidth defined by \code{sd.weight} and normalize across
  all \code{k.weight} anchors.}
  \item{Compute the anchor integration matrix as the difference between the
  two expression matrices for every pair of anchor cells}
  \item{Compute the transformation matrix as the product of the integration
  matrix and the weights matrix.}
  \item{Subtract the transformation matrix from the original expression
  matrix.}
}

For multiple dataset integration, we perform iterative pairwise integration.
To determine the order of integration (if not specified via
\code{sample.tree}), we
\itemize{
  \item{Define a distance between datasets as the total number of cells in
  the smaller dataset divided by the total number of anchors between the two
  datasets.}
  \item{Compute all pairwise distances between datasets}
  \item{Cluster this distance matrix to determine a guide tree}
}
}
\examples{
\dontrun{
# to install the SeuratData package see https://github.com/satijalab/seurat-data
library(SeuratData)
data("panc8")

# panc8 is a merged Seurat object containing 8 separate pancreas datasets
# split the object by dataset
pancreas.list <- SplitObject(panc8, split.by = "tech")

# perform standard preprocessing on each object
# (seq_along() is used instead of 1:length() so that an empty list results in
# zero iterations rather than iterating over c(1, 0))
for (i in seq_along(pancreas.list)) {
  pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE)
  pancreas.list[[i]] <- FindVariableFeatures(
    pancreas.list[[i]], selection.method = "vst",
    nfeatures = 2000, verbose = FALSE
  )
}

# find anchors between the preprocessed datasets
anchors <- FindIntegrationAnchors(object.list = pancreas.list)

# integrate data using the pre-computed anchor set
integrated <- IntegrateData(anchorset = anchors)
}

}
\references{
Stuart T, Butler A, et al. Comprehensive Integration of
Single-Cell Data. Cell. 2019;177:1888-1902
\url{https://doi.org/10.1016/j.cell.2019.05.031}
}
