% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/FIRM.R
\name{FIRM}
\alias{FIRM}
\title{Flexible Integration of Single-Cell RNA-Seq Data}
\usage{
FIRM(
  SS2,
  tenx,
  hvg1,
  hvg2,
  dims,
  all_genes = FALSE,
  res_seq_SS2 = seq(0.1, 2, 0.1),
  res_seq_tenx = seq(0.1, 2, 0.1),
  coreNum = 1,
  verbose = FALSE,
  seed = NULL
)
}
\arguments{
\item{SS2}{\code{Seurat} object for reference dataset (e.g. Smart-seq2).}

\item{tenx}{\code{Seurat} object for query dataset (e.g. 10x Genomics).}

\item{hvg1, hvg2}{Character vectors giving the names of the highly
variable genes (HVGs) selected in \code{SS2} and \code{tenx}, respectively.}

\item{dims}{Integer scalar, number of principal components to use
during integration.}

\item{all_genes}{Logical scalar.  If \code{TRUE} the integration is
carried out on the union of all genes; otherwise only on the
intersected HVGs (default).}

\item{res_seq_SS2, res_seq_tenx}{Numeric vectors of clustering
resolutions to be screened for the reference and query dataset,
respectively.  Defaults to \code{seq(0.1, 2, 0.1)}.}

\item{coreNum}{Integer scalar, number of CPU cores used for parallel
screening.}

\item{verbose}{Logical scalar.  If \code{TRUE} a list containing the
integrated matrix and quality metrics is returned; otherwise only
the integrated expression matrix is returned.}

\item{seed}{Optional integer random seed for reproducibility.  If
\code{NULL} (default), the current random state is used.}
}
\value{
By default (\code{verbose = FALSE}) a single \code{matrix} of
batch-corrected, scaled expression values with genes as rows and
combined cells as columns.

If \code{verbose = TRUE} a named \code{list} is returned:
\describe{
  \item{integrated}{As above, the corrected expression matrix.}
  \item{Metric_PCA}{Mean MNN mixing score of the naive PCA
    (no correction).}
  \item{Metric_FIRM}{Mean MNN mixing score of the FIRM-corrected
    embedding (matrix when multiple resolution pairs were screened).}
}
}
\description{
Performs unsupervised integration of two single-cell RNA-seq datasets
by searching for the optimal clustering resolution
pair that maximises mutual-nearest-neighbour (MNN) mixing in the
combined PCA space.  The final integrated expression matrix is returned
after batch-effect correction.
}
\details{
The algorithm performs the following steps:
\enumerate{
  \item PCA on each dataset using the intersected HVGs.
  \item SNN graph construction (via \code{Seurat::FindNeighbors}).
  \item Screening of clustering resolution pairs
    (\code{res_seq_SS2} \eqn{\times}{x} \code{res_seq_tenx}) to maximise
    mutual-nearest-neighbour mixing in the joint PCA space.
  \item Batch-effect correction with \code{FIRM_res*} functions.
  \item Final integrated expression matrix is scaled and returned.
}

Quality control: the integrated embedding is compared with the naive
PCA; if correction does not improve mixing, the naive PCA result is
returned instead.
}
\examples{
\donttest{
set.seed(42)

library(Seurat)
library(FIRM)

data("ExampleData")
prep_SS2  <- prep_data(ExampleData$SS2, hvg_genes = 1000)
Dataset1  <- prep_SS2$Dataset
hvg1      <- prep_SS2$hvg

prep_tenx <- prep_data(ExampleData$tenx, hvg_genes = 1000)
Dataset2  <- prep_tenx$Dataset
hvg2      <- prep_tenx$hvg

res <- FIRM(Dataset1, Dataset2, hvg1, hvg2,
           dims = 15, all_genes = FALSE, seed = 42)

dim(res)
}

}
\references{
Ming, J., Lin, Z., Zhao, J., Wan, X., Ezran, C., Liu, S., ... & TTM Consortium. (2022). FIRM: Flexible integration of single-cell RNA-sequencing data for large-scale multi-tissue cell atlas datasets. \emph{Briefings in Bioinformatics}, 23(5).
}
\seealso{
\code{\link{prep_data}} for data preprocessing.
}
