% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/cate.R, R/factor_functions.R
\name{est.confounder.num}
\alias{est.confounder.num}
\alias{est.factor.num}
\title{Estimate the number of confounders}
\usage{
est.confounder.num(formula, X.data = NULL, Y, method = c("bcv", "ed"),
  rmax = 20, nRepeat = 20, bcv.plot = TRUE, log = "")

est.factor.num(Y, method = c("bcv", "ed"), rmax = 20, nRepeat = 12,
  bcv.plot = TRUE, log = "")
}
\arguments{
\item{formula}{a formula indicating the known covariates including both primary variables and nuisance variables, which are separated by \code{|}. The variables before \code{|} are primary variables and the variables after \code{|} are nuisance variables. If there are no nuisance variables, \code{|} is not needed and \code{formula} becomes a typical formula with all the covariates considered primary. In that case, an intercept term will still be automatically added as a nuisance variable.}

\item{X.data}{the data frame used for \code{formula}}

\item{Y}{outcome, n*p matrix}

\item{method}{method to estimate the number of factors. There are currently two choices:
"ed" is the eigenvalue difference method proposed by Onatski (2010) and "bcv" is the
bi-cross-validation method proposed by Owen and Wang (2015). "bcv" tends to estimate more
weak factors and takes longer to run.}

\item{rmax}{the maximum number of factors to consider. If the estimated number of factors is rmax,
then users are encouraged to increase rmax and run again. Default is 20.}

\item{nRepeat}{the number of repeats of bi-cross-validation. A larger nRepeat will result in a
more accurate estimate of the bcv error, but will take longer to run.}

\item{bcv.plot}{whether to plot the relative bcv error versus the number of estimated
ranks. The relative bcv error is the entrywise mean squared error divided by the average of
the estimated noise variance.}

\item{log}{if \code{log = "y"}, then the y-axis of the bcv plot is in log scale.}
}
\value{
If \code{method} is "ed", then returns the estimated number of confounders/factors.
If \code{method} is "bcv", then returns a list with the following components:
\describe{
\item{r}{estimated number of confounders/factors}
\item{errors}{the relative bcv errors of length \code{1 + rmax}}
}
}
\description{
Estimate the number of confounders
}
\section{Functions}{
\itemize{
\item \code{est.factor.num}: Estimate the number of factors
}}
\examples{
## example for est.confounder.num
data <- gen.sim.data(n = 50, p = 100, r = 5)
X.data <- data.frame(X1 = data$X1)
est.confounder.num(~ X1, X.data, data$Y, method = "ed")
est.confounder.num(~ X1, X.data, data$Y, method = "bcv")
## example for est.factor.num
n <- 50
p <- 100
r <- 5
Z <- matrix(rnorm(n * r), n, r)
Gamma <- matrix(rnorm(p * r), p, r)
Y <- Z \%*\% t(Gamma) + rnorm(n * p)

est.factor.num(Y, method = "ed")
est.factor.num(Y, method = "bcv")
}
\references{
{
A. B. Owen and J. Wang (2015), Bi-cross-validation for factor analysis. \emph{arXiv:1503.03515}.

A. Onatski (2010), Determining the number of factors from empirical distribution of eigenvalues.
\emph{The Review of Economics and Statistics} 92(4).
}
}

