% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gtReg.R
\name{gtReg}
\alias{gtReg}
\title{Fitting group testing regression models}
\usage{
gtReg(
  type = "sp",
  formula,
  data,
  groupn = NULL,
  subg = NULL,
  coln = NULL,
  rown = NULL,
  arrayn = NULL,
  retest = NULL,
  sens = 1,
  spec = 1,
  linkf = c("logit", "probit", "cloglog"),
  method = c("Vansteelandt", "Xie"),
  sens.ind = NULL,
  spec.ind = NULL,
  start = NULL,
  control = gtRegControl(...),
  ...
)
}
\arguments{
\item{type}{\kbd{"sp"} for simple pooling (Dorfman testing with
or without retests), \kbd{"halving"} for halving
protocol, or \kbd{"array"} for array testing. See 'Details' for
descriptions of the group testing algorithms.}

\item{formula}{an object of class "formula" (or one that
can be coerced to that class); a symbolic description of
the model to be fitted. The details of model specification
are under 'Details'.}

\item{data}{an optional data frame, list, or environment
(or object coercible by \kbd{as.data.frame} to a data frame)
containing the variables in the model. If not found in data,
the variables are taken from \kbd{environment(formula)},
typically the environment from which \code{gtReg} is called.}

\item{groupn}{a vector, list, or data frame of the group
numbers that designates individuals to groups (for use with
simple pooling, \kbd{type = "sp"}, or the halving protocol,
\kbd{type = "halving"}).}

\item{subg}{a vector, list, or data frame of the group numbers
that designates individuals to subgroups (for use with the
halving protocol, \kbd{type = "halving"}).}

\item{coln}{a vector, list, or data frame that specifies the
column group number for each sample (for use with array
testing, \kbd{type = "array"}).}

\item{rown}{a vector, list, or data frame that specifies the
row group number for each sample (for use with array testing,
\kbd{type = "array"}).}

\item{arrayn}{a vector, list, or data frame that specifies the
array number for each sample (for use with array testing,
\kbd{type = "array"}).}

\item{retest}{a vector, list, or data frame of individual
retest results. Default value is \kbd{NULL} for no retests.
See 'Details' for details on how to specify \kbd{retest}.}

\item{sens}{sensitivity of the test. Default value is set
to 1.}

\item{spec}{specificity of the test. Default value is set
to 1.}

\item{linkf}{a character string specifying one of the three
link functions for a binomial model: \kbd{"logit"} (default),
\kbd{"probit"}, or \kbd{"cloglog"}.}

\item{method}{the method to fit the regression model.
Options include \kbd{"Vansteelandt"} (default) or \kbd{"Xie"}.
The \kbd{"Vansteelandt"} option finds estimates by directly
maximizing the likelihood function based on the group responses,
while the \kbd{"Xie"} option uses the EM algorithm to
maximize the likelihood function in terms of the unobserved
individual responses.}

\item{sens.ind}{sensitivity of the individual retests. If \kbd{NULL},
set to be equal to \kbd{sens}.}

\item{spec.ind}{specificity of the individual retests. If \kbd{NULL},
set to be equal to \kbd{spec}.}

\item{start}{starting values for the parameters in the linear
predictor.}

\item{control}{a list of parameters for controlling the fitting
process in method \kbd{"Xie"}. These parameters will be passed
to the \code{\link{gtRegControl}} function for use.}

\item{...}{arguments to be passed to \code{\link{gtRegControl}} by
default. See argument \kbd{control}.}
}
\value{
An object of class \kbd{"gtReg"}, a list which may include:
\item{coefficients}{a named vector of coefficients.}
\item{hessian}{estimated Hessian matrix of the negative
log-likelihood function. This serves as an estimate of the
information matrix.}
\item{residuals}{the response residuals. This is the difference
of the observed group responses and the fitted group
responses. Not included for array testing.}
\item{fitted.values}{the fitted mean values of group responses.
Not included for array testing.}
\item{deviance}{the deviance between the fitted model and the
saturated model. Not included for array testing.}
\item{aic}{Akaike's Information Criterion. This is minus twice
the maximized log-likelihood plus twice the number of
coefficients. Not included for array testing.}
\item{null.deviance}{the deviance for the null model,
comparable with \kbd{deviance}. The null model will
include only the intercept, if there is one in the model.
Provided for simple pooling, \kbd{type = "sp"}, only.}
\item{counts}{the number of iterations in \kbd{optim}
(Vansteelandt's method) or the number of iterations in the
EM algorithm (Xie's method, halving, and array testing).}
\item{Gibbs.sample.size}{the number of Gibbs samples
generated in each E step. Provided for array testing,
\kbd{type = "array"}, only.}
\item{df.residual}{the residual degrees of freedom.
Provided for simple pooling, \kbd{type = "sp"}, only.}
\item{df.null}{the residual degrees of freedom for the null model.
Provided for simple pooling, \kbd{type = "sp"}, only.}
\item{z}{the vector of group responses. Not included for array testing.}
\item{call}{the matched call.}
\item{formula}{the formula supplied.}
\item{terms}{the terms object used.}
\item{method}{the method (\kbd{"Vansteelandt"} or \kbd{"Xie"})
used to fit the model. For the halving protocol, the
\kbd{"Xie"} method is used. Not included for array testing.}
\item{link}{the link function used in the model.}
}
\description{
Fits the group testing regression model specified
through a symbolic description of the linear predictor and
descriptions of the group testing setting. This function allows
for fitting regression models with simple pooling, halving, or array
testing data.
}
\details{
With simple pooling and halving, a typical predictor
has the form \kbd{groupresp ~ covariates} where \kbd{groupresp}
is the (numeric) group response vector. With array testing,
individual samples are placed in a matrix-like grid where
samples are pooled within each row and within each column.
This leads to two kinds of group responses: row and column
group responses. Thus, a typical predictor has the form
\kbd{cbind(col.resp, row.resp) ~ covariates}, where
\kbd{col.resp} is the (numeric) column group response vector
and \kbd{row.resp} is the (numeric) row group response vector.
For all methods, \kbd{covariates} is a series of terms which
specifies a linear predictor for individual responses.
Note that it is actually the unobserved individual responses,
not the observed group responses, which are modeled by the
covariates. When denoting group responses (\kbd{groupresp},
\kbd{col.resp}, and \kbd{row.resp}), a 0 denotes a negative
response and a 1 denotes a positive response, where the
probability of an individual positive response is being
modeled directly.

A terms specification of the form
\kbd{first + second} indicates all the terms in \kbd{first}
together with all the terms in \kbd{second} with duplicates
removed. A specification of the form \kbd{first:second}
indicates the set of terms obtained by taking the interactions
of all terms in \kbd{first} with all terms in \kbd{second}.
The specification \kbd{first*second} indicates the cross of
\kbd{first} and \kbd{second}. This is the same as \kbd{first +
second + first:second}. The terms in the formula will be
re-ordered so that main effects come first, followed by the
interactions, all second-order, all third-order, and so on;
to avoid this, pass a terms object as the formula.

For simple pooling (\kbd{type = "sp"}), the functions \kbd{gtreg.fit},
\kbd{EM}, and \kbd{EM.ret}, where the first corresponds to Vansteelandt's
method described in Vansteelandt et al. (2000) and the last two correspond
to Xie's method described in Xie (2001), are called to carry out the
model fitting. The \kbd{gtreg.fit} function uses the \kbd{optim}
function with default method \kbd{"Nelder-Mead"} to maximize
the likelihood function of the observed group responses.
If this optimization method produces a Hessian matrix of all
zero elements, the \kbd{"SANN"} method in \kbd{optim} is
employed to find the coefficients and Hessian matrix. For
the \kbd{"SANN"} method, the number of iterations in \kbd{optim}
is set to be 10000. For the background on the use of \kbd{optim},
see \kbd{help(optim)}.

The \kbd{EM} and \kbd{EM.ret} functions apply Xie's EM
algorithm to the likelihood function written in terms of the
unobserved individual responses; the functions use \kbd{glm.fit}
to update the parameter estimates within each M step. The
\kbd{EM} function is used when there are no retests and
\kbd{EM.ret} is used when individual retests are available.
Thus, within the \kbd{retest} argument, individual observations
in observed positive groups are 0 (negative) or 1 (positive);
the remaining individual observations are \kbd{NA}s, meaning
that no retest is performed for them. Retests cannot be used
with Vansteelandt's method; a warning message will be given
in this case, and the individual retests will be ignored in
the model fitting. There could be slight differences in the
estimates between Vansteelandt's and Xie's methods (when
retests are not available) due to different convergence criteria.

With simple pooling (i.e., Dorfman testing, two-stage hierarchical
testing), each individual appears in exactly one pool. When only the
group responses are observed, the null degrees of freedom are the number
of groups minus 1 and the residual degrees of freedom are the number of
groups minus the number of parameters. When individual retests are
observed too, it is an open research question what the degrees of
freedom and the deviance for the null model should be; therefore, the
degrees of freedom and \kbd{null.deviance} will not be displayed.

Under the halving protocol, the \kbd{EM.halving} function
applies Xie's EM algorithm to the
likelihood function written in terms of the unobserved
individual responses; the function uses \kbd{glm.fit} to update
the parameter estimates within each M step. In the halving
protocol, if the initial group tests positive, it is split
into two subgroups. The two subgroups are subsequently tested
and if either subgroup tests positive, the third and final
step is to test all individuals within the subgroup. Thus,
within \kbd{subg}, subgroup responses in observed positive
groups are 0 (negative) or 1 (positive); the remaining
subgroup responses are \kbd{NA}s, meaning that no tests are
performed for them. The individual retests are similarly coded.

With array testing (also known as matrix pooling), the
\kbd{EM.mp} function applies Xie's
EM algorithm to the likelihood function written in terms of the
unobserved individual responses. In each E step, the Gibbs
sampling technique is used to estimate the conditional
probabilities. Because of the large number of Gibbs samples
needed to achieve convergence, the model fitting process could
be quite slow, especially when multiple positive rows and
columns are observed. In this case, we can either increase the
Gibbs sample size to help achieve convergence or loosen the
convergence criteria by increasing \kbd{tol} at the expense
of perhaps poorer estimates. If follow-up retests are performed,
the retest results going into the model will help achieve
convergence faster with the same Gibbs sample size and
convergence criteria. In each M step, we use \kbd{glm.fit} to
update the parameter estimates.

For simple pooling, \kbd{retest} provides individual retest
results for Dorfman's retesting procedure. Under the halving
protocol, \kbd{retest} provides individual retest results
within a subgroup that tests positive. The \kbd{retest}
argument provides individual retest results, where a 0
denotes negative and 1 denotes positive status. An \kbd{NA}
denotes that no retest is performed for that individual.
The default value is \kbd{NULL} for no retests.

For simple pooling, \kbd{control} provides parameters for
controlling the fitting process in the \kbd{"Xie"} method only.

\kbd{gtReg} returns an object of class \kbd{"gtReg"}.
The function \kbd{summary} (i.e., \code{\link{summary.gtReg}})
is used to obtain or print a summary of the results.
The group testing function \kbd{predict} (i.e.,
\code{\link{predict.gtReg}}) is used to make predictions
on \kbd{"gtReg"} objects.
}
\examples{

data(hivsurv)
fit1 <- gtReg(type = "sp", formula  =  groupres ~ AGE + EDUC.,
              data  =  hivsurv, groupn  =  gnum, sens  =  0.9,
              spec  =  0.9, method  =  "Xie")
fit1

set.seed(46)
gt.data <- gtSim(type = "sp", par = c(-12, 0.2),
                 size1 = 700, size2 = 5)
fit2 <- gtReg(type = "sp", formula = gres ~ x, data = gt.data,
              groupn = groupn)
fit2

set.seed(21)
gt.data <- gtSim(type = "sp", par = c(-12, 0.2),
                 size1 = 700, size2 = 6, sens = 0.95, spec = 0.95,
                 sens.ind = 0.98, spec.ind = 0.98)
fit3 <- gtReg(type = "sp", formula = gres ~ x, data = gt.data,
              groupn = groupn, retest = retest, method = "Xie",
              sens = 0.95, spec = 0.95, sens.ind = 0.98,
              spec.ind = 0.98, trace = TRUE)
summary(fit3)

set.seed(46)
gt.data <- gtSim(type = "halving", par = c(-6, 0.1), gshape = 17,
                 gscale = 1.4, size1 = 5000, size2 = 5,
                 sens = 0.95, spec = 0.95)
fit4 <- gtReg(type = "halving", formula = gres ~ x,
              data = gt.data, groupn = groupn, subg = subgroup,
              retest = retest, sens = 0.95, spec = 0.95,
              start = c(-6, 0.1), trace = TRUE)
summary(fit4)

# 5x6 and 4x5 array
set.seed(9128)
sa1a <- gtSim(type = "array", par = c(-7, 0.1), size1 = c(5, 4),
              size2 = c(6, 5), sens = 0.95, spec = 0.95)
sa1 <- sa1a$dframe
\donttest{
fit5 <- gtReg(type = "array",
              formula = cbind(col.resp, row.resp) ~ x,
              data = sa1, coln = coln, rown = rown,
              arrayn = arrayn, sens = 0.95, spec = 0.95,
              tol = 0.005, n.gibbs = 2000, trace = TRUE)
fit5
summary(fit5)}

}
\references{
\insertRef{Vansteelandt2000}{binGroup2}

\insertRef{Xie2001}{binGroup2}
}
\seealso{
\code{\link{gtSim}} for simulation of data in the
group testing form to be used by \kbd{gtReg};
\code{\link{summary.gtReg}} and \code{\link{predict.gtReg}}
for \kbd{gtReg} methods.
}
\author{
The majority of this function was originally written as
\kbd{gtreg.sp}, \kbd{gtreg.halving}, and \kbd{gtreg.mp} by Boan Zhang
for the \code{binGroup} package. Minor modifications have been made for
inclusion of the functions in the \code{binGroup2} package.
}
