% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/isee.R
\name{isee}
\alias{isee}
\alias{isee.formula}
\alias{isee.default}
\alias{acts.fit}
\alias{hila.fit}
\title{Interaction stagewise estimating equations}
\usage{
isee(y, ...)

\method{isee}{formula}(formula, data = list(), clusterID, waves = NULL,
  interactionID = NULL, contrasts = NULL, subset, method = "ACTS", ...)

\method{isee}{default}(y, x, waves = NULL, interactionID, method = "ACTS",
  ...)

acts.fit(y, x, interactionID, family, clusterID, waves = NULL,
  corstr = "independence", alpha = NULL, intercept = TRUE, offset = 0,
  control = sgee.control(maxIt = 200, epsilon = 0.05, stoppingThreshold =
  min(length(y), ncol(x)) - intercept, undoThreshold = 0), standardize = TRUE,
  verbose = FALSE, ...)

hila.fit(y, x, interactionID, family, clusterID, waves = NULL,
  corstr = "independence", alpha = NULL, intercept = TRUE, offset = 0,
  control = sgee.control(maxIt = 200, epsilon = 0.05, stoppingThreshold =
  min(length(y), ncol(x)) - intercept, undoThreshold = 0.005),
  standardize = TRUE, verbose = FALSE, ...)
}
\arguments{
\item{y}{Vector of response measures that corresponds with modeling family
given in 'family' parameter. 'y' is assumed to be the same length as
'clusterID' and is assumed to be organized into clusters as dictated by
'clusterID'.}

\item{...}{Not currently used}

\item{formula}{Object of class 'formula'; a symbolic description of
the model to be fitted}

\item{data}{Optional data frame containing the variables in the model.}

\item{clusterID}{Vector of integers that identifies the clusters of response
measures in 'y'. Data and 'clusterID' are assumed to 1) be of equal lengths,
2) sorted so that observations of a cluster are in contiguous rows, and 3)
organized so that 'clusterID' is a vector of consecutive integers.}

\item{waves}{An integer vector which identifies components in clusters.
The length of \code{waves} should be the same as the number of
observations. \code{waves} is automatically generated if none is supplied,
but when using \code{subset} parameter, the \code{waves} parameter must be
provided by the user for proper calculation.}

\item{interactionID}{A (p^2+p)/2 x 2 matrix of interaction IDs. Main effects
have the same (unique) number in both columns for their corresponding row.
Interaction effects have each of their corresponding main effects in the
two columns. It is assumed that main effects are listed first. It is
assumed that the main effect IDs used start at 1 and go up to the number
of main effects, p.}

\item{contrasts}{An optional list provided when using a formula,
similar to \code{contrasts} from \code{glm}.
See the \code{contrasts.arg} of \code{model.matrix.default}.}

\item{subset}{An optional vector specifying a subset of observations to be
used in the fitting process.}

\item{method}{A character string indicating desired method to be used to
perform interaction selection. Value can either be "ACTS", where an active
set approach is taken and interaction terms are considered for selection
only after main effects are brought in, or "HiLa", where the hierarchical
lasso penalty is used to ensure hierarchy is maintained in each step.
Default Value is "ACTS".}

\item{x}{Design matrix of dimension length(y) x nvars where each row
represents an observation of predictor variables. Assumed to be scaled.}

\item{family}{Modeling family that describes the marginal distribution of
the response. Assumed to be an object such as 'gaussian()' or 'poisson()'}

\item{corstr}{A character string indicating the desired working correlation
structure. The following are implemented: "independence" (default value),
"exchangeable", and "ar1".}

\item{alpha}{An initial guess for the correlation parameter value
between -1 and 1. If left NULL (the default), the initial estimate is 0.}

\item{intercept}{Binary value indicating whether an intercept term is
to be included in the model for estimation. Default is to include an
intercept.}

\item{offset}{Vector of offset value(s) for the linear predictor. 'offset'
is assumed to be either of length one, or of the same length as 'y'.
Default is to have no offset.}

\item{control}{A list of parameters used to control the path generation
process; see \code{sgee.control}.}

\item{standardize}{A logical parameter that indicates whether or not
the covariates need to be standardized before fitting (but after generating
interaction terms from main covariates).
If standardized before fitting, the unstandardized
path is returned as the default, with a \code{standardizedPath} and
\code{standardizedX} included
separately. Default value is \code{TRUE}.}

\item{verbose}{Logical parameter indicating whether output should be produced
while isee is running. Default value is FALSE.}
}
\value{
Object of class 'sgee' containing the path of coefficient estimates,
the path of scale estimates, the path of correlation parameter
estimates, the iteration at which iSEE terminated, and initial regression
values including \code{x}, \code{y}, \code{family}, \code{clusterID},
\code{interactionID}, \code{offset}, \code{epsilon}, and \code{numIt}.
}
\description{
Perform model selection with clustered data while considering interaction
terms using one of two stagewise methods. The first (ACTS) uses an active set
approach in which interaction terms are only considered for a given update
if the corresponding main effects have already been added to the model.
The second approach (HiLa) approximates the regularized path for
hierarchical lasso with Generalized Estimating Equations. In this second
approach, the model hierarchy is guaranteed in each individual step, thus
ensuring the desired hierarchy throughout the path.
}
\note{
While the two different possible methods that can be used with
\code{isee} reflect two different "styles" of stagewise estimation,
both achieve a desired hierarchy in the resulting model paths.

When considering models with interaction terms, there are three forms
of hierarchy that may be present. Strong hierarchy implies that
an interaction effect is included in the model only if both of its
corresponding main effects are also included in the model. Weak hierarchy
implies that an interaction effect can be in the model only if AT LEAST
one of its corresponding main effects is also included. The third type
of hierarchy is simply a lack of hierarchy; that is an interaction term
can be included regardless of main effects.

In practice strong hierarchy is usually what is desired as it is the
simplest to interpret, but requires a higher amount of computation when
performing model selection. Weak hierarchy is sometimes used as a compromise
between the interpretability of strong hierarchy and the computational ease
of no hierarchy. Both \code{isee} methods only implement strong hierarchy
as the use of stagewise procedures greatly reduces the computational burden.

The active set approach, ACTS, tends to have slightly better predictive
and model selection performance when the true model is closer to a purely
strong hierarchy, but HiLa tends to do better if the true model hierarchy
is closer to having a purely weak hierarchy. Thus, in practice, it is
important to use external information and judgement to determine which
approach is more appropriate.
}
\examples{

#####################
## Generate test data
#####################

## Initialize covariate values
p <- 5 
beta <- c(1, 0, 1.5, 0, .5, ## Main effects
          rep(0.5,4), ## Interaction terms
          0.5, 0, 0.5,
          0,1,
          0)


generatedData <- genData(numClusters = 50,
                         clusterSize = 4,
                         clusterRho = 0.6,
                         clusterCorstr = "exchangeable",
                         yVariance = 1,
                         xVariance = 1,
                         beta = beta,
                         numMainEffects = p,
                         family = gaussian(),
                         intercept = 1)

 
## Perform Fitting by providing formula and data
genDF <- data.frame(Y = generatedData$y, X = generatedData$xMainEff)

## Using "ACTS" method
coefMat1 <- isee(formula(paste0("Y~(",
                               paste0("X.", 1:p, collapse = "+"),
                                 ")^2")),
                  data = genDF,
                  family = gaussian(),
                  clusterID = generatedData$clusterID,
                  corstr = "exchangeable",
                  method = "ACTS",
                  control = sgee.control(maxIt = 50, epsilon = 0.5))

## Using "HiLa" method
coefMat2 <- isee(formula(paste0("Y~(",
                               paste0("X.", 1:p, collapse = "+"),
                                 ")^2")),
                  data = genDF,
                  family = gaussian(),
                  clusterID = generatedData$clusterID,
                  corstr = "exchangeable",
                  method = "HiLa",
                  control = sgee.control(maxIt = 50, epsilon = 0.5))

}
\references{
Vaughan, G., Aseltine, R., Chen, K., and Yan, J. (2017). Efficient
interaction selection for clustered data via stagewise generalized
estimating equations.  Department of Statistics, University of
Connecticut. Technical Report.

Zhu, R., Zhao, H., and Ma, S. (2014). Identifying
gene-environment and gene-gene interactions using a progressive
penalization approach. Genetic Epidemiology 38, 353--368.

Bien, J., Taylor, J., and Tibshirani, R. (2013). A lasso
for hierarchical interactions. The Annals of Statistics 41, 1111--1141.
}
\author{
Gregory Vaughan
}
