\name{earth.object}
\alias{earth.object}
\title{An earth object}
\description{
The object returned by the \code{\link{earth}} function.

This is an \code{S3} model of \code{\link{class}} \code{"earth"}.
It is a list with the components listed below.

\emph{Term} refers to a term created during the
forward pass (each line of the output from \code{\link{format.earth}}
is a term).
Term number 1 is always the intercept.
}
\value{
\item{\code{rss}}{
     Residual sum-of-squares (RSS) of the model (summed over all responses,
     if \code{y} has multiple columns).
}
\item{\code{rsq}}{
    \code{1-rss/tss}.
     R-Squared of the model (calculated over all responses,
     and calculated using the \code{weights} argument if it was supplied).
     A measure of how well the model fits the training data.
     Note that \code{tss} is the total sum-of-squares, \code{sum((y - mean(y))^2)}.
}
\item{\code{gcv}}{
     Generalized Cross Validation (GCV) of the model (summed over all responses).
     The GCV is calculated using the \code{penalty} argument.
     For details of the GCV calculation, see
     equation 30 in Friedman's MARS paper and \code{earth:::get.gcv}.
}
\item{\code{grsq}}{
     \code{1-gcv/gcv.null}.
     An estimate of the predictive power of the model (calculated over all responses,
and calculated using the \code{weights} argument if it was supplied).
     \code{gcv.null} is the GCV of an intercept-only model.
     See \dQuote{\emph{Can \code{GRSq} be negative?}} in the vignette.
}
\item{\code{bx}}{
     Matrix of basis functions applied to \code{x}.
     Each column corresponds to a selected term.
     Each row corresponds to a row in the input matrix \code{x},
     after taking \code{subset}.
     See \code{\link{model.matrix.earth}} for an example of \code{bx} handling.
     Example \code{bx}:\preformatted{     (Intercept)  h(Girth-12.9)  h(12.9-Girth)  h(Girth-12.9)*h(...
[1,]           1            0.0            4.6                    0
[2,]           1            0.0            4.3                    0
[3,]           1            0.0            4.1                    0
...}
% \cr
}
\item{\code{dirs}}{
     Matrix with one row per MARS term, and with ij-th element equal to\cr

\code{0}  if predictor j is not in term i\cr
\code{-1} if an expression of the form \code{h(const - xj)} is in term i\cr
\code{1}  if an expression of the form \code{h(xj - const)} is in term i\cr
\code{2}  if predictor j should enter term i linearly
(either because specified by the \code{linpreds} argument or because earth
discovered that a knot was unnecessary).\cr

     This matrix includes all terms generated by the forward pass,
     including those not in \code{selected.terms}.
     Note that here the terms may not all be in pairs, because
     although the forward pass adds terms as hinged pairs (so both sides of
     the hinge are available as building blocks for further terms), it also
     deletes linearly dependent terms before handing control to the pruning pass.
     Example \code{dirs}:\preformatted{                        Girth Height
(Intercept)                 0      0  # intercept
h(12.9-Girth)              -1      0  # 2nd term uses Girth
h(Girth-12.9)               1      0  # 3rd term uses Girth
h(Girth-12.9)*h(Height-76)  1      1  # 4th term uses Girth and Height
...
}
% \cr
}
\item{\code{cuts}}{
     Matrix with ij-th element equal to the cut point (hinge value)
     for predictor j in term i.
     This matrix includes all terms generated by the forward pass,
     including those not in \code{selected.terms}.
     Note for programmers: the precedent is to use \code{dirs}
     for term names etc. and to only use \code{cuts} where cut information is needed.
     Example \code{cuts}:\preformatted{                           Girth Height
(Intercept)                    0      0  # intercept, no cuts
h(12.9-Girth)               12.9      0  # 2nd term has cut at 12.9
h(Girth-12.9)               12.9      0  # 3rd term has cut at 12.9
h(Girth-12.9)*h(Height-76)  12.9     76  # 4th term has two cuts
...}
% \cr
}
\item{\code{prune.terms}}{
     A matrix specifying which terms appear in which pruning pass subsets.
     The row index of \code{prune.terms} is the model size.
     (The model size is the number of terms in the model.
     The intercept is counted as a term.)
     Each row is a vector of term numbers for the best model of that size.
     An element is 0 if the term is not in the model, thus \code{prune.terms} is a
     lower triangular matrix, with dimensions \code{nprune x nprune}.
     The model selected by the pruning pass is at row number \code{length(selected.terms)}.

     Example \code{prune.terms}:\preformatted{     [,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,]    1    0    0    0    0    0    0   # intercept-only model
[2,]    1    2    0    0    0    0    0   # best 2 term model uses terms 1,2
[3,]    1    2    4    0    0    0    0   # best 3 term model uses terms 1,2,4
[4,]    1    2    6    9    0    0    0   # and so on
...}
% \cr
}
\item{\code{selected.terms}}{
     Vector of term numbers in the selected model.
     Can be used as a row index vector into \code{cuts} and \code{dirs}.
     The first element \code{selected.terms[1]} is always 1, the intercept.
}
\item{\code{fitted.values}}{
    Fitted values.
    A matrix with dimensions \code{nrow(y) x ncol(y)}
    after factors in \code{y} have been expanded.
}
\item{\code{residuals}}{
    Residuals.
    A matrix with dimensions \code{nrow(y) x ncol(y)}
    after factors in \code{y} have been expanded.
}
\item{\code{coefficients}}{
    Regression coefficients.
    A matrix with dimensions \code{length(selected.terms) x ncol(y)}
    after factors in \code{y} have been expanded.
    Each column holds the least squares coefficients from regressing that
    column of \code{y} on \code{bx}.
    The first row holds the intercept coefficient(s).
}
\item{\code{rss.per.response}}{
    A vector of the RSS for each response.
    Length is the number of responses, i.e., \code{ncol(y)} after factors in \code{y} have been expanded.
    The \code{rss} component above is equal to \code{sum(rss.per.response)}.
}
\item{\code{rsq.per.response}}{
    A vector of the R-Squared for each response
(where R-Squared is calculated using the \code{weights} argument if it was supplied).
    Length is the number of responses.
}
\item{\code{gcv.per.response}}{
    A vector of the GCV for each response.
    Length is the number of responses.
    The \code{gcv} component above is equal to \code{sum(gcv.per.response)}.
}
\item{\code{grsq.per.response}}{
    A vector of the GRSq for each response
(calculated using the \code{weights} argument if it was supplied).
    Length is the number of responses.
}
\item{\code{rss.per.subset}}{
     A vector of the RSS
     for each model subset generated by the pruning pass.
     Length is \code{nprune}.
     For multiple responses, the RSS is summed over all responses for each subset.
     The \code{rss} above is\cr
     \code{rss.per.subset[length(selected.terms)]}.
     The RSS of an intercept-only model is \code{rss.per.subset[1]}.
}
\item{\code{gcv.per.subset}}{
     A vector of the GCV for each model in \code{prune.terms}.
     Length is \code{nprune}.
     For multiple responses, the GCV is summed over all responses for each subset.
     The \code{gcv} above is \code{gcv.per.subset[length(selected.terms)]}.
     The GCV of an intercept-only model is \code{gcv.per.subset[1]}.
}
\item{\code{leverages}}{
    Diagonal of the hat matrix (from the linear regression of the response on \code{bx}).
}
\item{\code{penalty,nk,thresh}}{
     Copies of the corresponding arguments to \code{earth}.
}
\item{\code{pmethod,nprune}}{
     Copies of the corresponding arguments to \code{earth}.
}
\item{\code{weights,wp}}{
     Copies of the corresponding arguments to \code{earth}.
}
\item{\code{termcond}}{
     Reason the forward pass terminated (an integer).
}
\item{\code{call}}{
     The call used to invoke \code{earth}.
}
\item{\code{terms}}{
     Model frame terms.
     This component exists only if the model was built using \code{earth.formula}.
}
\item{\code{modvars}}{
A matrix specifying which input variables
are used in each column of the model matrix.
(This field is \bold{new in earth 5.2.0}.)\cr
Columns correspond to columns of the model matrix (same as cols of \code{dirs}, see above).\cr
Rows correspond to variables in the formula.

For example, the formula:\preformatted{    survived ~ age + pclass + sqrt(age) - sex
}
results in:

\code{attr(terms,"factors")}:\preformatted{                age  pclass  sqrt(age)
    survived      0       0          0  # the response will be dropped
    age           1       0          0
    pclass        0       1          0
    sqrt(age)     0       0          1  # sqrt(age) will be merged with age
    sex           0       0          0  # sex is unused and will be dropped
}
\code{modvars}:\preformatted{            age pclass2nd pclass3rd sqrt(age)
    age       1         0         0         1  # age and sqrt(age) use "age"
    pclass    0         1         1         0  # pclass2nd and pclass3rd use "pclass"
}
}
\item{\code{namesx}}{
     Variable names in the input data.  Deprecated (subsumed by \code{modvars}).
}
\item{\code{xlevels}}{
     This component exists only if the model was built using \code{earth.formula}.\cr
     Same as \code{lm}.  A record of the levels of the factors used in fitting,
     needed under certain conditions by \code{predict.earth}.
}
\item{\code{levels}}{
     This component exists only if the model was built using \code{earth.default}.\cr
     Levels of \code{y} if \code{y} is a \code{\link{factor}},\cr
     \code{c(FALSE,TRUE)} if \code{y} is \code{\link{logical}},\cr
     Else \code{NULL}.
  \cr\cr
  \bold{The following fields appear only if \code{earth}'s argument \code{keepxy} is \code{TRUE}.}
}
% \item{\code{x}}{}
% \item{\code{y}}{}
% \item{\code{data}}{}
% \item{\code{subset}}{}{
\item{\code{x},\code{y},\code{data},\code{subset}}{
Copies of the corresponding arguments to \code{earth}.
Only exist if \code{keepxy=TRUE}.
  \cr\cr
  \bold{The following fields appear only if \code{earth}'s \code{glm} argument is used.}
}
\item{\code{glm.list}}{
     List of GLM models. Each element is the value returned by \code{earth}'s
     internal call to \code{\link{glm}} for each response.\cr
     Thus if there is a single response (or a single binomial pair, see
     \dQuote{\emph{Binomial pairs}} in the vignette)
     this will be a one element list and you access the GLM model with
     \code{earth.mod$glm.list[[1]]}.
}
\item{\code{glm.coefficients}}{
    GLM regression coefficients.
    Analogous to the \code{coefficients} field described above but for the GLM model(s).
    A matrix with dimensions \code{length(selected.terms) x ncol(y)}
    after factors in \code{y} have been expanded.
    Each column holds the coefficients from the GLM regression of that
    column of \code{y} on \code{bx}.
    This duplicates, for convenience, information buried in \code{glm.list}.
}
\item{\code{glm.stats}}{
       GLM summary statistics such as \code{devratio}, \code{AIC}, and \code{iters}.
}
\item{\code{glm.bpairs}}{
       Is \code{NULL} unless there are paired binomial columns.
       Else a logical vector \code{c(TRUE, FALSE)}.
       See \dQuote{\emph{Binomial pairs}} in the vignette.
       Retained for backwards compatibility with old versions of earth.
   \cr\cr
   \bold{The following fields appear only if the \code{nfold} argument is greater than 1.}
}
\item{\code{cv.list}}{
      List of \code{earth} models, one model for each fold (\code{ncross * nfold} models).\cr
      The fold models have two extra fields,
      \code{icross} (an integer from \code{1} to \code{ncross})
      and \code{ifold} (an integer from \code{1} to \code{nfold}).\cr
      To save memory, lengthy fields
      in the fold models are removed unless you use \code{keepxy=TRUE}.
      The \dQuote{lengthy fields} are \code{$bx}, \code{$fitted.values}, and \code{$residuals}.
}
\item{\code{cv.nterms}}{
      Vector of length \code{ncross * nfold + 1}.
      Number of MARS terms in the model generated at each cross-validation fold,
      with the final element being the mean of these.
}
\item{\code{cv.nvars}}{
      Vector of length \code{ncross * nfold + 1}.
      Number of predictors in the model generated at each cross-validation fold,
      with the final element being the mean of these.
}
\item{\code{cv.groups}}{
     Specifies which cases went into which folds.
     Matrix with two columns and number of rows equal to the number of cases \code{nrow(x)}.
     Elements of the first column specify the cross-validation number, \code{1:ncross}.
     Elements of the second column specify the fold number, \code{1:nfold}.
}
\item{\code{cv.rsq.tab}}{
      Matrix with \code{ncross * nfold + 1} rows and \code{nresponse+1} columns,
      where \code{nresponse} is the number of responses,
      i.e., \code{ncol(y)} after factors in \code{y} have been expanded.
      The first \code{nresponse} elements of a row are the \code{cv.rsq}'s on
      the out-of-fold data for each response of the model generated at that row's fold.
      (A \code{cv.rsq} is calculated from predictions on the out-of-fold data
      using the best model built from the in-fold data;
      where \dQuote{best} means the model was selected using the in-fold GCV.
      The R-Squareds are calculated using the \code{weights} argument if it was supplied.
      The final column holds the row mean (a weighted mean if \code{wp} is specified)).
      The final row holds the column means.
      The values in this final row are the mean \code{cv.rsq} values
      printed by \code{\link{summary.earth}}.
\cr\cr
      Example for a single response model (where the \code{mean} column
is redundant but included for uniformity with multiple response models):
\preformatted{           y  mean
fold1  0.909 0.909
fold2  0.869 0.869
fold3  0.952 0.952
fold4  0.157 0.157
fold5  0.961 0.961
mean   0.769 0.769
}
      Example for a multiple response model:
\preformatted{         y1   y2    y3   mean
fold1 0.915 0.951 0.944 0.937
fold2 0.962 0.970 0.970 0.968
fold3 0.914 0.940 0.942 0.932
fold4 0.907 0.929 0.925 0.920
fold5 0.947 0.987 0.979 0.971
mean  0.929 0.955 0.952 0.946

}
}
\item{\code{cv.class.rate.tab}}{
      Like \code{cv.rsq.tab} but is the classification rate at each fold,
      i.e. the fraction of classes correctly predicted.
      Models with discrete response only.
      Calculated with \code{thresh=.5} for binary responses.
      For responses with more than two
      levels, the final row is the overall classification rate.  The other
      rows are the classification rates for each level (the level
      versus not-the-level), which are usually higher than the overall
      classification rate (predicting the level versus not-the-level is
      easier than correctly predicting one of many levels).
      The \code{weights} argument is ignored for all cross-validation stats except R-Squareds.
}
\item{\code{cv.maxerr.tab}}{
      Like \code{cv.rsq.tab} but is the \code{MaxErr} at each fold.
      This is the signed max absolute value at each fold.
      Results are aggregated for the final column and final row
      using the signed max absolute value.
      The \emph{signed max absolute value} is defined
      as the maximum of the absolute difference
      between the predicted and observed response values, multiplied
      by \code{-1} if the sign of that difference is negative.
}
\item{\code{cv.auc.tab}}{
      Like \code{cv.rsq.tab} but is the \code{AUC} at each fold.
      Binomial models only.
}
\item{\code{cv.cor.tab}}{
      Like \code{cv.rsq.tab} but is the \code{cor} at each fold.
      Poisson models only.
}
\item{\code{cv.deviance.tab}}{
      Like \code{cv.rsq.tab} but is the \code{MeanDev} at each fold.
      Binomial models only.
}
\item{\code{cv.calib.int.tab}}{
      Like \code{cv.rsq.tab} but is the \code{CalibInt} at each fold.
      Binomial models only.
}
\item{\code{cv.calib.slope.tab}}{
      Like \code{cv.rsq.tab} but is the \code{CalibSlope} at each fold.
      Binomial models only.
}
\item{\code{cv.oof.rsq.tab}}{
     Generated only if \code{keepxy=TRUE} or \code{pmethod="cv"}.\cr
     A matrix with \code{ncross * nfold + 1} rows and \code{max.nterms} columns.
     Each element holds an out-of-fold RSq (\code{oof.rsq}),
     calculated from predictions from the out-of-fold observations using
     the model built with the in-fold data.  The final row is the mean over
     all folds.
     The R-Squareds are calculated using the \code{weights} argument if it was supplied.
}
\item{\code{cv.infold.rsq.tab}}{
     Generated only if \code{keepxy=TRUE}.
     Like \code{cv.oof.rsq.tab} but from predictions made on the in-fold observations.
}
\item{\code{cv.oof.fit.tab}}{
      Generated only if the \code{varmod.method} argument is used.
      Predicted values on the out-of-fold data.
      Dataframe with \code{nrow(data)} rows and \code{ncross} columns.
\cr\cr
   \bold{The following field appears only if the \code{varmod.method} argument is specified.}
}
\item{\code{varmod}}{
      An object of class \code{"varmod"}.
      See the \code{\link[=predict.varmod]{varmod}} help page for a description.
      Only appears if the \code{varmod.method} argument is used.
}
}
\seealso{
  \code{\link{earth}}
}
