% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods.R
\name{plot.vsel}
\alias{plot.vsel}
\title{Plot summary statistics of a variable selection}
\usage{
\method{plot}{vsel}(
  x,
  nterms_max = NULL,
  stats = "elpd",
  deltas = FALSE,
  alpha = 2 * pnorm(-1),
  baseline = if (!inherits(x$refmodel, "datafit")) "ref" else "best",
  thres_elpd = NA,
  resp_oscale = TRUE,
  ...
)
}
\arguments{
\item{x}{An object of class \code{vsel} (returned by \code{\link[=varsel]{varsel()}} or \code{\link[=cv_varsel]{cv_varsel()}}).}

\item{nterms_max}{Maximum submodel size for which the statistics are
calculated. Using \code{NULL} is effectively the same as using
\code{length(solution_terms(object))}. Note that \code{nterms_max} does not count the
intercept, so use \code{nterms_max = 0} for the intercept-only model. For
\code{\link[=plot.vsel]{plot.vsel()}}, \code{nterms_max} must be at least \code{1}.}

\item{stats}{One or more character strings determining which performance
statistics (i.e., utilities or losses) to calculate. Available statistics
are:
\itemize{
\item \code{"elpd"}: (expected) sum of log predictive densities.
\item \code{"mlpd"}: mean log predictive density, that is, \code{"elpd"} divided by the
number of observations.
\item \code{"mse"}: mean squared error (only available in the situations mentioned
in section "Details" below).
\item \code{"rmse"}: root mean squared error (only available in the situations
mentioned in section "Details" below). For the corresponding standard error
and lower and upper confidence interval bounds, bootstrapping is used.
\item \code{"acc"} (or its alias, \code{"pctcorr"}): classification accuracy (only
available in the situations mentioned in section "Details" below).
\item \code{"auc"}: area under the ROC curve (only available in the situations
mentioned in section "Details" below). For the corresponding standard error
and lower and upper confidence interval bounds, bootstrapping is used.
}}

\item{deltas}{If \code{TRUE}, the submodel statistics are estimated as differences
from the baseline model (see argument \code{baseline}). With a "difference
\emph{from} the baseline model", we mean to take the submodel statistic minus
the baseline model statistic (not the other way round).}

\item{alpha}{A number determining the (nominal) coverage \code{1 - alpha} of the
normal-approximation (or bootstrap; see argument \code{stats}) confidence
intervals. For example, in case of the normal approximation, \code{alpha = 2 * pnorm(-1)} corresponds to a confidence interval stretching by one standard
error on either side of the point estimate.}

\item{baseline}{For \code{\link[=summary.vsel]{summary.vsel()}}: Only relevant if \code{deltas} is \code{TRUE}.
For \code{\link[=plot.vsel]{plot.vsel()}}: Always relevant. Either \code{"ref"} or \code{"best"}, indicating
whether the baseline is the reference model or the best submodel found (in
terms of \code{stats[1]}), respectively.}

\item{thres_elpd}{Only relevant if \code{any(stats \%in\% c("elpd", "mlpd"))}. The
threshold for the ELPD difference (taking the submodel's ELPD minus the
baseline model's ELPD) above which the submodel's ELPD is considered to be
close enough to the baseline model's ELPD. An equivalent rule is applied in
case of the MLPD. See \code{\link[=suggest_size]{suggest_size()}} for a formalization. Supplying \code{NA}
deactivates this.}

\item{resp_oscale}{Only relevant for the latent projection. A single logical
value indicating whether to calculate the performance statistics on
response scale (\code{TRUE}) or on latent scale (\code{FALSE}).}

\item{...}{Arguments passed to the internal function which is used for
bootstrapping (if applicable; see argument \code{stats}). Currently, relevant
arguments are \code{B} (the number of bootstrap samples, defaulting to \code{2000})
and \code{seed} (see \code{\link[=set.seed]{set.seed()}}, defaulting to
\code{sample.int(.Machine$integer.max, 1)}, but can also be \code{NA} to not call
\code{\link[=set.seed]{set.seed()}} at all).}
}
\description{
This is the \code{\link[=plot]{plot()}} method for \code{vsel} objects (returned by \code{\link[=varsel]{varsel()}} or
\code{\link[=cv_varsel]{cv_varsel()}}).
}
\details{
The \code{stats} options \code{"mse"} and \code{"rmse"} are only available for:
\itemize{
\item the traditional projection,
\item the latent projection with \code{resp_oscale = FALSE},
\item the latent projection with \code{resp_oscale = TRUE} in combination with
\verb{<refmodel>$family$cats} being \code{NULL}.
}

The \code{stats} option \code{"acc"} (= \code{"pctcorr"}) is only available for:
\itemize{
\item the \code{\link[=binomial]{binomial()}} family in case of the traditional projection,
\item all families in case of the augmented-data projection,
\item the \code{\link[=binomial]{binomial()}} family (on the original response scale) in case of the
latent projection with \code{resp_oscale = TRUE} in combination with
\verb{<refmodel>$family$cats} being \code{NULL},
\item all families (on the original response scale) in case of the latent
projection with \code{resp_oscale = TRUE} in combination with
\verb{<refmodel>$family$cats} being not \code{NULL}.
}

The \code{stats} option \code{"auc"} is only available for:
\itemize{
\item the \code{\link[=binomial]{binomial()}} family in case of the traditional projection,
\item the \code{\link[=binomial]{binomial()}} family (on the original response scale) in case of the
latent projection with \code{resp_oscale = TRUE} in combination with
\verb{<refmodel>$family$cats} being \code{NULL}.
}
}
\section{Horizontal lines}{
As long as the reference model's performance is computable, it is always
shown in the plot as a dashed red horizontal line. If \code{baseline = "best"},
the baseline model's performance is shown as a dotted black horizontal line.
If \code{!is.na(thres_elpd)} and \code{any(stats \%in\% c("elpd", "mlpd"))}, the value
supplied to \code{thres_elpd} (which is automatically adapted internally in case
of the MLPD or \code{deltas = FALSE}) is shown as a dot-dashed gray horizontal
line for the reference model and, if \code{baseline = "best"}, as a long-dashed
green horizontal line for the baseline model.
}

\examples{
if (requireNamespace("rstanarm", quietly = TRUE)) {
  # Data:
  dat_gauss <- data.frame(y = df_gaussian$y, df_gaussian$x)

  # The "stanreg" fit which will be used as the reference model (with small
  # values for `chains` and `iter`, but only for technical reasons in this
  # example; this is not recommended in general):
  fit <- rstanarm::stan_glm(
    y ~ X1 + X2 + X3 + X4 + X5, family = gaussian(), data = dat_gauss,
    QR = TRUE, chains = 2, iter = 500, refresh = 0, seed = 9876
  )

  # Variable selection (here without cross-validation and with small values
  # for `nterms_max`, `nclusters`, and `nclusters_pred`, but only for the
  # sake of speed in this example; this is not recommended in general):
  vs <- varsel(fit, nterms_max = 3, nclusters = 5, nclusters_pred = 10,
               seed = 5555)
  print(plot(vs))
}

}
