% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stan_glm.R, R/stan_glm.fit.R
\name{stan_glm}
\alias{stan_glm}
\alias{stan_glm.fit}
\alias{stan_glm.nb}
\title{Bayesian generalized linear models via Stan}
\usage{
stan_glm(formula, family = gaussian(), data, weights, subset,
  na.action = NULL, offset = NULL, model = TRUE, x = FALSE, y = TRUE,
  contrasts = NULL, ..., prior = normal(), prior_intercept = normal(),
  prior_ops = prior_options(), prior_PD = FALSE, algorithm = c("sampling",
  "optimizing", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE,
  sparse = FALSE)

stan_glm.nb(formula, data, weights, subset, na.action = NULL, offset = NULL,
  model = TRUE, x = FALSE, y = TRUE, contrasts = NULL, link = "log",
  ..., prior = normal(), prior_intercept = normal(),
  prior_ops = prior_options(), prior_PD = FALSE, algorithm = c("sampling",
  "optimizing", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE)

stan_glm.fit(x, y, weights = rep(1, NROW(x)), offset = rep(0, NROW(x)),
  family = gaussian(), ..., prior = normal(), prior_intercept = normal(),
  prior_ops = prior_options(), group = list(), prior_PD = FALSE,
  algorithm = c("sampling", "optimizing", "meanfield", "fullrank"),
  adapt_delta = NULL, QR = FALSE, sparse = FALSE)
}
\arguments{
\item{formula, data, subset}{Same as \code{\link[stats]{glm}}.}

\item{family}{Same as \code{\link[stats]{glm}}, except negative binomial GLMs
are also possible using the \code{\link{neg_binomial_2}} family object.}

\item{na.action, contrasts}{Same as \code{\link[stats]{glm}}, but
rarely specified.}

\item{model, offset, weights}{Same as \code{\link[stats]{glm}}.}

\item{x, y}{In \code{stan_glm, stan_glm.nb}, logical scalars indicating whether to
return the design matrix and response vector. In \code{stan_glm.fit},
a design matrix and response vector.}

\item{...}{Further arguments passed to the function in the \pkg{rstan} 
package (\code{\link[rstan]{sampling}}, \code{\link[rstan]{vb}}, or 
\code{\link[rstan]{optimizing}}), corresponding to the estimation method 
named by \code{algorithm}. For example, if \code{algorithm} is
\code{"sampling"} it is possible to specify \code{iter}, \code{chains},
\code{cores}, \code{refresh}, etc.}

\item{prior}{The prior distribution for the regression coefficients. 
\code{prior} can be a call to \code{normal}, \code{student_t},
\code{cauchy}, \code{hs} or \code{hs_plus}. See \code{\link{priors}} for
details. To omit a prior ---i.e., to use a flat (improper) uniform
prior--- set \code{prior} to \code{NULL}.}

\item{prior_intercept}{The prior distribution for the intercept. 
\code{prior_intercept} can be a call to \code{normal}, \code{student_t} or 
\code{cauchy}. See \code{\link{priors}} for details. To omit a prior 
---i.e., to use a flat (improper) uniform prior--- set 
\code{prior_intercept} to \code{NULL}. (\strong{Note:} if a dense 
representation of the design matrix is utilized ---i.e., if the
\code{sparse} argument is left at its default value of \code{FALSE}--- then
the prior distribution for the intercept is set so it applies to the value
when all predictors are centered.)}

\item{prior_ops}{Additional options related to prior distributions. Set to
\code{NULL} to omit a prior on the dispersion; otherwise see
\code{\link{prior_options}}.}

\item{prior_PD}{A logical scalar (defaulting to \code{FALSE}) indicating
whether to draw from the prior predictive distribution instead of
conditioning on the outcome.}

\item{algorithm}{Character string (possibly abbreviated) indicating the 
estimation approach to use. Can be \code{"sampling"} for MCMC (the
default), \code{"optimizing"} for optimization, \code{"meanfield"} for
variational inference with independent normal distributions, or
\code{"fullrank"} for variational inference with a multivariate normal
distribution. See \code{\link{rstanarm-package}} for more details on the
estimation algorithms. NOTE: not all fitting functions support all four
algorithms.}

\item{adapt_delta}{Only relevant if \code{algorithm="sampling"}. See 
\code{\link{adapt_delta}} for details.}

\item{QR}{A logical scalar (defaulting to \code{FALSE}). If \code{TRUE},
a scaled \code{\link{qr}} decomposition is applied to the design matrix, 
\eqn{X = Q^\ast R^\ast}{X = Q* R*}, where 
\eqn{Q^\ast = Q \sqrt{n-1}}{Q* = Q (n-1)^0.5} and
\eqn{R^\ast = \frac{1}{\sqrt{n-1}} R}{R* = (n-1)^(-0.5) R}. The coefficients
relative to \eqn{Q^\ast}{Q*} are obtained and then premultiplied by the
inverse of \eqn{R^{\ast}}{R*} to obtain coefficients relative to the
original predictors, \eqn{X}. These transformations do not change the 
likelihood of the data but are recommended for computational reasons when 
there are multiple predictors. However, because the coefficients relative
to \eqn{Q^\ast}{Q*} are not very interpretable it is hard to specify an 
informative prior. Setting \code{QR=TRUE} is therefore only recommended 
if you do not have an informative prior for the regression coefficients.}

\item{sparse}{A logical scalar (defaulting to \code{FALSE}) indicating
whether to use a sparse representation of the design (X) matrix. 
Setting this to \code{TRUE} will likely be twice as slow, even if the
design matrix has a considerable number of zeros, but it may allow the
model to be estimated when the computer has too little RAM to
utilize a dense design matrix. If \code{TRUE}, the design matrix
is not centered (since that would destroy the sparsity) and it is
not possible to specify both \code{QR = TRUE} and \code{sparse = TRUE}.}

\item{link}{For \code{stan_glm.nb} only, the link function to use. See 
\code{\link{neg_binomial_2}}.}

\item{group}{A list, possibly of length zero (the default), but otherwise
having the structure of that produced by \code{\link[lme4]{mkReTrms}} to
indicate the group-specific part of the model. In addition, this list must
have elements for the \code{regularization}, \code{concentration},
\code{shape}, and \code{scale} components of a \code{\link{decov}}
prior for the covariance matrices among the group-specific coefficients.}
}
\value{
A \link[=stanreg-objects]{stanreg} object is returned 
for \code{stan_glm, stan_glm.nb}.

A \link[=stanfit-class]{stanfit} object (or a slightly modified 
  stanfit object) is returned if \code{stan_glm.fit} is called directly.
}
\description{
Generalized linear modeling with optional prior distributions for 
the coefficients, intercept, and nuisance parameter.
}
\details{
The \code{stan_glm} function is similar in syntax to 
  \code{\link[stats]{glm}} but rather than performing maximum likelihood 
  estimation of generalized linear models, full Bayesian estimation is 
  performed (if \code{algorithm} is \code{"sampling"}) via MCMC. The Bayesian
  model adds independent priors on the coefficients of the GLM. The 
  \code{stan_glm} function calls the workhorse \code{stan_glm.fit} function, 
  but it is also possible to call the latter directly.
  
  The \code{stan_glm.nb} function, which takes the extra argument
  \code{link}, is a simple wrapper for \code{stan_glm} with \code{family =
  \link{neg_binomial_2}(link)}.
}
\examples{
if (!grepl("^sparc",  R.version$platform)) {
### Linear regression
fit <- stan_glm(mpg / 10 ~ ., data = mtcars, QR = TRUE,
                algorithm = "fullrank") # for speed only
plot(fit, ci_level = 0.5)
plot(fit, ci_level = 0.5, pars = "beta")

### Logistic regression
data(lalonde, package = "arm")
dat <- within(lalonde, {
 re74_1k <- re74 / 1000
 re75_1k <- re75 / 1000
})
t7 <- student_t(df = 7)
fmla <- treat ~ re74_1k + re75_1k + educ + black + hisp + 
               married + nodegr + u74 + u75
fit2 <- stan_glm(fmla, data = dat, family = binomial(link="logit"), 
                 prior = t7, prior_intercept = t7, 
                 algorithm = "fullrank") # for speed only
plot(fit2, pars = c("black", "hisp", "nodegr", "u74", "u75"), 
     ci_level = 0.67, outer_level = 1, show_density = TRUE)
pp_check(fit2, check = "resid")
pp_check(fit2, check = "test", test = "mean")
}
\dontrun{
### Poisson regression (example from help("glm")) 
counts <- c(18,17,15,20,10,20,25,13,12)
outcome <- gl(3,1,9)
treatment <- gl(3,3)
fit3 <- stan_glm(counts ~ outcome + treatment, family = poisson(link="log"),
                 prior = normal(0, 1), prior_intercept = normal(0, 5))
plot(fit3, fill_color = "skyblue4", est_color = "maroon")

### Gamma regression (example from help("glm"))
clotting <- data.frame(log_u = log(c(5,10,15,20,30,40,60,80,100)),
                       lot1 = c(118,58,42,35,27,25,21,19,18),
                       lot2 = c(69,35,26,21,18,16,13,12,12))
fit4 <- stan_glm(lot1 ~ log_u, data = clotting, family = Gamma) 
print(fit4, digits = 2)                 
fit5 <- update(fit4, formula = lot2 ~ log_u)
}

}
\references{
Gelman, A. and Hill, J. (2007). \emph{Data Analysis Using
  Regression and Multilevel/Hierarchical Models.} Cambridge University Press,
  Cambridge, UK. (Ch. 3-6)
}
\seealso{
\code{\link{stanreg-methods}} and 
\code{\link[stats]{glm}}.

The various vignettes for \code{stan_glm}.
}

