\name{tpm}
\alias{tpm}
\alias{tpm,formula-method}
\alias{summary,twopartm-method}
\alias{print.twopartm}
\alias{show,twopartm-method}

\title{Fit Two-part Regression Models for Zero-inflated Data}

\description{
  Fit two-part regression models for zero-inflated data. The first-part model is a binomial regression model for the indicator of a non-zero response. The second-part model is a generalized linear model for the non-zero response values.
}

\usage{
tpm(formula_part1, formula_part2 = NULL, data,
    link_part1 = c("logit", "probit", "cloglog", "cauchit", "log"),
    family_part2 = gaussian(), weights = NULL, \dots)

\S4method{summary}{twopartm}(object, \dots)

}

\arguments{
    \item{formula_part1}{formula specifying the dependent variable and the regressors used for the first-part model, i.e., the binomial model for probabilities of non-zero responses. If \code{formula_part2} is \code{NULL}, the same regressors specified here are employed in both parts.}
    \item{formula_part2}{formula specifying the dependent variable and the regressors used for the second-part model, i.e., the glm model for non-zero responses. If it's \code{NULL}, the same regressors specified in \code{formula_part1} are employed in the second-part model.}
    \item{data}{a data frame, list or environment (or object coercible by \code{\link{as.data.frame}} to a data frame) containing the variables in the models for both parts.}
    \item{link_part1}{character string specifying the link function of the first-part model, i.e., the binomial model for probabilities of non-zero responses. It could be \code{logit}, \code{probit}, \code{cauchit}, (corresponding to logistic, normal and Cauchy CDFs respectively) \code{log} or \code{cloglog} (complementary log-log).}
    \item{family_part2}{a description of the error distribution and link function to be used in the second-part model,  i.e.,  the glm model for non-zero responses. This can be a character string naming a family function, a family function or the result of a call to a family function.}
    \item{weights}{an optional numeric vector of weights to be used in the fitting process for both parts. Should be \code{NULL} or a numeric vector.}
    \item{object}{a fitted two-part model object of class \code{twopartm} as returned by \code{tpm}.}
    \item{\dots}{arguments passed to \code{\link{glm}} or \code{\link{summary.glm}} in the default setup.}
}

\details{
  Two-part models are two-component models for zero-inflated data, one modeling indicators
  about any non-zero responses and another modeling non-zero response values. They model
  the zeros and non-zeros as two separate processes. For instance, in explaining individual
  annual health expenditure, the event is represented by a specific disease. If the illness
  occurs, then some not-for-free treatment will be needed, and a positive expense will be
  observed. In these situations, a two-part model allows the censoring mechanism and the
  outcome to be modeled by separate processes. In other words, it permits the zeros and
  non-zeros to be generated by different densities, as a special type of mixture model.


  In function \code{tpm}, the zeros are handled by the first-part model, specifically a glm with binomial family
  and the specified link function for the probability of a non-zero outcome. The second-part model is a
  glm with the specified family and link function for the non-zero values. The regressors for the two parts
  may differ and can be specified separately. The two components of the model are estimated separately
  using \code{glm} calls, with iteratively reweighted least squares (IRLS) optimization.


  The returned fitted model object is of class \code{twopartm}. A set of standard extractor functions
  for fitted model objects is available for objects of class \code{twopartm}, including methods for
  the generic functions \code{print}, \code{summary}, \code{plot}, \code{coef},
  \code{logLik}, \code{residuals}, and \code{predict}. See
  \code{\link[twopartm]{predict-methods}} for more details on the prediction methods.


  The \code{summary} method lists the summaries of the two fitted glm models, one for each part.
}

\value{
\code{tpm} returns an object of class \code{twopartm}.

\code{summary} returns a list with two objects of class \code{summary.glm}, for the first-part model and the second-part model respectively.
}

\seealso{\code{\link{twopartm-class}}, \code{\link{glm}}, \code{\link{summary.glm}}, \code{\link[twopartm]{predict-methods}}
}

\references{
Belotti, F., Deb, P., Manning, W.G. and Norton, E.C. (2015). twopm: Two-part models.
\emph{The Stata Journal}, 15(1), pp. 3--20.

Hay, J. W., and R. J. Olsen. (1984). Let them eat cake: A note on comparing alternative
models of the demand for medical care. \emph{Journal of Business and Economic Statistics}
2: 279--282.

Leung, S. F., and S. Yu. (1996). On the choice between sample selection and two-part
models. \emph{Journal of Econometrics} 72: 197--229.

Mihaylova, B., A. Briggs, A. O'Hagan, and S. G. Thompson. (2011). Review of statistical
methods for analyzing healthcare resources and costs. \emph{Health Economics} 20: 897--916.
}

\author{Yajie Duan, Birol Emir, Griffith Bell and Javier Cabrera}

\examples{

## Health-expenditure data: a non-negative continuous response
data(meps, package = "twopartm")


## Same regressors in both parts: logistic regression for the first
## part, and a Gamma glm with log link for the second part
tpmodel <- tpm(exp_tot ~ female + age, data = meps, link_part1 = "logit",
               family_part2 = Gamma(link = "log"))

tpmodel

summary(tpmodel)


## Different regressors in the two parts: probit regression for the
## first part, and a Gamma glm with log link for the second part
tpmodel <- tpm(formula_part1 = exp_tot ~ female + age,
               formula_part2 = exp_tot ~ female + age + ed_colplus,
               data = meps, link_part1 = "probit",
               family_part2 = Gamma(link = "log"))

tpmodel

summary(tpmodel)

## Transformed regressors in the second part, with randomly assigned
## weights stored as a new column of the data
meps$weights <- sample(1:30, nrow(meps), replace = TRUE)

tpmodel <- tpm(formula_part1 = exp_tot ~ female + age,
               formula_part2 = exp_tot ~ female + I(age^2) + ed_colplus,
               data = meps, link_part1 = "logit",
               family_part2 = Gamma(link = "log"), weights = meps$weights)

tpmodel

summary(tpmodel)


## Count-response data
data("bioChemists")

## Same regressors in both parts: logistic regression for the first
## part, and a poisson regression with its default log link for the
## second part
tpmodel <- tpm(art ~ ., data = bioChemists, link_part1 = "logit",
               family_part2 = poisson)

tpmodel

summary(tpmodel)


}

\keyword{models}
\keyword{regression}
