% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/binsregselect.R
\name{binsregselect}
\alias{binsregselect}
\title{Data-driven IMSE-Optimal Partitioning/Binning Selection for Binscatter}
\usage{
binsregselect(y, x, w = NULL, deriv = 0, bins = c(0, 0),
  binspos = "qs", binsmethod = "dpi", nbinsrot = NULL,
  simsgrid = 20, savegrid = F, vce = "HC1", useeffn = NULL,
  cluster = NULL, dfcheck = c(20, 30), masspoints = "on",
  weights = NULL, subset = NULL, norotnorm = F, numdist = NULL,
  numclust = NULL)
}
\arguments{
\item{y}{outcome variable. A vector.}

\item{x}{independent variable of interest. A vector.}

\item{w}{control variables. A matrix or a vector.}

\item{deriv}{derivative order of the regression function for estimation, testing and plotting.
The default is \code{deriv=0}, which corresponds to the function itself.}

\item{bins}{a vector. \code{bins=c(p,s)} sets a piecewise polynomial of degree \code{p} with \code{s} smoothness constraints
for data-driven (IMSE-optimal) selection of the partitioning/binning scheme. The default is
\code{bins=c(0, 0)}, which corresponds to piecewise constant (canonical binscatter).}

\item{binspos}{position of binning knots.  The default is \code{binspos="qs"}, which corresponds to quantile-spaced
binning (canonical binscatter).  The other option is \code{"es"} for evenly-spaced binning.}

\item{binsmethod}{method for data-driven selection of the number of bins. The default is \code{binsmethod="dpi"},
which corresponds to the IMSE-optimal direct plug-in rule.  The other option is: \code{"rot"}
for rule of thumb implementation.}

\item{nbinsrot}{initial number of bins value used to construct the DPI number of bins selector.
If not specified, the data-driven ROT selector is used instead.}

\item{simsgrid}{number of evaluation points of an evenly-spaced grid within each bin used for evaluation of
the supremum (or infimum) operation needed to construct confidence bands and hypothesis testing
procedures. The default is \code{simsgrid=20}, which corresponds to 20 evenly-spaced
evaluation points within each bin for approximating the supremum (or infimum) operator.}

\item{savegrid}{If true, a data frame containing the grid is produced.}

\item{vce}{procedure to compute the variance-covariance matrix estimator. Options are
\itemize{
\item \code{"const"} homoskedastic variance estimator.
\item \code{"HC0"} heteroskedasticity-robust plug-in residuals variance estimator
                   without weights.
\item \code{"HC1"} heteroskedasticity-robust plug-in residuals variance estimator
                   with hc1 weights. Default.
\item \code{"HC2"} heteroskedasticity-robust plug-in residuals variance estimator
                   with hc2 weights.
\item \code{"HC3"} heteroskedasticity-robust plug-in residuals variance estimator
                   with hc3 weights.
}}

\item{useeffn}{effective sample size to be used when computing the (IMSE-optimal) number of bins. This option
is useful for extrapolating the optimal number of bins to larger (or smaller) datasets than
the one used to compute it.}

\item{cluster}{cluster ID. Used to compute cluster-robust standard errors.}

\item{dfcheck}{adjustments for minimum effective sample size checks, which take into account number of unique
values of \code{x} (i.e., number of mass points), number of clusters, and degrees of freedom of
the different statistical models considered. The default is \code{dfcheck=c(20, 30)}.
See \href{https://arxiv.org/abs/1902.09615}{Cattaneo, Crump, Farrell and Feng (2019b)} for more details.}

\item{masspoints}{how mass points in \code{x} are handled. Available options:
\itemize{
\item \code{"on"} all mass point and degrees of freedom checks are implemented. Default.
\item \code{"noadjust"} mass point checks and the corresponding effective sample size adjustments are omitted.
\item \code{"nolocalcheck"} within-bin mass point and degrees of freedom checks are omitted.
\item \code{"off"} \code{"noadjust"} and \code{"nolocalcheck"} are set simultaneously.
\item \code{"veryfew"} forces the function to proceed as if \code{x} has only a few mass points (i.e., distinct values).
                       In other words, forces the function to proceed as if the mass point and degrees of freedom checks had failed.
}}

\item{weights}{an optional vector of weights to be used in the fitting process. Should be \code{NULL} or
a numeric vector. For more details, see \code{\link{lm}}.}

\item{subset}{optional rule specifying a subset of observations to be used.}

\item{norotnorm}{if true, a uniform density rather than a normal density is used for ROT selection.}

\item{numdist}{number of distinct values for selection. Used to speed up computation.}

\item{numclust}{number of clusters for selection. Used to speed up computation.}
}
\value{
\item{\code{nbinsrot.poly}}{ROT number of bins, unregularized.}
       \item{\code{nbinsrot.regul}}{ROT number of bins, regularized.}
       \item{\code{nbinsrot.uknot}}{ROT number of bins, unique knots.}
       \item{\code{nbinsdpi}}{DPI number of bins.}
       \item{\code{nbinsdpi.uknot}}{DPI number of bins, unique knots.}
       \item{\code{opt}}{ A list containing options passed to the function, as well as total sample size \code{n},
                          number of distinct values \code{Ndist} in \code{x}, and number of clusters \code{Nclust}.}
       \item{\code{data.grid}}{A data frame containing the grid.}
}
\description{
\code{binsregselect} implements data-driven procedures for selecting the number of bins for binscatter
            estimation. The selected number is optimal in minimizing integrated mean squared error (IMSE).
}
\examples{
 x <- runif(500); y <- sin(x)+rnorm(500)
 est <- binsregselect(y,x)
 summary(est)
}
\references{
Cattaneo, M. D., R. K. Crump, M. H. Farrell, and Y. Feng. 2019a: \href{https://arxiv.org/abs/1902.09608}{On Binscatter}. Working Paper.

Cattaneo, M. D., R. K. Crump, M. H. Farrell, and Y. Feng. 2019b: \href{https://arxiv.org/abs/1902.09615}{Binscatter Regressions}. Working Paper.
}
\seealso{
\code{\link{binsreg}}, \code{\link{binsregtest}}.
}
\author{
Matias D. Cattaneo, University of Michigan, Ann Arbor, MI. \email{cattaneo@umich.edu}.

Richard K. Crump, Federal Reserve Bank of New York, New York, NY. \email{richard.crump@ny.frb.org}.

Max H. Farrell, University of Chicago, Chicago, IL. \email{max.farrell@chicagobooth.edu}.

Yingjie Feng (maintainer), University of Michigan, Ann Arbor, MI. \email{yjfeng@umich.edu}.
}
