% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/edsurveyTable.R
\name{edsurveyTable}
\alias{edsurveyTable}
\title{EdSurvey Tables With Conditional Means}
\usage{
edsurveyTable(formula, data, weightVar = NULL, jrrIMax = 1,
  pctAggregationLevel = NULL, returnMeans = TRUE, returnSepct = TRUE,
  varMethod = c("jackknife", "Taylor"), drop = FALSE,
  omittedLevels = TRUE, defaultConditions = TRUE, recode = NULL,
  returnVarEstInputs = FALSE)
}
\arguments{
\item{formula}{object of class \ifelse{latex}{\code{formula}}{\code{\link[stats]{formula}}},
potentially with
a subject scale or subscale
on the left-hand side and
variables to tabulate
on the right-hand side.
When the left-hand side of the
formula is omitted and \code{returnMeans} is \code{TRUE},
then the default subject scale or subscale is used.
You can find the default composite scale and all subscales
using the function \code{\link{showPlausibleValues}}.
Note that the order of the right-hand side variables affects the output.}

\item{data}{object of class \code{edsurvey.data.frame}. See \code{\link{readNAEP}}
for how to generate an \code{edsurvey.data.frame}.}

\item{weightVar}{character string indicating the weight variable to use.
Note that only the name of the
weight variable needs to be included here, and any
replicate weights will be automatically included.
When this argument is \code{NULL}, the function uses the default.
Use \code{\link{showWeights}} to find the default.}

\item{jrrIMax}{a numeric value; when using the jackknife variance estimation method, the default estimation option, \code{jrrIMax=1}, uses the 
  sampling variance from the first plausible value as the component for sampling variance estimation. The \eqn{V_{jrr}} 
  term (see the Details section of
\code{\link{lm.sdf}} to see the definition of \eqn{V_{jrr}}) can be estimated with any number of plausible values, and values larger than the number of 
  plausible values on the survey (including \code{Inf}) will result in all of the plausible values being used. 
  Higher values of \code{jrrIMax} lead to longer computing times and more accurate variance estimates.}

\item{pctAggregationLevel}{the percentage variable sums up to 100 for the first
\code{pctAggregationLevel} columns.
So, when set to \code{0}, the \code{PCT} column adds up to 100
across the entire sample.
When set to \code{1}, the \code{PCT} column adds up to 100
within each level of the first variable on the
right-hand side of the formula; when set to \code{2},
then the percentage
adds up to 100 within the interaction of the
first and second variable, and so on.
Default is \code{NULL}, which will result in the
lowest feasible aggregation level. 
See Examples section.}

\item{returnMeans}{a logical value; set to \code{TRUE} (the default) to get the \code{MEAN} and
\code{SE(MEAN)} columns in the returned table described in the Value section.}

\item{returnSepct}{a logical value; set to \code{TRUE} (the default) to get the \code{SE(PCT)} column in the returned table described in the Value section.}

\item{varMethod}{a character set to \dQuote{jackknife} or \dQuote{Taylor} that indicates the variance estimation method
to be used.}

\item{drop}{a logical value. When set to the default value of \code{FALSE}, a result with a single column is still represented as a \code{data.frame} and is
not converted to a vector.}

\item{omittedLevels}{a logical value. When set to the default value of \code{TRUE}, drops those levels of all factor variables that are specified
in an \code{edsurvey.data.frame}. Use \code{print} on an \code{edsurvey.data.frame} to see the omitted levels.}

\item{defaultConditions}{a logical value. When set to the default value of \code{TRUE}, uses the default conditions stored in an \code{edsurvey.data.frame}
to subset the data. Use \code{print} on an \code{edsurvey.data.frame} to see the default conditions.}

\item{recode}{a list of lists to recode variables. Defaults to \code{NULL}. Can be set as
\code{recode} \code{=} \code{list(var1} \code{=} \code{list(from} \code{=} \code{c("a", "b", "c"),} \code{to} \code{=} \code{"c"))}. See Examples.}

\item{returnVarEstInputs}{a logical value; set to \code{TRUE} to return the
inputs to the jackknife and imputation variance
estimates. This is intended to allow for
the computation
of covariances between estimates.}
}
\value{
A table with the following columns:
   \item{RHS levels}{one column for each right-hand side variable. Each row
                     describes students who are at the levels shown in that row.}
   \item{\code{N}}{the unweighted count of students in the survey in the \code{RHS levels}}
   \item{\code{WTD_N}}{the weighted count of students in the survey in the \code{RHS levels}}
   \item{\code{PCT}}{the percentage of students at the aggregation level specified by \code{pctAggregationLevel} (see Arguments).
                     See the vignette titled
        \href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{Statistics}
            in the section
\dQuote{Estimation of Weighted Percentages} and its first subsection
\dQuote{Estimation of Weighted Percentages When Plausible Values Are Not Present.}}
   \item{\code{SE(PCT)}}{the standard error of the percentage, accounting
                         for the survey sampling methodology. When \code{varMethod}
                         is \code{jackknife}, the calculation of this column is
                         described in the vignette titled
\href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{Statistics}
 in the section
\dQuote{Estimation of the Standard Error of Weighted Percentages When Plausible Values Are Not Present, Using the Jackknife Method.}
                      When \code{varMethod} is set to \code{Taylor}, the calculation of this column is described in
\dQuote{Estimation of the Standard Error of Weighted Percentages When Plausible Values Are Not Present, Using the Taylor Series Method.}
}
   \item{\code{MEAN}}{the mean assessment score for units in the \code{RHS levels}, calculated according to the vignette titled
\href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{Statistics}
in the section
\dQuote{Estimation of Weighted Means When Plausible Values Are Present.}}
   \item{\code{SE(MEAN)}}{the standard error of the \code{MEAN} column (the mean assessment score for units in the \code{RHS levels}), calculated according to the vignette titled
\href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{Statistics}                      
in the sections
\dQuote{Estimation of Standard Errors of Weighted Means When Plausible Values Are Present, Using the Jackknife Method}
or 
\dQuote{Estimation of Standard Errors of Weighted Means When Plausible Values Are Present, Using the Taylor Series Method,}
depending on the value of \code{varMethod}.}

 When \code{returnVarEstInputs} is \code{TRUE}, two additional elements are
 returned: \code{meanVarEstInputs} and \code{pctVarEstInputs}, which
 correspond to the \code{MEAN} and \code{PCT} columns, respectively. These two
 objects can be used for calculating covariances with
 \code{\link{varEstToCov}}.
}
\description{
Returns a summary table (as a \ifelse{latex}{\code{data.frame}}{\code{\link[base]{data.frame}}})
that shows the number of students, the percentage of students, and the mean
value of the outcome (or left-hand side) variable by the
predictor (or right-hand side) variable(s).
}
\details{
This method can be used to generate a simple one-way, two-way, or
\emph{n}-way
table with unweighted and weighted \emph{n} values and percentages. It also
can calculate the average of the subject scale or subscale for students at
each level of the cross-tabulation table. 
      
A detailed description of all statistics is given in the vignette titled
\href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{Statistics}.
}
\examples{
\dontrun{
# read in the example data (generated, not real student data)

sdf <- readNAEP(system.file("extdata/data", "M36NT2PM.dat", package = "NAEPprimer"))

# create a table that shows only the breakdown of dsex
edsurveyTable(composite ~ dsex, data=sdf, returnMeans=FALSE, returnSepct=FALSE)

# create a table with composite scores by dsex
edsurveyTable(composite ~ dsex, data=sdf)

# add a second variable
edsurveyTable(composite ~ dsex + b017451, data=sdf)

# add a third variable, do not omit any levels
edsurveyTable(composite ~ dsex + b017451 + b003501, data=sdf, omittedLevels=FALSE)

# add a third variable, do not omit any levels, change aggregation level
edsurveyTable(composite ~ dsex + b017451 + b003501, data=sdf, omittedLevels=FALSE,
	            pctAggregationLevel=0)

edsurveyTable(composite ~ dsex + b017451 + b003501, data=sdf, omittedLevels=FALSE,
	            pctAggregationLevel=1)

edsurveyTable(composite ~ dsex + b017451 + b003501, data=sdf, omittedLevels=FALSE,
	            pctAggregationLevel=2)

# variance estimation using the Taylor series 
edsurveyTable(composite ~ dsex + b017451 + b003501, data=sdf, varMethod="Taylor")
}
}
\references{
Binder, D. A. (1983). On the variances of asymptotically normal estimators from complex surveys. \emph{International Statistical Review}, \emph{51}(3), 279--292. 

Rubin, D. B. (1987). \emph{Multiple imputation for nonresponse in surveys}. New York, NY: Wiley.
}
\author{
Paul Bailey and Ahmad Emad
}
