% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rdif.R
\name{rdif}
\alias{rdif}
\alias{rdif.default}
\alias{rdif.est_irt}
\alias{rdif.est_item}
\title{IRT residual-based differential item functioning (RDIF) detection framework}
\usage{
rdif(x, ...)

\method{rdif}{default}(
  x,
  data,
  score = NULL,
  group,
  focal.name,
  D = 1,
  alpha = 0.05,
  missing = NA,
  purify = FALSE,
  purify.by = c("rdifrs", "rdifr", "rdifs"),
  max.iter = 10,
  min.resp = NULL,
  method = "ML",
  range = c(-5, 5),
  norm.prior = c(0, 1),
  nquad = 41,
  weights = NULL,
  ncore = 1,
  verbose = TRUE,
  ...
)

\method{rdif}{est_irt}(
  x,
  score = NULL,
  group,
  focal.name,
  alpha = 0.05,
  missing = NA,
  purify = FALSE,
  purify.by = c("rdifrs", "rdifr", "rdifs"),
  max.iter = 10,
  min.resp = NULL,
  method = "ML",
  range = c(-5, 5),
  norm.prior = c(0, 1),
  nquad = 41,
  weights = NULL,
  ncore = 1,
  verbose = TRUE,
  ...
)

\method{rdif}{est_item}(
  x,
  group,
  focal.name,
  alpha = 0.05,
  missing = NA,
  purify = FALSE,
  purify.by = c("rdifrs", "rdifr", "rdifs"),
  max.iter = 10,
  min.resp = NULL,
  method = "ML",
  range = c(-5, 5),
  norm.prior = c(0, 1),
  nquad = 41,
  weights = NULL,
  ncore = 1,
  verbose = TRUE,
  ...
)
}
\arguments{
\item{x}{A data frame containing the item metadata (e.g., item parameters, number of categories, models ...), an object of class \code{\link{est_item}}
obtained from the function \code{\link{est_item}}, or an object of class \code{\link{est_irt}} obtained from the function \code{\link{est_irt}}.
The data frame of item metadata can be easily obtained using the function \code{\link{shape_df}}. See \code{\link{est_irt}}, \code{\link{irtfit}},
\code{\link{info}} or \code{\link{simdat}} for more details about the item metadata.}

\item{...}{additional arguments for further updates.}

\item{data}{A matrix containing examinees' response data for the items in the argument \code{x}. A row and column indicate
the examinees and items, respectively.}

\item{score}{A vector of examinees' ability estimates. If the abilities are not provided, \code{\link{rdif}} function estimates the abilities before
computing RDIF statistics. See \code{\link{est_score}} for more details about scoring methods. Default is NULL.}

\item{group}{A numeric or character vector indicating group membership of examinees. The length of the vector should be the same as the number of rows
in the response data matrix.}

\item{focal.name}{A single numeric or character indicating the level of group which corresponds to the focal group.
For example, if \code{group = c(0, 1, 0, 1, 1)} and '1' indicates the focal group, then \code{focal.name = 1}.}

\item{D}{A scaling factor in IRT models to make the logistic function as close as possible to the normal ogive function (if set to 1.7).
Default is 1.}

\item{alpha}{A numeric value to specify significance \eqn{\alpha}-level of the hypothesis test using the RDIF fit statistics.
Default is .05.}

\item{missing}{A value indicating missing values in the response data set. Default is NA.}

\item{purify}{A logical value indicating whether a purification process will be implemented or not. Default is FALSE.}

\item{purify.by}{A character string specifying a RDIF statistic with which the purification is implemented. Available statistics
are "rdifrs" for \eqn{RDIF_{RS}}, "rdifr" for \eqn{RDIF_{R}}, and "rdifs" for \eqn{RDIF_{S}}.}

\item{max.iter}{A positive integer value specifying the maximum number of iterations for the purification process. Default is 10.}

\item{min.resp}{A positive integer value specifying the minimum number of item responses for an examinee when scores are computed.
Default is NULL. See details below for more information.}

\item{method}{A character string indicating a scoring method. Available methods are "ML" for the maximum likelihood estimation,
"MAP" for the maximum a posteriori estimation, and "EAP" for the expected a posteriori estimation. Default method is "ML".}

\item{range}{A numeric vector of two components to restrict the range of ability scale for the ML, EAP, and MAP scoring methods. Default is c(-5, 5).}

\item{norm.prior}{A numeric vector of two components specifying a mean and standard deviation of the normal prior distribution.
These two parameters are used to obtain the gaussian quadrature points and the corresponding weights from the normal distribution. Default is
c(0,1). Ignored if \code{method} is "ML".}

\item{nquad}{An integer value specifying the number of gaussian quadrature points from the normal prior distribution. Default is 41.
Ignored if \code{method} is "ML" or "MAP".}

\item{weights}{A two-column matrix or data frame containing the quadrature points (in the first column) and the corresponding weights
(in the second column) of the latent variable prior distribution. The weights and quadrature points can be easily obtained
using the function \code{\link{gen.weight}}. If NULL and \code{method} is "EAP", default values are used (see the arguments
of \code{norm.prior} and \code{nquad}). Ignored if \code{method} is "ML" or "MAP".}

\item{ncore}{The number of logical CPU cores to use. Default is 1. See \code{\link{est_score}} for details.}

\item{verbose}{A logical value. If TRUE, the progress messages of the purification procedure are printed. Default is TRUE.}
}
\value{
This function returns a list of four internal objects. The four objects are:
\item{no_purify}{A list of several sub-objects containing the results of DIF analysis without a purification procedure. The sub-objects are:
    \describe{
      \item{dif_stat}{A data frame containing the results of three RDIF statistics across all evaluated items. From the first column, each column
       indicates item's ID, \eqn{RDIF_{R}} statistic, standardized \eqn{RDIF_{R}}, \eqn{RDIF_{S}} statistic, standardized \eqn{RDIF_{S}},
       \eqn{RDIF_{RS}} statistic, p-value of the \eqn{RDIF_{R}}, p-value of the \eqn{RDIF_{S}}, p-value of the \eqn{RDIF_{RS}}, sample size of
       the reference group, sample size of the focal group, and total sample size, respectively. Note that \eqn{RDIF_{RS}} does not have its standardized
       value because it is a \eqn{\chi^{2}} statistic.}
      \item{moments}{A data frame containing the moments of three RDIF statistics. From the first column, each column indicates item's ID,
       mean of \eqn{RDIF_{R}}, standard deviation of \eqn{RDIF_{R}}, mean of \eqn{RDIF_{S}}, standard deviation of \eqn{RDIF_{S}}, and
       covariance of \eqn{RDIF_{R}} and \eqn{RDIF_{S}}, respectively.}
      \item{dif_item}{A list of three numeric vectors showing potential DIF items flagged by each of the RDIF statistics. The three numeric vectors
       contain the items flagged by \eqn{RDIF_{R}}, \eqn{RDIF_{S}}, and \eqn{RDIF_{RS}}, respectively.}
      \item{score}{A vector of ability estimates used to compute the RDIF statistics.}
   }
}
\item{purify}{A logical value indicating whether the purification process was used.}
\item{with_purify}{A list of several sub-objects containing the results of DIF analysis with a purification procedure. The sub-objects are:
    \describe{
      \item{purify.by}{A character string indicating which RDIF statistic is used for the purification. "rdifr", "rdifs", and "rdifrs" refer to
       \eqn{RDIF_{R}}, \eqn{RDIF_{S}}, and \eqn{RDIF_{RS}}, respectively.}
      \item{dif_stat}{A data frame containing the results of three RDIF statistics across all evaluated items. From the first column, each column
       indicates item's ID, \eqn{RDIF_{R}} statistic, standardized \eqn{RDIF_{R}}, \eqn{RDIF_{S}} statistic, standardized \eqn{RDIF_{S}},
       \eqn{RDIF_{RS}} statistic, p-value of the \eqn{RDIF_{R}}, p-value of the \eqn{RDIF_{S}}, p-value of the \eqn{RDIF_{RS}}, sample size of
       the reference group, sample size of the focal group, total sample size, and \emph{n}th iteration where the RDIF statistics were computed,
       respectively.}
      \item{moments}{A data frame containing the moments of three RDIF statistics. From the first column, each column indicates item's ID,
       mean of \eqn{RDIF_{R}}, standard deviation of \eqn{RDIF_{R}}, mean of \eqn{RDIF_{S}}, standard deviation of \eqn{RDIF_{S}}, covariance
       of \eqn{RDIF_{R}} and \eqn{RDIF_{S}}, and \emph{n}th iteration where the RDIF statistics were computed, respectively.}
      \item{dif_item}{A list of three numeric vectors showing potential DIF items flagged by each of the RDIF statistics. The three numeric vectors
       contain the items flagged by \eqn{RDIF_{R}}, \eqn{RDIF_{S}}, and \eqn{RDIF_{RS}}, respectively.}
      \item{n.iter}{A total number of iterations implemented for the purification.}
      \item{score}{A vector of final purified ability estimates used to compute the RDIF statistics.}
      \item{complete}{A logical value indicating whether the purification process was completed. If FALSE, it means that the purification process
       reached the maximum iteration number but it was not complete.}
    }
}
\item{alpha}{A significance \eqn{\alpha}-level used to compute the p-values of RDIF statistics.}
}
\description{
This function computes three RDIF statistics (Lim, Choe, & Han, 2022; Lim, Choe, Han, Lee, & Hong, 2021),
which are \eqn{RDIF_{R}}, \eqn{RDIF_{S}}, and \eqn{RDIF_{RS}}, for each item. \eqn{RDIF_{R}} primarily
captures the typical contrast in raw residual pattern between two groups caused by uniform DIF whereas
\eqn{RDIF_{S}} primarily captures the typical contrast in squared residual pattern between two groups caused
by nonuniform DIF. \eqn{RDIF_{RS}} can reasonably capture both types of DIF.
}
\details{
The RDIF framework (Lim et al., 2022; Lim et al., 2021) consists of three IRT residual-based statistics: \eqn{RDIF_{R}}, \eqn{RDIF_{S}},
and \eqn{RDIF_{RS}}. Under the null hypothesis that a test contains no DIF items, \eqn{RDIF_{R}} and \eqn{RDIF_{S}} follow
normal distributions asymptotically. \eqn{RDIF_{RS}} is based on a bivariate normal distribution of the \eqn{RDIF_{R}} and
\eqn{RDIF_{S}} statistics. Under the null hypothesis of no DIF items, it follows a \eqn{\chi^{2}} distribution asymptotically
with 2 degrees of freedom. See Lim et al. (2022) for more details about the RDIF framework.

The \code{\link{rdif}} function computes all three RDIF statistics of \eqn{RDIF_{R}}, \eqn{RDIF_{S}}, and \eqn{RDIF_{RS}}. The current
version of \code{\link{rdif}} function only supports dichotomous item response data. To compute the three statistics, the \code{\link{rdif}} function
requires (1) item parameter estimates obtained from aggregate data regardless of group membership, (2) examinees' ability estimates
(e.g., ML), and (3) examinees' item response data. Note that the ability estimates need to be computed using the aggregate data-based
item parameter estimates. The item parameter estimates should be provided in the \code{x} argument, the ability estimates should
be provided in the \code{score} argument, and the response data should be provided in the \code{data} argument. When the abilities
are not given in the \code{score} argument (i.e., \code{score = NULL}), the \code{\link{rdif}} function estimates examinees' abilities
automatically using the scoring method specified in the \code{method} argument (e.g., \code{method = "ML"}).

The \code{group} argument accepts a numeric or character vector containing two distinct values. Of the two distinct values, one
represents the reference group and the other represents the focal group. The length of the vector should be the same as the number
of rows in the response data, and each value in the vector should indicate the group membership of the corresponding examinee. Once \code{group} is
specified, a single numeric or character value needs to be provided in the \code{focal.name} argument to define which value in
the \code{group} argument represents the focal group.

As in other DIF detection approaches, an iterative purification process can be implemented for the RDIF framework.
When \code{purify = TRUE}, the purification process is implemented based on one of the RDIF statistics specified in the \code{purify.by}
argument (e.g., \code{purify.by="rdifrs"}). At each purification iteration, examinees' latent abilities are computed using the purified items and
the scoring method specified in the \code{method} argument. The iterative purification process stops when no further DIF items are found or
the process reaches a predetermined limit of iteration, which can be specified in the \code{max.iter} argument. See Lim et al. (2022)
for more details about the purification procedure.

Scoring with a few items entails large standard errors, which in turn could compromise DIF detection with the RDIF framework.
The \code{min.resp} argument can be used to avoid using scores with large standard errors when computing the RDIF statistics, especially
during the purification process. For example, if \code{min.resp} is not NULL (e.g., \code{min.resp=5}), item responses of examinees
whose tally of item responses is less than the specified minimum number are treated as missing values (i.e., NA). Accordingly,
their ability estimates become missing values and are not used for computing the RDIF statistics. If \code{min.resp=NULL},
an examinee's score will be computed as long as there exists, at least, 1 item response for the examinee.
}
\section{Methods (by class)}{
\itemize{
\item \code{default}: Default method to compute three RDIF statistics using a data frame \code{x} containing the item metadata.

\item \code{est_irt}: An object created by the function \code{\link{est_irt}}.

\item \code{est_item}: An object created by the function \code{\link{est_item}}.
}}

\examples{
\donttest{
# call library
library("dplyr")

## Uniform DIF detection
###############################################
# (1) manipulate true uniform DIF data
###############################################
# import the "-prm.txt" output file from flexMIRT
flex_sam <- system.file("extdata", "flexmirt_sample-prm.txt", package = "irtQ")

# select 36 of 3PLM items which are non-DIF items
par_nstd <-
  bring.flexmirt(file=flex_sam, "par")$Group1$full_df \%>\%
  dplyr::filter(.data$model == "3PLM") \%>\%
  dplyr::filter(dplyr::row_number() \%in\% 1:36) \%>\%
  dplyr::select(1:6)
par_nstd$id <- paste0("nondif", 1:36)

# generate four new items to inject uniform DIF
difpar_ref <-
  shape_df(par.drm=list(a=c(0.8, 1.5, 0.8, 1.5), b=c(0.0, 0.0, -0.5, -0.5), g=0.15),
           item.id=paste0("dif", 1:4), cats=2, model="3PLM")

# manipulate uniform DIF on the four new items by adding constants to b-parameters
# for the focal group
difpar_foc <-
  difpar_ref \%>\%
  dplyr::mutate_at(.vars="par.2", .funs=function(x) x + rep(0.7, 4))

# combine the 4 DIF and 36 non-DIF items for both reference and focal groups
# thus, the first four items have uniform DIF
par_ref <- rbind(difpar_ref, par_nstd)
par_foc <- rbind(difpar_foc, par_nstd)

# generate the true thetas
set.seed(123)
theta_ref <- rnorm(500, 0.0, 1.0)
theta_foc <- rnorm(500, 0.0, 1.0)

# generate the response data
resp_ref <- simdat(par_ref, theta=theta_ref, D=1)
resp_foc <- simdat(par_foc, theta=theta_foc, D=1)
data <- rbind(resp_ref, resp_foc)

###############################################
# (2) estimate the item and ability parameters
#     using the aggregate data
###############################################
# estimate the item parameters
est_mod <- est_irt(data=data, D=1, model="3PLM")
est_par <- est_mod$par.est

# estimate the ability parameters using ML
score <- est_score(x=est_par, data=data, method="ML")$est.theta

###############################################
# (3) conduct DIF analysis
###############################################
# create a vector of group membership indicators
# where '1' indicates the focal group
group <- c(rep(0, 500), rep(1, 500))

# (a)-1 compute RDIF statistics by providing scores,
#       and without a purification
dif_nopuri_1 <- rdif(x=est_par, data=data, score=score,
                     group=group, focal.name=1, D=1, alpha=0.05)
print(dif_nopuri_1)

# (a)-2 compute RDIF statistics by not providing scores
#       and without a purification
dif_nopuri_2 <- rdif(x=est_par, data=data, score=NULL,
                     group=group, focal.name=1, D=1, alpha=0.05,
                     method="ML")
print(dif_nopuri_2)

# (b)-1 compute RDIF statistics with a purification
#       based on RDIF(R)
dif_puri_r <- rdif(x=est_par, data=data, score=score,
                   group=group, focal.name=1, D=1, alpha=0.05,
                   purify=TRUE, purify.by="rdifr")
print(dif_puri_r)

# (b)-2 compute RDIF statistics with a purification
#       based on RDIF(S)
dif_puri_s <- rdif(x=est_par, data=data, score=score,
                   group=group, focal.name=1, D=1, alpha=0.05,
                   purify=TRUE, purify.by="rdifs")
print(dif_puri_s)

# (b)-3 compute RDIF statistics with a purification
#       based on RDIF(RS)
dif_puri_rs <- rdif(x=est_par, data=data, score=score,
                    group=group, focal.name=1, D=1, alpha=0.05,
                    purify=TRUE, purify.by="rdifrs")
print(dif_puri_rs)
}

}
\references{
Lim, H., Choe, E. M., & Han, K. T. (2022). A residual-based differential item functioning detection framework in
item response theory. \emph{Journal of Educational Measurement, 59}(1), 80-104. \doi{10.1111/jedm.12313}.

Lim, H., Choe, E. M., Han, K. T., Lee, S., & Hong, M. (2021, June). \emph{IRT residual approach
to detecting DIF.} Paper presented at the Annual Meeting of the National Council on Measurement
in Education. Online.
}
\seealso{
\code{\link{est_item}}, \code{\link{info}}, \code{\link{simdat}}, \code{\link{shape_df}},
\code{\link{gen.weight}}, \code{\link{est_score}}
}
\author{
Hwanggyu Lim \email{hglim83@gmail.com}
}
