% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/peer.gen.R, R/peer.lin.R, R/peer.q.R
\name{genpeer}
\alias{genpeer}
\alias{linpeer}
\alias{qpeer}
\title{Estimating Peer Effects Models}
\usage{
genpeer(
  formula,
  excluded.instruments,
  endogenous.variables,
  Glist,
  data,
  estimator = "IV",
  structural = FALSE,
  drop = NULL,
  fixed.effects = FALSE,
  HAC = "iid",
  checkrank = FALSE,
  compute.cov = TRUE,
  tol = 1e-10
)

linpeer(
  formula,
  excluded.instruments,
  Glist,
  data,
  estimator = "IV",
  structural = FALSE,
  drop = NULL,
  fixed.effects = FALSE,
  HAC = "iid",
  checkrank = FALSE,
  compute.cov = TRUE,
  tol = 1e-10
)

qpeer(
  formula,
  excluded.instruments,
  Glist,
  tau,
  type = 7,
  data,
  estimator = "IV",
  structural = FALSE,
  fixed.effects = FALSE,
  HAC = "iid",
  checkrank = FALSE,
  drop = NULL,
  compute.cov = TRUE,
  tol = 1e-10
)
}
\arguments{
\item{formula}{An object of class \link[stats]{formula}: a symbolic description of the model. \code{formula} should be specified as \code{y ~ x1 + x2},
where \code{y} is the outcome and \code{x1} and \code{x2} are control variables, which can include contextual variables such as averages or quantiles among peers.}

\item{excluded.instruments}{An object of class \link[stats]{formula} to indicate excluded instruments. It should be specified as \code{~ z1 + z2},
where \code{z1} and \code{z2} are excluded instruments for the endogenous peer variables (e.g., the quantile peer outcomes in \code{qpeer}).}

\item{endogenous.variables}{An object of class \link[stats]{formula} that allows specifying endogenous variables. It is used to indicate the peer variables whose effects will be estimated. These can include average peer variables, quantile peer variables,
or a combination of multiple variables. It should be specified as \code{~ y1 + y2}, where \code{y1} and \code{y2} are the endogenous peer variables.}

\item{Glist}{The adjacency matrix. For networks consisting of multiple subnets (e.g., schools), \code{Glist} must be a list of subnets, with the \code{m}-th element being an \eqn{n_m \times n_m} adjacency matrix, where \eqn{n_m} is the number of nodes in the \code{m}-th subnet.}

\item{data}{An optional data frame, list, or environment (or an object that can be coerced by \link[base]{as.data.frame} to a data frame) containing the variables
in the model. If not found in \code{data}, the variables are taken from \code{environment(formula)}, typically the environment from which the function is called.}

\item{estimator}{A character string specifying the estimator to be used. The available options are:
\code{"IV"} for the standard instrumental variable estimator,
\code{"gmm.identity"} for the GMM estimator with the identity matrix as the weight,
\code{"gmm.optimal"} for the GMM estimator with the optimal weight matrix,
\code{"JIVE"} for the Jackknife instrumental variable estimator, and
\code{"JIVE2"} for the Type 2 Jackknife instrumental variable estimator.}

\item{structural}{A logical value indicating whether the structural specification (\code{TRUE}) or the reduced-form specification (\code{FALSE}, the default) should be estimated (see Details).}

\item{drop}{A dummy vector of the same length as the sample, indicating whether an observation should be dropped.
This can be used, for example, to remove false isolates or to estimate the model only on non-isolated agents.
These observations cannot be directly removed from the network by the user because they may still be friends with other agents.}

\item{fixed.effects}{A logical value or string specifying whether the model includes subnet fixed effects. The fixed effects may differ between isolated and non-isolated nodes. Accepted values are \code{"no"} or \code{FALSE} (indicating no fixed effects),
\code{"join"} or \code{TRUE} (indicating the same fixed effects for isolated and non-isolated nodes within each subnet), and \code{"separate"} (indicating different fixed effects for isolated and non-isolated nodes within each subnet). Note that \code{"join"} fixed effects are not applicable for structural models;
in that case, \code{"join"} and \code{TRUE} are automatically converted to \code{"separate"}.}

\item{HAC}{A character string specifying the correlation structure among the idiosyncratic error terms for covariance computation. Options are \code{"iid"} for independent errors, \code{"hetero"} for heteroskedastic non-autocorrelated errors, and \code{"cluster"} for heteroskedastic errors with potential within-subnet correlation.}

\item{checkrank}{A logical value indicating whether the instrument matrix should be checked for full rank. If the matrix is not of full rank, unimportant columns will be removed to obtain a full-rank matrix.}

\item{compute.cov}{A logical value indicating whether the covariance matrix of the estimator should be computed.}

\item{tol}{A tolerance value used in the QR factorization to identify columns of explanatory variable and instrument matrices that ensure a full-rank matrix (see the \link[base]{qr} function).}

\item{tau}{A numeric vector specifying the quantile levels.}

\item{type}{An integer between 1 and 9 selecting one of the nine quantile algorithms used to compute peer quantiles (see the \link[stats]{quantile} function).}
}
\value{
A list containing:
\item{model.info}{A list with information about the model, such as the number of subnets, number of observations, and other key details.}
\item{gmm}{A list of GMM estimation results, including parameter estimates, the covariance matrix, and related statistics.}
\item{data}{A list containing the outcome, outcome quantiles among peers, control variables, and excluded instruments used in the model.}
}
\description{
\code{qpeer} estimates the quantile peer effect models introduced by Houndetoungan (2025). In the \code{\link{linpeer}} function, quantile peer variables are replaced with the average peer variable, and they can be replaced with other peer variables in the \code{\link{genpeer}} function.
}
\details{
Let \eqn{\mathcal{N}} be a set of \eqn{n} agents indexed by the integer \eqn{i \in [1, n]}.
Agents are connected through a network that is characterized by an adjacency matrix \eqn{\mathbf{G} = [g_{ij}]} of dimension \eqn{n \times n}, where \eqn{g_{ij} = 1} if agent \eqn{j} is a friend of agent \eqn{i}, and \eqn{g_{ij} = 0} otherwise.
In weighted networks, \eqn{g_{ij}} can be a nonnegative variable (not necessarily binary) that measures the intensity of the outgoing link from \eqn{i} to \eqn{j}. The model can also accommodate such networks. Note that the network generally consists of many independent subnets (e.g., schools).
The \code{Glist} argument is the list of subnets. In the case of a single subnet, \code{Glist} will be a list containing one matrix.\cr

Let \eqn{\mathcal{T}} be a set of quantile levels. The reduced-form specification of quantile peer effect models is given by:
\deqn{y_i = \sum_{\tau \in \mathcal{T}} \lambda_{\tau} q_{\tau,i}(\mathbf{y}_{-i}) + \mathbf{x}_i^{\prime}\beta + \varepsilon_i,}
where \eqn{\mathbf{y}_{-i} = (y_1, \ldots, y_{i-1}, y_{i+1}, \ldots, y_n)^{\prime}} is the vector of outcomes for other units, and \eqn{q_{\tau,i}(\mathbf{y}_{-i})} is the
sample \eqn{\tau}-quantile of peer outcomes. The term \eqn{\varepsilon_i} is an idiosyncratic error term, \eqn{\lambda_{\tau}} captures the effect of the \eqn{\tau}-quantile of peer outcomes on \eqn{y_i},
and \eqn{\beta} captures the effect of \eqn{\mathbf{x}_i} on \eqn{y_i}. For the definition of the sample \eqn{\tau}-quantile, see Hyndman and Fan (1996).
If the network matrix is weighted, the sample weighted quantile can be used, where the outcome for friend \eqn{j} of \eqn{i} is weighted by \eqn{g_{ij}}. It can be shown that
the sample \eqn{\tau}-quantile is a weighted average of two peer outcomes. For more details, see the \link[stats]{quantile} and \code{\link{qpeer.instruments}} functions. \cr

The quantile \eqn{q_{\tau,i}(\mathbf{y}_{-i})} can be replaced with the average peer variable in \code{\link{linpeer}} or with other measures in \code{\link{genpeer}} through the \code{endogenous.variables} argument.
In \code{\link{genpeer}}, it is possible to specify multiple peer variables, such as male peer averages and female peer averages. Additionally, both quantiles and averages can be included (\code{\link{genpeer}}
is general and encompasses \code{\link{qpeer}} and \code{\link{linpeer}}). See examples. \cr

One issue in linear peer effect models is that individual preferences with conformity and complementarity/substitution lead to the same reduced form.
However, it is possible to disentangle both types of preferences using isolated individuals (individuals without friends).
The structural specification of the model differs between isolated and non-isolated individuals.
For isolated \eqn{i}, the specification is similar to a standard linear-in-means model without social interactions, given by:
\deqn{y_i = \mathbf{x}_i^{\prime}\beta + \varepsilon_i.}
If node \eqn{i} is non-isolated, the specification is given by:
\deqn{y_i = \sum_{\tau \in \mathcal{T}} \lambda_{\tau} q_{\tau,i}(\mathbf{y}_{-i}) + (1 - \lambda_2)(\mathbf{x}_i^{\prime}\beta  + \varepsilon_i),}
where \eqn{\lambda_2} determines whether preferences exhibit conformity or complementarity/substitution. In general, \eqn{\lambda_2 > 0}, which means that preferences are conformist (anti-conformity may be possible in some models when \eqn{\lambda_2 < 0}).
In contrast, when \eqn{\lambda_2 = 0}, there is complementarity/substitution between individuals depending on the signs of the \eqn{\lambda_{\tau}} parameters.
It is obvious that \eqn{\beta} and \eqn{\lambda_2} can be identified only if the network includes enough isolated individuals.
}
\examples{
\donttest{
# Fix the RNG seed so that the simulated data below are reproducible
set.seed(123)
ngr  <- 50  # Number of subnets
nvec <- rep(30, ngr)  # Size of subnets (30 nodes in every subnet)
n    <- sum(nvec)  # Total number of nodes across all subnets

### Simulating Data
## Network matrix (a list with one binary adjacency matrix per subnet)
G <- lapply(1:ngr, function(z) {
  # Each potential link is drawn independently with probability 0.3
  Gz <- matrix(rbinom(nvec[z]^2, 1, 0.3), nvec[z], nvec[z])
  diag(Gz) <- 0  # No self-links
  # Adding isolated nodes (important for the structural model)
  # The number of isolated nodes is drawn from 0:nvec[z] with linearly
  # decreasing weights, so small numbers are more likely than large ones
  niso <- sample(0:nvec[z], 1, prob = (nvec[z] + 1):1 / sum((nvec[z] + 1):1))
  if (niso > 0) {
    # Setting a row to zero removes all the friends of the corresponding node
    Gz[sample(1:nvec[z], niso), ] <- 0
  }
  Gz
})

# Quantile levels used in the model: 0, 1/3, 2/3, and 1
tau <- seq(0, 1, 1/3)
# Two control variables
X   <- cbind(rnorm(n), rpois(n, 2))
# Values passed as `lambda` to `qpeer.sim` below (four values, as many as
# the quantile levels in `tau`)
l   <- c(0.2, 0.15, 0.1, 0.2)
# Values passed as `beta` to `qpeer.sim` below (presumably an intercept
# followed by the coefficients on the two columns of `X`; see `qpeer.sim`)
b   <- c(2, -0.5, 1)
# Idiosyncratic error terms (mean 0, standard deviation 0.4)
eps <- rnorm(n, 0, 0.4)

## Generating `y`
y <- qpeer.sim(formula = ~ X, Glist = G, tau = tau, lambda = l, 
               beta = b, epsilon = eps)$y

### Estimation
## Computing instruments
# The instruments are requested on a finer grid of quantile levels
# (0, 0.1, ..., 1) than the four levels in `tau` used by the model
Z <- qpeer.inst(formula = ~ X, Glist = G, tau = seq(0, 1, 0.1), 
                max.distance = 2, checkrank = TRUE)
Z <- Z$instruments  # Keep only the `instruments` element

## Reduced-form model 
rest <- qpeer(formula = y ~ X, excluded.instruments = ~ Z, Glist = G, tau = tau)
summary(rest)
summary(rest, diagnostic = TRUE)  # Summary with diagnostics

## Structural model
sest <- qpeer(formula = y ~ X, excluded.instruments = ~ Z, Glist = G, tau = tau,
              structural = TRUE)
summary(sest, diagnostic = TRUE)
# The lambda^* parameter is y_q (conformity) in the outputs.
# There is no conformity in the data, so the estimate will be approximately 0.

## Structural model with double fixed effects per subnet using optimal GMM 
## and controlling for heteroskedasticity
sesto <- qpeer(formula = y ~ X, excluded.instruments = ~ Z, Glist = G, tau = tau,
               structural = TRUE, fixed.effects = "separate", HAC = "hetero", 
               estimator = "gmm.optimal")
summary(sesto, diagnostic = TRUE)

## Average peer effect model
# Row-normalized network to compute instruments
Gnorm <- lapply(G, function(g) {
  d <- rowSums(g)
  d[d == 0] <- 1  # Avoid dividing by zero; rows of isolated nodes remain zero
  g / d  # Divide each row by its row sum
})

# GX and GGX (`Gall` is the block-diagonal matrix stacking all the subnets)
Gall <- Matrix::bdiag(Gnorm)
GX   <- as.matrix(Gall \%*\% X)
GGX  <- as.matrix(Gall \%*\% GX)

# Standard linear model
lpeer <- linpeer(formula = y ~ X + GX, excluded.instruments = ~ GGX, Glist = Gnorm)
summary(lpeer, diagnostic = TRUE)
# Note: The normalized network is used here by definition of the model.
# Contextual effects are also included (this is also possible for the quantile model).

# The standard model can also be structural
lpeers <- linpeer(formula = y ~ X + GX, excluded.instruments = ~ GGX, Glist = Gnorm,
                  structural = TRUE, fixed.effects = "separate")
summary(lpeers, diagnostic = TRUE)

## Estimation using `genpeer`
# Average peer variable computed manually and included as an endogenous variable
Gy     <- as.vector(Gall \%*\% y)
gpeer1 <- genpeer(formula = y ~ X + GX, excluded.instruments = ~ GGX, 
                  endogenous.variables = ~ Gy, Glist = Gnorm, structural = TRUE, 
                  fixed.effects = "separate")
summary(gpeer1, diagnostic = TRUE)

# Using both average peer variables and quantile peer variables as endogenous,
# or only the quantile peer variable
# Quantile peer `y` (computed on the original network `G`, not on `Gnorm`)
qy <- qpeer.inst(formula = y ~ 1, Glist = G, tau = tau)
qy <- qy$qy  # Keep only the `qy` element

# Model estimation (both `Gy` and `qy` are treated as endogenous, and both
# `GGX` and `Z` are used as excluded instruments)
gpeer2 <- genpeer(formula = y ~ X + GX, excluded.instruments = ~ GGX + Z, 
                  endogenous.variables = ~ Gy + qy, Glist = Gnorm, structural = TRUE, 
                  fixed.effects = "separate")
summary(gpeer2, diagnostic = TRUE)}
}
\references{
Houndetoungan, A. (2025). Quantile peer effect models. arXiv preprint arXiv:2506.12920, \doi{10.48550/arXiv.2506.12920}.

Hyndman, R. J., & Fan, Y. (1996). Sample quantiles in statistical packages. The American Statistician, 50(4), 361-365, \doi{10.1080/00031305.1996.10473566}.
}
\seealso{
\code{\link{qpeer.sim}}, \code{\link{qpeer.instruments}}
}
