% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sim_tv.R
\name{sim_tv_DGP}
\alias{sim_tv_DGP}
\title{Simulate a Time-varying Panel With a Latent Group Structure}
\usage{
sim_tv_DGP(
  N = 50,
  n_periods = 40,
  intercept = TRUE,
  p = 1,
  n_groups = 3,
  d = 3,
  dynamic = FALSE,
  group_proportions = NULL,
  error_spec = "iid",
  locations = NULL,
  scales = NULL,
  polynomial_coef = NULL,
  sd_error = 1,
  DGP = lifecycle::deprecated()
)
}
\arguments{
\item{N}{the number of cross-sectional units. Default is 50.}

\item{n_periods}{the number of simulated time periods \eqn{T}. Default is 40.}

\item{intercept}{logical. If \code{TRUE}, a time-varying intercept is generated. Default is \code{TRUE}.}

\item{p}{the number of simulated explanatory variables. Default is 1.}

\item{n_groups}{the number of latent groups \eqn{K}. Default is 3.}

\item{d}{the polynomial degree used to construct the time-varying coefficients. Default is 3.}

\item{dynamic}{Logical. If \code{TRUE}, the panel includes one stationary autoregressive lag of \eqn{y_{it}} as a regressor. Default is \code{FALSE}.}

\item{group_proportions}{a numeric vector of length \code{n_groups} indicating the fraction of \eqn{N} each group will contain. If \code{NULL}, all groups are of size \eqn{N / K}. Default is \code{NULL}.}

\item{error_spec}{options include
\describe{
\item{\code{"iid"}}{for \eqn{iid} errors.}
\item{\code{"AR"}}{for an \eqn{AR(1)} error process with an autoregressive coefficient of 0.5.}
}
Default is \code{"iid"}.}

\item{locations}{a \eqn{p \times K} matrix of location parameters of a logistic distribution function used to construct the time-varying coefficients. If left empty, the location parameters are drawn randomly. Default is \code{NULL}.}

\item{scales}{a \eqn{p \times K} matrix of scale parameters of a logistic distribution function used to construct the time-varying coefficients. If left empty, the scale parameters are drawn randomly. Default is \code{NULL}.}

\item{polynomial_coef}{a \eqn{p \times d \times K} array of coefficients for the polynomials used to construct the time-varying coefficients. If left empty, the polynomial coefficients are drawn randomly. Default is \code{NULL}.}

\item{sd_error}{standard deviation of the cross-sectional errors. Default is 1.}

\item{DGP}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} the data generating process. Options are
\describe{
\item{1}{generates a trend only.}
\item{2}{simulates a trend and an additional exogenous explanatory variable.}
\item{3}{draws a dynamic panel data model with one \eqn{AR} lag.}
}}
}
\value{
A list holding
\item{\code{alpha}}{a \eqn{T \times p \times K} array of group-specific time-varying parameters}
\item{\code{beta}}{a \eqn{T \times p \times N} array of individual time-varying parameters}
\item{\code{groups}}{a vector indicating the group memberships \eqn{(g_1, \dots, g_N)}, where \eqn{g_i = k} if \eqn{i \in} group \eqn{k}.}
\item{\code{y}}{a \eqn{NT \times 1} vector of the dependent variable, with \eqn{\bold{y}=(y_1, \dots, y_N)^\prime}, \eqn{y_i = (y_{i1}, \dots, y_{iT})^\prime} and the scalar \eqn{y_{it}}.}
\item{\code{X}}{a \eqn{NT \times p} matrix of explanatory variables, with \eqn{\bold{X}=(x_1, \dots, x_N)^\prime}, \eqn{x_i = (x_{i1}, \dots, x_{iT})^\prime} and the \eqn{p \times 1} vector \eqn{x_{it}}.}
\item{\code{data}}{a \eqn{NT \times (p + 1)} data.frame of the outcome and the explanatory variables.}
}
\description{
Construct a time-varying panel data set subject to a latent group structure.
}
\details{
The scalar dependent variable \eqn{y_{it}} is driven by the following panel data model:
\deqn{y_{it} = \gamma_i + \beta^\prime_{it} x_{it} + u_{it}, \quad i = 1, \dots, N, \; t = 1, \dots, T,}
where \eqn{y_{it}} is the scalar dependent variable, \eqn{\gamma_i} is an individual fixed effect and \eqn{x_{it}} is a \eqn{p \times 1} vector of explanatory variables. The errors \eqn{u_{it}} feature an \eqn{iid} standard normal distribution.
The coefficient vector \eqn{\beta_i = \{\beta_{i1}^\prime, \dots, \beta_{iT}^\prime \}^\prime} is subject to the group pattern
\deqn{\beta_i \left( \frac{t}{T} \right) = \sum_{k = 1}^K \alpha_k \left( \frac{t}{T} \right) \bold{1} \{i \in G_k \},}
with \eqn{K =}\code{n_groups}, \eqn{\cup_{k = 1}^K G_k = \{1, \dots, N\}}, \eqn{G_k \cap G_j = \emptyset} and \eqn{\| \alpha_k \| \neq \| \alpha_j \|} for any \eqn{k \neq j}.

The scalar dependent variable \eqn{y_{it}} is generated according to the following grouped time-varying panel data model
\deqn{y_{it} = \gamma_i + \beta_i^\prime (t/T) x_{it} + u_{it}, \quad i = \{1, \dots, N\}, \quad t = \{1, \dots, T\}.}
\eqn{\gamma_i} represents individual fixed effects and \eqn{x_{it}} a \eqn{p \times 1} vector of regressors.
The individual functional slope coefficient vectors \eqn{\beta_i (t/T)} are subject to a latent group structure \eqn{\beta_i (t/T) = \sum_{k = 1}^K \alpha_k (t/T) \bold{1} \{i \in G_k\}}.
As a consequence, the group-level coefficients \eqn{\bold{\alpha} (t/T) = (\alpha^\prime_1 (t/T), \dots, \alpha^\prime_K (t/T))^\prime} follow the partition \eqn{\bold{G}} of \eqn{N} cross-sectional units \eqn{\bold{G} = (G_1, \dots, G_K)} such that \eqn{\cup_{k=1}^K G_k = \{1,\dots,N\}} and \eqn{G_k \cap G_l = \emptyset, \; \alpha_k \neq \alpha_l} for any two groups \eqn{k \neq l}.

The predictors are simulated as:
\deqn{x_{it,j} = 0.2 \gamma_i + e_{it,j}, \quad \gamma_i,e_{it,j} \sim i.i.d. N(0, 1), \quad j = \{1, \dots, p\},}
where \eqn{e_{it,j}} denotes a series of innovations. \eqn{\gamma_i} and \eqn{e_i} are independent of each other.

In case \code{locations = NULL}, the location parameters are drawn from \eqn{\sim U[0.3, 0.9]}.
In case \code{scales = NULL}, the scale parameters are drawn from \eqn{\sim U[0.01, 0.09]}.
In case \code{polynomial_coef = NULL}, the polynomial coefficients are drawn from \eqn{\sim U[-20, 20]} and normalized so that all coefficients of one polynomial sum up to 1.
The final coefficient function follows as \eqn{\alpha_k (t/T) = 3 \cdot F(t/T, location, scale) + \sum_{j=1}^d a_j (t/T)^j}, where \eqn{F(\cdot, location, scale)} denotes a cumulative logistic distribution function and \eqn{a_j} reflects a polynomial coefficient.
}
\examples{
# Simulate a time-varying panel subject to a time trend and a latent group structure
sim <- sim_tv_DGP(N = 20, n_periods = 50, intercept = TRUE, p = 1)
y <- sim$y

}
\author{
Paul Haimerl
}
