% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ctStanFit.R
\name{ctStanFit}
\alias{ctStanFit}
\title{ctStanFit}
\usage{
ctStanFit(datalong, ctstanmodel, stanmodeltext = NA, iter = 1000,
  intoverstates = TRUE, binomial = FALSE, fit = TRUE,
  intoverpop = FALSE, stationary = FALSE, plot = FALSE,
  derrind = "all", optimize = FALSE, optimcontrol = list(),
  nlcontrol = list(), nopriors = FALSE, chains = 1,
  cores = "maxneeded", inits = NULL, forcerecompile = FALSE,
  savescores = TRUE, gendata = FALSE, control = list(),
  verbose = 0, ...)
}
\arguments{
\item{datalong}{long format data containing columns for subject id (numeric values, 1 to max subjects), manifest variables, 
any time dependent (i.e. varying within subject) predictors, 
and any time independent (not varying within subject) predictors.}

\item{ctstanmodel}{model object as generated by \code{\link{ctModel}} with type='stanct' or 'standt', for continuous or discrete time
models respectively.}

\item{stanmodeltext}{already specified Stan model character string, generally leave NA unless modifying Stan model directly.
(Possible after modification of output from fit=FALSE)}

\item{iter}{number of iterations, half of which will be devoted to warmup by default when sampling.
When optimizing, this is the maximum number of iterations to allow -- convergence hopefully occurs before this!}

\item{intoverstates}{logical indicating whether or not to integrate over latent states using a Kalman filter. 
Generally recommended to set TRUE unless using non-gaussian measurement model.}

\item{binomial}{Deprecated. Logical indicating the use of binary rather than Gaussian data, as with IRT analyses.
This now sets \code{intoverstates = FALSE} and the \code{manifesttype} of every indicator to 1, for binary.}

\item{fit}{If TRUE, fit specified model using Stan, if FALSE, return stan model object without fitting.}

\item{intoverpop}{if TRUE, integrates over population distribution of parameters rather than full sampling.
Allows for optimization of non-linearities and random effects.}

\item{stationary}{Logical. If TRUE, T0VAR and T0MEANS input matrices are ignored, 
the parameters are instead fixed to long run expectations. More control over this can be achieved
by instead setting parameter names of T0MEANS and T0VAR matrices in the input model to 'stationary', for
elements that should be fixed to stationarity.}

\item{plot}{if TRUE, a Shiny program is launched upon fitting to interactively plot samples. 
May struggle with many (e.g., > 5000) parameters, and may leave sample files in working directory if sampling is terminated.}

\item{derrind}{vector of integers denoting which latent variables are involved in dynamic error calculations.
latents involved only in deterministic trends or input effects can be removed from matrices (ie, that
obtain no additional stochastic inputs after first observation), speeding up calculations. 
If unsure, leave default of 'all' ! Ignored if intoverstates=FALSE.}

\item{optimize}{if TRUE, use \code{\link{optimstan}} function for maximum a posteriori estimates.}

\item{optimcontrol}{list of parameters sent to \code{\link{optimstan}} governing optimization / importance sampling.}

\item{nlcontrol}{List of non-linear control parameters. 
\code{nldynamics} defaults to "auto", but may also be a logical. Set to FALSE to use estimator that assumes linear dynamics, 
TRUE to use non-linear estimator. "auto" selects linear when the model is obviously linear, 
otherwise nonlinear -- nonlinear is slower.
\code{nlmeasurement} defaults to "auto", but may also be a logical. Set to TRUE to use non linear measurement model estimator, 
FALSE to use linear model. "auto" selects linear if appropriate, otherwise nonlinear. Non-linear methods are slower but applicable to both linear
and non linear cases.
\code{ukffull} may be TRUE or FALSE. If FALSE, nonlinear filtering via the unscented filter uses a minimal number of sigma points,
that does not capture skew in the resulting distribution. 
\code{maxtimestep} must be a positive numeric,  specifying the largest time
span covered by the numerical integration. The large default ensures that for each observation time interval, 
only a single step of exponential integration is used. When \code{maxtimestep} is smaller than the observation time interval, 
the integration is nested within an Euler like loop. 
Smaller values may offer greater accuracy, but are slower and not always necessary. Given the exponential integration,
linear model elements are fit exactly with only a single step. 
\code{ukfspread} should be a small positive numeric value, indicating what fraction of a standard deviation to 
use for unscented sigma points. Values between 1e-4 and 2 have tended to be reasonable, in our experience. 
In general, larger values may not make sense when using the default of \code{ukffull=FALSE}.}

\item{nopriors}{logical. If TRUE, any priors are disabled -- sometimes desirable for optimization.}

\item{chains}{number of chains to sample, during HMC or post-optimization importance sampling.}

\item{cores}{number of cpu cores to use. Either 'maxneeded' to use as many as available,
up to the number of chains, or a positive integer.}

\item{inits}{vector of parameter start values, as returned by the rstan function \code{rstan::unconstrain_pars} for instance.}

\item{forcerecompile}{logical. For development purposes. 
If TRUE, stan model is recompiled, regardless of apparent need for compilation.}

\item{savescores}{Logical. If TRUE, output from the Kalman filter is saved in output. For datasets with many variables
or time points, will increase file size substantially.}

\item{gendata}{Logical -- If TRUE, uses provided data as a time and missingness structure only, and 
generates random data according to the specified model / priors. 
Generated data is in the $Ygen subobject after running \code{extract} on the fit object.
For datasets with many manifest variables or time points, file size may be large.
To generate data based on the posterior of a fitted model, see \code{\link{ctStanGenerateData}}.}

\item{control}{List of arguments sent to \code{\link[rstan]{stan}} control argument, 
regarding warmup / sampling behaviour. Unless specified, values used are:
list(adapt_delta = .8, adapt_window=2, max_treedepth=10, adapt_init_buffer=2, stepsize = .001)}

\item{verbose}{Integer from 0 to 2. Higher values print more information during model fit -- for debugging.}

\item{...}{additional arguments to pass to \code{\link[rstan]{stan}} function.}
}
\description{
Fits a ctsem model specified via \code{\link{ctModel}} with type either 'stanct' or 'standt', using Bayseian inference software
Stan.
}
\examples{
\dontrun{
#test data with 2 manifest indicators measuring 1 latent process each, 
# 1 time dependent predictor, 3 time independent predictors
head(ctstantestdat) 

#generate a ctStanModel
model<-ctModel(type='stanct',
n.latent=2, latentNames=c('eta1','eta2'),
n.manifest=2, manifestNames=c('Y1','Y2'),
n.TDpred=1, TDpredNames='TD1', 
n.TIpred=3, TIpredNames=c('TI1','TI2','TI3'),
LAMBDA=diag(2))

#set all parameters except manifest means to be fixed across subjects
model$pars$indvarying[-c(19,20)] <- FALSE

#fit model to data (takes a few minutes - but insufficient 
# iterations and max_treedepth for inference!)
fit<-ctStanFit(ctstantestdat, model, iter=200, chains=2, 
control=list(max_treedepth=6))

#output functions
summary(fit) 

plot(fit)



###### EXTENDED EXAMPLES #######



library(ctsem)
set.seed(3)

#recommended to adjust these to appropriate number of cores on machine / 
# chains desired. (min 3 chains recommended, but not necessary here)
setcores <- 3
setchains <- 3

#### Data generation (this section needs to be run, but not necessary to understand!)
Tpoints <- 20
nmanifest <- 4
nlatent <- 2
nsubjects<-20

#random effects
age <- rnorm(nsubjects) #standardised
cint1<-rnorm(nsubjects,2,.3)+age*.5
cint2 <- cint1*.5+rnorm(nsubjects,1,.2)+age*.5
tdpredeffect <- rnorm(nsubjects,5,.3)+age*.5

for(i in 1:nsubjects){
  #generating model
  gm<-ctModel(Tpoints=Tpoints,n.manifest = nmanifest,n.latent = nlatent,n.TDpred = 1,
    LAMBDA = matrix(c(1,0,0,0, 0,1,.8,1.3),nrow=nmanifest,ncol=nlatent),
    DRIFT=matrix(c(-.3, .1, 0, -.5),nlatent,nlatent),
    TDPREDEFFECT=matrix(c(tdpredeffect[i],0),nrow=nlatent),
    TDPREDMEANS=matrix(c(rep(0,Tpoints-10),1,rep(0,9)),ncol=1),
    DIFFUSION = matrix(c(.5, 0, 0, .5),2,2),
    CINT = matrix(c(cint1[i],cint2[i]),ncol=1),
    T0VAR=diag(2,nlatent,nlatent),
    MANIFESTVAR = diag(.5, nmanifest))
  
  #generate data
  newdat <- ctGenerate(ctmodelobj = gm,n.subjects = 1,burnin = 2,
    dtmat<-rbind(c(rep(.5,8),3,rep(.5,Tpoints-9))),
      wide = FALSE)
  newdat[,'id'] <- i #set id for each subject
  newdat <- cbind(newdat,age[i]) #include time independent predictor
  if(i ==1) {
    dat <- newdat[1:(Tpoints-10),] #pre intervention data
    dat2 <- newdat #including post intervention data
  }
  if(i > 1) {
    dat <- rbind(dat, newdat[1:(Tpoints-10),])
    dat2 <- rbind(dat2,newdat)
  }
}
colnames(dat)[ncol(dat)] <- 'age'
colnames(dat2)[ncol(dat)] <- 'age'


#plot generated data for sanity
plot(age)
matplot(dat[,gm$manifestNames],type='l',pch=1)
plotvar <- 'Y1'
plot(dat[dat[,'id']==1,'time'],dat[dat[,'id']==1,plotvar],type='l',
  ylim=range(dat[,plotvar],na.rm=TRUE))
for(i in 2:nsubjects){
  points(dat[dat[,'id']==i,'time'],dat[dat[,'id']==i,plotvar],type='l',col=i)
}





#### Model fitting (from here it is good to understand!)

#Specify univariate linear growth curve
#page 5 of https://cran.r-project.org/web/packages/ctsem/vignettes/hierarchical.pdf 
# documents these arguments (or use ?ctModel )

m1 <- ctModel(n.manifest = 1,n.latent = 1,n.TIpred = 1, type = 'stanct',
  manifestNames = c('Y1'), latentNames=c('L1'),TIpredNames = 'age',
  DRIFT=matrix(-1e-5,nrow=1,ncol=1),
  DIFFUSION=matrix(0,nrow=1,ncol=1),
  CINT=matrix(c('cint1'),ncol=1),
  T0MEANS=matrix(c('t0m1'),ncol=1),
  T0VAR=matrix(0,nrow=1,ncol=1),
  LAMBDA = diag(1),
  MANIFESTMEANS=matrix(0,ncol=1),
  MANIFESTVAR=matrix(c('merror1'),nrow=1,ncol=1))

#modify between subject aspects -- alternatively, run: edit(m1$pars)
m1$pars$indvarying[-which(m1$pars$matrix \%in\% c('T0MEANS','CINT'))] <- FALSE
m1$pars$age_effect[-which(m1$pars$matrix \%in\% c('T0MEANS','CINT'))] <- FALSE


plot(m1) #plot prior distributions

#fit
f1 <- ctStanFit(datalong = dat, ctstanmodel = m1, 
  cores = setcores,chains = setchains,plot=TRUE,
  control=list(max_treedepth=7),iter=150)

summary(f1)

#plots of individual subject models v data
ctKalman(f1,timestep=.01,plot=TRUE,subjects=1:4,kalmanvec=c('y','etasmooth'))
ctKalman(f1,timestep=.01,plot=TRUE,subjects=1,kalmanvec=c('y','ysmooth'))

ctStanPlotPost(f1) #compare prior to posterior distributions 
ctStanPlotPost(f1, priorwidth = FALSE) #rescale to width of posterior 

ctStanPostPredict(f1) #compare randomly generated data from posterior to observed data

cf<-ctCheckFit(f1) #compare covariance of randomly generated data to observed cov
plot(cf)

#accessing the stan object directly 
library(rstan)
postsamples <- extract(f1$stanfit,pars='Ygen') #extract data generated from posterior
plot( f1$data$time, 
  postsamples$Ygen[1,1, ,1]) #iteration 1 (already shuffled), chain 1, all occasions, var 1.
points(f1$data$time, f1$data$Y[,1],col='red') #1st manifest variable





#Specify model including dynamics
m2 <- ctModel(n.manifest = 1,n.latent = 1,n.TIpred = 1, type = 'stanct',
  manifestNames = c('Y1'), latentNames=c('L1'),TIpredNames = 'age',
  DRIFT=matrix('drift11',nrow=1,ncol=1),
  DIFFUSION=matrix('diffusion11',nrow=1,ncol=1),
  CINT=matrix(c('cint1'),ncol=1),
  T0MEANS=matrix(c('t0m1'),ncol=1),
  T0VAR=matrix('t0var11',nrow=1,ncol=1),
  LAMBDA = diag(1),
  MANIFESTMEANS=matrix(0,ncol=1),
  MANIFESTVAR=matrix(c('merror1'),nrow=1,ncol=1))

m2$pars$indvarying[-which(m2$pars$matrix \%in\% c('T0MEANS','CINT'))] <- FALSE
m2$pars$age_effect[-which(m2$pars$matrix \%in\% c('T0MEANS','CINT'))] <- FALSE


f2 <- ctStanFit(datalong = dat, ctstanmodel = m2, cores = setcores,
  chains = setchains,plot=TRUE,
  control=list(max_treedepth=7),iter=150)

summary(f2,parmatrices=TRUE,timeinterval=1)

ctKalman(f2,timestep=.01,plot=TRUE,subjects=1,kalmanvec=c('y','etaprior'))
ctKalman(f2,timestep=.01,plot=TRUE,subjects=1:4,kalmanvec=c('y','etasmooth'))
ctKalman(f2,timestep=.01,plot=TRUE,subjects=1:2,kalmanvec=c('y','ysmooth'))

ctStanPlotPost(f2)

ctStanPostPredict(f2)





#Include intervention
m3 <- ctModel(n.manifest = 1,n.latent = 1,n.TIpred = 1, type = 'stanct',
  manifestNames = c('Y1'), latentNames=c('L1'),TIpredNames = 'age',
  n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention
  TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect
  DRIFT=matrix('drift11',nrow=1,ncol=1),
  DIFFUSION=matrix('diffusion11',nrow=1,ncol=1),
  CINT=matrix(c('cint1'),ncol=1),
  T0MEANS=matrix(c('t0m1'),ncol=1),
  T0VAR=matrix('t0var11',nrow=1,ncol=1),
  LAMBDA = diag(1),
  MANIFESTMEANS=matrix(0,ncol=1),
  MANIFESTVAR=matrix(c('merror1'),nrow=1,ncol=1))

m3$pars$indvarying[-which(m3$pars$matrix \%in\% 
  c('T0MEANS','CINT','TDPREDEFFECT'))] <- FALSE
  
m3$pars$age_effect[-which(m3$pars$matrix \%in\% 
  c('T0MEANS','CINT','TDPREDEFFECT'))] <- FALSE

f3 <- ctStanFit(datalong = dat2, ctstanmodel = m3, cores = setcores,
  chains = setchains,plot=TRUE,
  control=list(max_treedepth=7),iter=150)

summary(f3,parmatrices=TRUE)

ctKalman(f3,timestep=.01,plot=TRUE,subjects=1,kalmanvec=c('y','etaprior'))
ctKalman(f3,timestep=.01,plot=TRUE,subjects=1:4,kalmanvec=c('y','etasmooth'))
ctKalman(f3,timestep=.01,plot=TRUE,subjects=1:2,kalmanvec=c('y','ysmooth'))

ctStanPlotPost(f3)

ctStanPostPredict(f3, datarows=0:100)






#include 2nd latent process

#use either full explicit specification
m4 <- ctModel(n.manifest = 2,n.latent = 2,n.TIpred = 1, type = 'stanct', #no of vars updated
  manifestNames = c('Y1','Y2'), latentNames=c('L1','L2'),TIpredNames = 'age', 
  n.TDpred=1,TDpredNames = 'TD1', 
  TDPREDEFFECT=matrix(c('tdpredeffect1','tdpredeffect2'),nrow=2,ncol=1), 
  DRIFT=matrix(c('drift11','drift21','drift12','drift22'),nrow=2,ncol=2),
  DIFFUSION=matrix(c('diffusion11','diffusion21',0,'diffusion22'),nrow=2,ncol=2),
  CINT=matrix(c('cint1','cint2'),nrow=2,ncol=1),
  T0MEANS=matrix(c('t0m1','t0m2'),nrow=2,ncol=1),
  T0VAR=matrix(c('t0var11','t0var21',0,'t0var22'),nrow=2,ncol=2),
  LAMBDA = matrix(c(1,0,0,1),nrow=2,ncol=2),
  MANIFESTMEANS=matrix(c(0,0),nrow=2,ncol=1),
  MANIFESTVAR=matrix(c('merror1',0,0,'merror2'),nrow=2,ncol=2))

#restrict between subjects variation / covariate effects
m4$pars$indvarying[-which(m4$pars$matrix \%in\% c('T0MEANS','CINT','TDPREDEFFECT'))] <- FALSE
m4$pars$age_effect[-which(m4$pars$matrix \%in\% c('T0MEANS','CINT','TDPREDEFFECT'))] <- FALSE

#or rely on defaults (MANIFESTMEANS now free instead of CINT -- 
#  no substantive difference for one indicator factors)
m4 <- ctModel(n.manifest = 2,n.latent = 2,n.TIpred = 1, type = 'stanct', 
  manifestNames = c('Y1','Y2'), latentNames=c('L1','L2'),TIpredNames = 'age',
  n.TDpred=1,TDpredNames = 'TD1',
  LAMBDA = matrix(c(1,0,0,1),nrow=2,ncol=2))

#restrict between subjects variation / covariate effects
m4$pars$indvarying[-which(m4$pars$matrix \%in\% c('T0MEANS','MANIFESTMEANS','TDPREDEFFECT'))] <- FALSE
m4$pars$age_effect[-which(m4$pars$matrix \%in\% c('T0MEANS','MANIFESTMEANS','TDPREDEFFECT'))] <- FALSE

f4 <- ctStanFit(datalong = dat2, ctstanmodel = m4, cores = setcores,chains = setchains,plot=TRUE,
  optimize=TRUE,verbose=1,
  control=list(max_treedepth=7),iter=150)

summary(f4,parmatrices=TRUE)

ctStanDiscretePars(f4,plot=TRUE) #auto and cross regressive plots over time

ctKalman(f4,timestep=.01,plot=TRUE,subjects=1,kalmanvec=c('y','etaprior'))
ctKalman(f4,timestep=.01,plot=TRUE,subjects=1:2,kalmanvec=c('y','etasmooth'))
ctKalman(f4,timestep=.01,plot=TRUE,subjects=1:2,kalmanvec=c('y','ysmooth'))

ctStanPlotPost(f4)

ctStanPostPredict(f4,wait=F)



#non-linear dedpendencies - based on m3 model (including intervention)
#specify intervention as dependent on extra parameter in PARS matrix, and latent process 1

m3nl <- ctModel(n.manifest = 1,n.latent = 1,n.TIpred = 1, type = 'stanct',
  manifestNames = c('Y1'), latentNames=c('L1'),TIpredNames = 'age',
  n.TDpred=1,TDpredNames = 'TD1', 
  TDPREDEFFECT=matrix(c('PARS[1,1] * state[1]'),nrow=1,ncol=1), 
  PARS=matrix(c('tdpredeffect'),1,1),
  DRIFT=matrix('drift11',nrow=1,ncol=1),
  DIFFUSION=matrix('diffusion11',nrow=1,ncol=1),
  CINT=matrix(c('cint1'),ncol=1),
  T0MEANS=matrix(c('t0m1'),ncol=1),
  T0VAR=matrix('t0var11',nrow=1,ncol=1),
  LAMBDA = diag(1),
  MANIFESTMEANS=matrix(0,ncol=1),
  MANIFESTVAR=matrix(c('merror1'),nrow=1,ncol=1))

m3nl$pars$indvarying[-which(m3nl$pars$matrix \%in\% 
  c('T0MEANS','CINT','TDPREDEFFECT'))] <- FALSE
  
m3nl$pars$age_effect[-which(m3nl$pars$matrix \%in\% 
  c('T0MEANS','CINT','TDPREDEFFECT'))] <- FALSE

#here fit using optimization instead of sampling -- not appropriate in all cases!
f3nl <- ctStanFit(datalong = dat2, ctstanmodel = m3nl, 
  cores = setcores, chains = setchains,
  optimize=TRUE)

summary(f3nl)

#plot functions need updating for non-linearities! (as of ctsem v 2.7.3)
#extract.ctStanFit can be used to extract samples and create own plots.
#last index of kalaman subobject denotes element of Kalman output.
# 1 = ll on std scale, 2= ll scale, 3=error, 4=prediction, 
# 5= eta prior, 6= eta upd

ctStanPostPredict(f3nl, datarows=1:100)

e=extract.ctStanFit(f3nl)
subindex = which(f3nl$data$subject ==3) #specify subject
 
 matplot(f3nl$data$time[subindex], # Y predictions given earlier Y
   t(e$kalman[,subindex,4]), 
   type='l',lty=1,col=rgb(0,0,0,.1))
 
 points(f3nl$data$time[subindex], #actual Y
   f3nl$data$Y[subindex,1],type='p',col='red')
   
  matplot(f3nl$data$time[subindex], add = TRUE, #Generated Y from model
    t(e$Ygen[,subindex,1]), 
    type='l',lty=1,col=rgb(0,0,1,.05))
}
}
