% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/DPMGibbsSkewT_SeqPrior_parallel.R
\name{DPMGibbsSkewT_SeqPrior_parallel}
\alias{DPMGibbsSkewT_SeqPrior_parallel}
\title{Slice Sampling of Dirichlet Process Mixture of skew Student's t-distributions}
\usage{
DPMGibbsSkewT_SeqPrior_parallel(
  Ncpus,
  type_connec,
  z,
  prior_inform,
  hyperG0,
  N,
  nbclust_init,
  add.vagueprior = TRUE,
  weightnoninfo = NULL,
  doPlot = FALSE,
  plotevery = N/10,
  diagVar = TRUE,
  verbose = TRUE,
  monitorfile = "",
  ...
)
}
\arguments{
\item{Ncpus}{the number of processors available}

\item{type_connec}{The type of connection between the processors. Supported
cluster types are \code{"SOCK"}, \code{"FORK"}, \code{"MPI"}, and
\code{"NWS"}. See also \code{\link[parallel:makeCluster]{makeCluster}}.}

\item{z}{data matrix \code{d x n} with \code{d} dimensions in rows
and \code{n} observations in columns.}

\item{prior_inform}{an informative prior such as the approximation computed by \code{summary.DPMMclust}.}

\item{hyperG0}{prior mixing distribution.}

\item{N}{number of MCMC iterations.}

\item{nbclust_init}{number of clusters at initialization.
Default to 30 (or less if there are less than 30 observations).}

\item{add.vagueprior}{logical flag indicating whether a non informative component should
be added to the informative prior. Default is \code{TRUE}.}

\item{weightnoninfo}{a real between 0 and 1 giving the weights of the non informative component
in the prior.}

\item{doPlot}{logical flag indicating whether to plot MCMC iteration or not.
Default to \code{TRUE}.}

\item{plotevery}{an integer indicating the interval between plotted iterations when \code{doPlot}
is \code{TRUE}.}

\item{diagVar}{logical flag indicating whether the variance of each cluster is
estimated as a diagonal matrix, or as a full matrix.
Default is \code{TRUE} (diagonal variance).}

\item{verbose}{logical flag indicating whether partition info is
written in the console at each MCMC iteration.}

\item{monitorfile}{a writable \link{connections} or a character string naming a file to write into,
to monitor the progress of the analysis.
Default is \code{""} which is no monitoring. See Details.}

\item{...}{additional arguments to be passed to \code{\link{plot_DPM}}.
Only used if \code{doPlot} is \code{TRUE}.}
}
\value{
a object of class \code{DPMclust} with the following attributes:
 \itemize{
     \item{\code{mcmc_partitions}:}{a list of length \code{N}. Each
      element \code{mcmc_partitions[n]} is a vector of length
      \code{n} giving the partition of the \code{n} observations.}
     \item{\code{alpha}:}{ a vector of length \code{N}. \code{cost[j]} is the cost
associated to partition \code{c[[j]]}}
      \item{\code{U_SS_list}:}{a list of length \code{N} containing the lists of
      sufficient statistics for all the mixture components at each MCMC iteration}
     \item{\code{weights_list}:}{a list of length \code{N} containing the logposterior values
      at each MCMC iterations}
     \item{\code{logposterior_list}:}{a list of length \code{N} containing the logposterior values
      at each MCMC iterations}
     \item{\code{data}:}{the data matrix \code{d x n} with \code{d} dimensions in rows
and \code{n} observations in columns}
     \item{\code{nb_mcmcit}:}{the number of MCMC iterations}
     \item{\code{clust_distrib}:}{the parametric distribution of the mixture component - \code{"skewt"}}
     \item{\code{hyperG0}:}{the prior on the cluster location}
 }
}
\description{
Slice Sampling of Dirichlet Process Mixture of skew Student's t-distributions
}
\examples{
rm(list=ls())

#Number of data
n <- 2000
set.seed(123)


d <- 2
ncl <- 4

# Sample data

sdev <- array(dim=c(d,d,ncl))

#xi <- matrix(nrow=d, ncol=ncl, c(-1.5, 1.5, 1.5, 1.5, 2, -2.5, -2.5, -3))
#psi <- matrix(nrow=d, ncol=4, c(0.4, -0.6, 0.8, 0, 0.3, -0.7, -0.3, -0.8))
xi <- matrix(nrow=d, ncol=ncl, c(-0.2, 0.5, 2.4, 0.4, 0.6, -1.3, -0.9, -2.7))
psi <- matrix(nrow=d, ncol=4, c(0.3, -0.7, -0.8, 0, 0.3, -0.7, 0.2, 0.9))
nu <- c(100,15,8,5)
p <- c(0.15, 0.05, 0.5, 0.3) # frequence des clusters
sdev[, ,1] <- matrix(nrow=d, ncol=d, c(0.3, 0, 0, 0.3))
sdev[, ,2] <- matrix(nrow=d, ncol=d, c(0.1, 0, 0, 0.3))
sdev[, ,3] <- matrix(nrow=d, ncol=d, c(0.3, 0.15, 0.15, 0.3))
sdev[, ,4] <- .3*diag(2)


c <- rep(0,n)
w <- rep(1,n)
z <- matrix(0, nrow=d, ncol=n)
for(k in 1:n){
 c[k] = which(rmultinom(n=1, size=1, prob=p)!=0)
 w[k] <- rgamma(1, shape=nu[c[k]]/2, rate=nu[c[k]]/2)
 z[,k] <- xi[, c[k]] + psi[, c[k]]*rtruncnorm(n=1, a=0, b=Inf, mean=0, sd=1/sqrt(w[k])) +
                (sdev[, , c[k]]/sqrt(w[k]))\%*\%matrix(rnorm(d, mean = 0, sd = 1), nrow=d, ncol=1)
 #cat(k, "/", n, " observations simulated\n", sep="")
}

# Set parameters of G0
hyperG0 <- list()
hyperG0[["b_xi"]] <- rowMeans(z)
hyperG0[["b_psi"]] <- rep(0,d)
hyperG0[["kappa"]] <- 0.001
hyperG0[["D_xi"]] <- 100
hyperG0[["D_psi"]] <- 100
hyperG0[["nu"]] <- d+1
hyperG0[["lambda"]] <- diag(apply(z,MARGIN=1, FUN=var))/3

 # hyperprior on the Scale parameter of DPM
 a <- 0.0001
 b <- 0.0001

 # do some plots
 nbclust_init <- 30

 ## Plot Data
 library(ggplot2)
 q <- (ggplot(data.frame("X"=z[1,], "Y"=z[2,]), aes(x=X, y=Y))
       + geom_point()
       + ggtitle("Simple example in 2d data")
       +xlab("D1")
       +ylab("D2")
       +theme_bw())
 q

if(interactive()){
 MCMCsample_st <- DPMGibbsSkewT(z, hyperG0, a, b, N=2000,
                                doPlot=TRUE, plotevery=250,
                                nbclust_init,
                                gg.add=list(theme_bw(),
                                 guides(shape=guide_legend(override.aes = list(fill="grey45")))),
                                diagVar=FALSE)
 s <- summary(MCMCsample_st, burnin = 1500, thin=5, posterior_approx=TRUE)
 F <- FmeasureC(pred=s$point_estim$c_est, ref=c)

for(k in 1:n){
 c[k] = which(rmultinom(n=1, size=1, prob=p)!=0)
 w[k] <- rgamma(1, shape=nu[c[k]]/2, rate=nu[c[k]]/2)
 z[,k] <- xi[, c[k]] + psi[, c[k]]*rtruncnorm(n=1, a=0, b=Inf, mean=0, sd=1/sqrt(w[k])) +
                (sdev[, , c[k]]/sqrt(w[k]))\%*\%matrix(rnorm(d, mean = 0, sd = 1), nrow=d, ncol=1)
 #cat(k, "/", n, " observations simulated\n", sep="")
}
MCMCsample_st2 <- DPMGibbsSkewT_SeqPrior_parallel(Ncpus=2, type_connec="SOCK",
                                                  z, prior_inform=s$param_posterior,
                                                  hyperG0, N=3000,
                                                  doPlot=TRUE, plotevery=100,
                                                  nbclust_init, diagVar=FALSE, verbose=FALSE,
                                                  gg.add=list(theme_bw(),
                                 guides(shape=guide_legend(override.aes = list(fill="grey45")))))
s2 <- summary(MCMCsample_st2, burnin = 2000, thin=5)
F2 <- FmeasureC(pred=s2$point_estim$c_est, ref=c)
}


}
\references{
Hejblum BP, Alkhassim C, Gottardo R, Caron F and Thiebaut R (2019) 
Sequential Dirichlet Process Mixtures of Multivariate Skew t-distributions for 
Model-based Clustering of Flow Cytometry Data. The Annals of Applied Statistics, 
13(1): 638-660. <doi: 10.1214/18-AOAS1209> <arXiv: 1702.04407> 
\url{https://arxiv.org/abs/1702.04407} \url{https://doi.org/10.1214/18-AOAS1209}
}
\author{
Boris Hejblum
}
