% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Plotting_Functions.R
\name{MoE_gpairs}
\alias{MoE_gpairs}
\title{Generalised Pairs Plots for MoEClust Mixture Models}
\usage{
MoE_gpairs(res,
           response.type = c("points", "uncertainty", "density"),
           subset = list(...),
           scatter.type = c("lm", "points"),
           conditional = c("stripplot", "boxplot"),
           addEllipses = c("outer", "yes", "no", "inner", "both"),
           expert.covar = TRUE,
           border.col = c("purple", "black", "brown", "brown", "navy"),
           bg.col = c("cornsilk", "white", "palegoldenrod", "palegoldenrod", "cornsilk"),
           outer.margins = list(bottom = grid::unit(2, "lines"),
                                left = grid::unit(2, "lines"),
                                top = grid::unit(2, "lines"),
                                right = grid::unit(2, "lines")),
           outer.labels = NULL,
           outer.rot = c(0, 90),
           gap = 0.05,
           buffer = 0.025,
           uncert.cov = FALSE,
           scatter.pars = list(...),
           density.pars = list(...),
           stripplot.pars = list(...),
           barcode.pars = list(...),
           mosaic.pars = list(...),
           axis.pars = list(...),
           diag.pars = list(...),
           ...)
}
\arguments{
\item{res}{An object of class \code{"MoEClust"} generated by \code{\link{MoE_clust}}, or an object of class \code{"MoECompare"} generated by \code{\link{MoE_compare}}. Models with a noise component are facilitated here too.}

\item{response.type}{The type of plot desired for the scatter plots comparing continuous response variables. Defaults to \code{"points"}.

Points can also be sized according to their associated clustering uncertainty with the option \code{"uncertainty"}. In so doing, the transparency of the points will also be proportional to their clustering uncertainty, provided the device supports transparency. See also \code{\link{MoE_Uncertainty}} for an alternative means of visualising observation-specific cluster uncertainties (especially for univariate data).

Alternatively, the bivariate \code{"density"} contours can be displayed (see \code{density.pars}), provided there is at least one Gaussian component in the model. Caution is advised when producing density plots for models with covariates in the expert network; the required number of evaluations of the (multivariate) Gaussian density for each panel (\code{res$G * prod(density.pars$grid.size)}) increases by a factor of \code{res$n}, thus plotting may be slow (particularly for large data sets).}

\item{subset}{A list giving named arguments for producing only a subset of panels:
\describe{
\item{\code{show.map}}{Logical indicating whether to show panels involving the MAP classification (defaults to \code{TRUE}, unless there is only one component, in which case the MAP classification is never plotted).}
\item{\code{data.ind}}{For subsetting response variables: a vector of column indices corresponding to the variables in the columns of \code{res$data} which should be shown. Defaults to all. Can be \code{0}, in order to suppress plotting the response variables.}
\item{\code{cov.ind}}{For subsetting covariates: a vector of column indices corresponding to the covariates in the columns of \code{res$net.covs} which should be shown. Defaults to all. Can be \code{0}, in order to suppress plotting the covariates.}
}
The subsetting must include at least two variables, whether they be the MAP, a response variable, or a covariate, in order to be valid for plotting purposes. The arguments \code{data.ind} and \code{cov.ind} can also be used to simply reorder the panels, without actually subsetting.}

\item{scatter.type}{A vector of length 2 (or 1) giving the plot type for the upper and lower triangular portions of the plot, respectively, pertaining to the associated covariates. Defaults to \code{"lm"} for covariate vs. response panels and \code{"points"} otherwise. Only relevant for models with continuous covariates in the gating &/or expert network. \code{"ci"} and \code{"lm"} type plots are only produced for plots pairing covariates with response, and never response vs. response or covariate vs. covariate. Note that lines &/or confidence intervals will only be drawn for continuous covariates included in the expert network; to include covariates included only in the gating network also, the options \code{"lm2"} or \code{"ci2"} can be used but this is not generally advisable.}

\item{conditional}{A vector of length 2 (or 1) giving the plot type for the upper and lower triangular portions of the plot, respectively, for plots involving a mix of categorical and continuous variables. Defaults to \code{"stripplot"} in the upper triangle and \code{"boxplot"} in the lower triangle (see \code{\link[lattice]{panel.stripplot}} and \code{\link[lattice]{panel.bwplot}}). \code{"barcode"} and \code{"violin"} plots can also be produced. Only relevant for models with categorical covariates in the gating &/or expert network. Comparisons of two categorical variables (which can only ever be covariates) are always displayed via mosaic plots (see \code{\link[vcd]{strucplot}}).}

\item{addEllipses}{Controls whether to add MVN ellipses with axes corresponding to the within-cluster covariances for the response data (\code{"yes"} or \code{"no"}). The options \code{"inner"} and \code{"outer"} (the default) will colour the axes or the perimeter of those ellipses, respectively, according to the cluster they represent (according to \code{scatter.pars$lci.col}). The option \code{"both"} will obviously colour both the axes and the perimeter. Ellipses are only ever drawn for multivariate data, and only when \code{response.type} is \code{"points"} or \code{"uncertainty"}.

Ellipses are centered on the posterior mean of the fitted values when there are expert network covariates, otherwise on the posterior mean of the response variables. In the presence of expert network covariates, the component-specific covariance matrices are also (by default, via the argument \code{expert.covar} below) modified for plotting purposes via the function \code{\link{expert_covar}}, in order to account for the extra variability of the means, usually resulting in bigger shapes & sizes for the MVN ellipses.}

\item{expert.covar}{Logical (defaults to \code{TRUE}) governing whether the extra variability in the component means is added to the MVN ellipses corresponding to the component covariance matrices in the presence of expert network covariates when \code{addEllipses} is invoked accordingly. See the function \code{\link{expert_covar}}. Only relevant when \code{response.type} is \code{"points"} or \code{"uncertainty"}.}

\item{border.col}{A vector of length 5 (or 1) containing \emph{border} colours for plots against the MAP classification, response vs. response, covariate vs. response, response vs. covariate, and covariate vs. covariate panels, respectively.

Defaults to \code{c("purple", "black", "brown", "brown", "navy")}.}

\item{bg.col}{A vector of length 5 (or 1) containing \emph{background} colours for plots against the MAP classification, response vs. response, covariate vs. response, response vs. covariate, and covariate vs. covariate panels, respectively.

Defaults to \code{c("cornsilk", "white", "palegoldenrod", "palegoldenrod", "cornsilk")}.}

\item{outer.margins}{A list of length 4 with units as components named bottom, left, top, and right, giving the outer margins; the default uses two lines of text. A vector of length 4 with units (ordered properly) will work, as will a vector of length 4 with numeric variables (interpreted as lines).}

\item{outer.labels}{The default is \code{NULL}, for alternating labels around the perimeter. If \code{"all"}, all labels are printed, and if \code{"none"}, no labels are printed.}

\item{outer.rot}{A 2-vector (\code{x}, \code{y}) rotating the top/bottom outer labels \code{x} degrees and the left/right outer labels \code{y} degrees. Only works for categorical labels of boxplot and mosaic panels. Defaults to \code{c(0, 90)}.}

\item{gap}{The gap between the tiles; defaulting to 0.05 of the width of a tile.}

\item{buffer}{The fraction by which to expand the range of quantitative variables to provide plots that will not truncate plotting symbols. Defaults to \code{0.025}, i.e. 2.5 percent of the range.}

\item{uncert.cov}{A logical indicating whether the expansion factor for points on plots involving covariates should also be modified when \code{response.type="uncertainty"}. Defaults to \code{FALSE}, and only relevant for scatterplot and stripplot panels.}

\item{scatter.pars}{A list supplying select parameters for the continuous vs. continuous scatter plots.

\code{NULL} is equivalent to:
\preformatted{list(scat.pch=if(response.type == "uncertainty") 19 else res$classification,
scat.size=unit(0.25, "char"), scat.col=res$classification, 
lci.col=res$classification, noise.size=unit(0.2, "char")),}
where \code{lci.col} gives the colour of the fitted lines &/or confidence intervals when \code{scatter.type} is one of \code{"ci"} or \code{"lm"} and the colour of the ellipses when \code{addEllipses} is one of \code{"outer"}, \code{"inner"}, or \code{"both"}. Note that \code{scatter.pars$scat.size} will be modified on an observation by observation level when \code{response.type} is \code{"uncertainty"}. Note also that the default for \code{scatter.pars$scat.pch} changes depending on whether \code{response.type} is given as \code{"points"} or \code{"uncertainty"}, though it can of course be modified in both cases. Finally, \code{scatter.pars$noise.size} can be used to modify \code{scatter.pars$scat.size} for observations assigned to the noise component (if any), but only when \code{response.type="points"}.}

\item{density.pars}{A list supplying select parameters for visualising the bivariate density contours, only when \code{response.type} is \code{"density"}.

\code{NULL} is equivalent to:
\preformatted{list(grid.size=c(100, 100), dcol="grey50",
nlevels=11, show.labels=TRUE, label.style="mixed"),}
where \code{density.pars$grid.size} is a vector of length two giving the number of points in the x & y direction of the grid over which the density is evaluated, respectively, and \code{density.pars$dcol} is either a single colour or a vector of length \code{density.pars$nlevels} colours (although note that \code{density.pars$dcol}, when \emph{not} specified, will be adjusted for transparency). Finally, \code{density.pars$label.style} can take the values \code{"mixed"}, \code{"flat"}, or \code{"align"}.}

\item{stripplot.pars}{A list supplying select parameters for continuous vs. categorical panels when one of the entries of \code{conditional} is \code{"stripplot"}.

\code{NULL} is equivalent to:
\preformatted{list(strip.pch=res$classification, strip.size=unit(0.5, "char"),
strip.col=res$classification, jitter=TRUE, size.noise=unit(0.4, "char")),}
where \code{stripplot.pars$strip.size} and \code{stripplot.pars$size.noise} retain the definitions for the similar arguments under \code{scatter.pars} above. However, \code{stripplot.pars$size.noise} is invoked regardless of the \code{response.type}.}

\item{barcode.pars}{A list supplying select parameters for continuous vs. categorical panels when one of the entries of \code{conditional} is \code{"barcode"}. See the help file for \code{barcode::barcode}.

\code{NULL} is equivalent to:
\preformatted{list(bar.col=res$classification, nint=0, ptsize=unit(0.25, "char"), 
ptpch=1, bcspace=NULL, use.points=FALSE),}
where \code{barcode.pars$bar.col} is only invoked for panels where the categorical variable is the MAP classification (i.e. when \code{isTRUE(subset$show.map)}) if it is of length greater than 1, otherwise it is used for all relevant panels. See \code{diag.pars$hist.color} for controlling the colours of non-MAP-related barcode panels.}

\item{mosaic.pars}{A list supplying select parameters for categorical vs. categorical panels. Can be \code{NULL}. Currently \code{shade}, \code{gp_labels}, \code{gp}, and \code{gp_args} are passed through to \code{\link[vcd]{strucplot}} for producing mosaic tiles.}

\item{axis.pars}{A list supplying select parameters for controlling axes.

\code{NULL} is equivalent to:
\preformatted{list(n.ticks=5, axis.fontsize=9).}
The argument \code{n.ticks} will be overwritten for categorical variables with fewer than 5 levels.}

\item{diag.pars}{A list supplying select parameters for panels along the diagonal.

\code{NULL} is equivalent to:
\preformatted{list(diag.fontsize=9, show.hist=TRUE, diagonal=TRUE,
hist.color=hist.color, show.counts=TRUE),}
where \code{hist.color} is a vector of length 4, giving the colours for the response variables, gating covariates, expert covariates, and covariates entering both networks, respectively. \code{hist.color} also governs the fill colour for boxplot panels involving covariates only and the colour of barcode panels not related to the MAP classification. By default, response variables are \code{"black"} and covariates of any kind are \code{"dimgrey"}. The MAP classification is always coloured by cluster membership. \code{show.counts} is only relevant for categorical variables.

When \code{diagonal=TRUE} (the default), the diagonal from the top left to the bottom right is used for displaying the marginal distributions of variables. Specifying \code{diagonal=FALSE} will place the diagonal running from the top right down to the bottom left.}

\item{...}{Catches unused arguments. Alternatively, named arguments can be passed directly here to any/all of \code{subset}, \code{scatter.pars}, \code{density.pars}, \code{stripplot.pars}, \code{barcode.pars}, \code{mosaic.pars}, \code{axis.pars}, and \code{diag.pars}.}
}
\value{
A generalised pairs plot showing all pairwise relationships between clustered response variables and associated gating &/or expert network continuous &/or categorical variables, coloured according to the MAP classification, with the marginal distributions of each variable along the diagonal.
}
\description{
Produces a matrix of plots showing pairwise relationships between continuous response variables and continuous/categorical/logical/ordinal associated covariates, as well as the clustering achieved, according to fitted MoEClust mixture models.
}
\note{
For \code{MoEClust} models with more than one associated covariate (entering either network), fitted lines produced in continuous covariate vs. continuous response scatter plots via \code{scatter.type="lm"} or \code{scatter.type="ci"} will \strong{NOT} correspond to the coefficients in the expert network (\code{res$expert}).

\code{\link{plot.MoEClust}} is a wrapper to \code{\link{MoE_gpairs}} which accepts the default arguments, and also produces other types of plots. Caution is advised when producing generalised pairs plots when the dimension of the data is large.
}
\examples{
\donttest{data(ais)

res   <- MoE_clust(ais[,3:7], G=2, gating= ~ BMI, expert= ~ sex,
                   network.data=ais, modelNames="EVE")
MoE_gpairs(res)

# Produce the same plot, but with a violin plot in the lower triangle.
# Add fitted lines to the scatter plots. 
# Size points in the response vs. response panels by their clustering uncertainty.
MoE_gpairs(res, conditional=c("stripplot", "violin"),
           scatter.type=c("lm2", "points"), response.type="uncertainty")

# Instead show the bivariate density contours of the response variables (without labels).
# (Plotting may be slow when response.type="density" for models with expert covariates.)
# Use different colours for histograms of covariates in the gating/expert/both networks.
# Also use different colours for response vs. covariate & covariate vs. response panels.
MoE_gpairs(res, response.type="density", show.labels=FALSE,
           hist.color=c("black", "cyan", "hotpink", "chartreuse"),
           bg.col=c("whitesmoke", "white", "mintcream", "mintcream", "floralwhite"))
           
# Produce a generalised pairs plot for a model with a noise component.
# Reorder the covariates and omit the variables "Hc" and "Hg".
# Use barcode plots for the categorical/continuous pairs.
# Magnify the size of scatter points assigned to the noise component.
resN  <- MoE_clust(ais[,3:7], G=2, gating=~SSF + Ht, expert=~sex,
                   network.data=ais, modelNames="EEE", tau0=0.1, noise.gate=FALSE)
MoE_gpairs(resN, data.ind=c(1,2,5), cov.ind=c(3,1,2), 
conditional="barcode", noise.size=grid::unit(0.5, "char"))}
}
\references{
Murphy, K. and Murphy, T. B. (2019). Gaussian parsimonious clustering models with covariates and a noise component. \emph{Advances in Data Analysis and Classification}, 1-33. <\href{https://doi.org/10.1007/s11634-019-00373-8}{doi:10.1007/s11634-019-00373-8}>.

Emerson, J. W., Green, W. A., Schloerke, B., Crowley, J., Cook, D., Hofmann, H. and Wickham, H. (2013). The generalized pairs plot. \emph{Journal of Computational and Graphical Statistics}, 22(1):79-91.
}
\seealso{
\code{\link{MoE_clust}}, \code{\link{MoE_stepwise}}, \code{\link{plot.MoEClust}}, \code{\link{MoE_Uncertainty}}, \code{\link{expert_covar}}, \code{\link[lattice]{panel.stripplot}}, \code{\link[lattice]{panel.bwplot}}, \code{\link[lattice]{panel.violin}}, \code{\link[vcd]{strucplot}}
}
\author{
Keefe Murphy - <\email{keefe.murphy@ucd.ie}>
}
\keyword{plotting}
