% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PlotSampleClustering.R
\name{plot_sample_clustering}
\alias{plot_sample_clustering}
\alias{plot_sample_clustering,ANY-method}
\alias{plot_sample_clustering,familiarCollection-method}
\title{Plot heatmaps of feature expression with clustered features and samples.}
\usage{
plot_sample_clustering(
  object,
  feature_cluster_method = waiver(),
  feature_linkage_method = waiver(),
  sample_cluster_method = waiver(),
  sample_linkage_method = waiver(),
  sample_limit = waiver(),
  draw = FALSE,
  dir_path = NULL,
  split_by = NULL,
  x_axis_by = NULL,
  y_axis_by = NULL,
  facet_by = NULL,
  facet_wrap_cols = NULL,
  ggtheme = NULL,
  gradient_palette = NULL,
  gradient_palette_range = waiver(),
  outcome_palette = NULL,
  outcome_palette_range = waiver(),
  x_label = waiver(),
  x_label_shared = "column",
  y_label = waiver(),
  y_label_shared = "row",
  legend_label = waiver(),
  outcome_legend_label = waiver(),
  plot_title = waiver(),
  plot_sub_title = waiver(),
  caption = NULL,
  x_range = NULL,
  x_n_breaks = 3,
  x_breaks = NULL,
  y_range = NULL,
  y_n_breaks = 3,
  y_breaks = NULL,
  rotate_x_tick_labels = waiver(),
  show_feature_dendrogram = TRUE,
  show_sample_dendrogram = TRUE,
  show_normalised_data = TRUE,
  show_outcome = TRUE,
  dendrogram_height = grid::unit(1.5, "cm"),
  outcome_height = grid::unit(0.3, "cm"),
  evaluation_times = waiver(),
  width = waiver(),
  height = waiver(),
  units = waiver(),
  export_collection = FALSE,
  verbose = TRUE,
  ...
)

\S4method{plot_sample_clustering}{ANY}(
  object,
  feature_cluster_method = waiver(),
  feature_linkage_method = waiver(),
  sample_cluster_method = waiver(),
  sample_linkage_method = waiver(),
  sample_limit = waiver(),
  draw = FALSE,
  dir_path = NULL,
  split_by = NULL,
  x_axis_by = NULL,
  y_axis_by = NULL,
  facet_by = NULL,
  facet_wrap_cols = NULL,
  ggtheme = NULL,
  gradient_palette = NULL,
  gradient_palette_range = waiver(),
  outcome_palette = NULL,
  outcome_palette_range = waiver(),
  x_label = waiver(),
  x_label_shared = "column",
  y_label = waiver(),
  y_label_shared = "row",
  legend_label = waiver(),
  outcome_legend_label = waiver(),
  plot_title = waiver(),
  plot_sub_title = waiver(),
  caption = NULL,
  x_range = NULL,
  x_n_breaks = 3,
  x_breaks = NULL,
  y_range = NULL,
  y_n_breaks = 3,
  y_breaks = NULL,
  rotate_x_tick_labels = waiver(),
  show_feature_dendrogram = TRUE,
  show_sample_dendrogram = TRUE,
  show_normalised_data = TRUE,
  show_outcome = TRUE,
  dendrogram_height = grid::unit(1.5, "cm"),
  outcome_height = grid::unit(0.3, "cm"),
  evaluation_times = waiver(),
  width = waiver(),
  height = waiver(),
  units = waiver(),
  export_collection = FALSE,
  verbose = TRUE,
  ...
)

\S4method{plot_sample_clustering}{familiarCollection}(
  object,
  feature_cluster_method = waiver(),
  feature_linkage_method = waiver(),
  sample_cluster_method = waiver(),
  sample_linkage_method = waiver(),
  sample_limit = waiver(),
  draw = FALSE,
  dir_path = NULL,
  split_by = NULL,
  x_axis_by = NULL,
  y_axis_by = NULL,
  facet_by = NULL,
  facet_wrap_cols = NULL,
  ggtheme = NULL,
  gradient_palette = NULL,
  gradient_palette_range = waiver(),
  outcome_palette = NULL,
  outcome_palette_range = waiver(),
  x_label = waiver(),
  x_label_shared = "column",
  y_label = waiver(),
  y_label_shared = "row",
  legend_label = waiver(),
  outcome_legend_label = waiver(),
  plot_title = waiver(),
  plot_sub_title = waiver(),
  caption = NULL,
  x_range = NULL,
  x_n_breaks = 3,
  x_breaks = NULL,
  y_range = NULL,
  y_n_breaks = 3,
  y_breaks = NULL,
  rotate_x_tick_labels = waiver(),
  show_feature_dendrogram = TRUE,
  show_sample_dendrogram = TRUE,
  show_normalised_data = TRUE,
  show_outcome = TRUE,
  dendrogram_height = grid::unit(1.5, "cm"),
  outcome_height = grid::unit(0.3, "cm"),
  evaluation_times = waiver(),
  width = waiver(),
  height = waiver(),
  units = waiver(),
  export_collection = FALSE,
  verbose = TRUE,
  ...
)
}
\arguments{
\item{object}{A \code{familiarCollection} object, or other objects from which
a \code{familiarCollection} can be extracted. See details for more information.}

\item{feature_cluster_method}{The method used to perform clustering. These are
the same methods as for the \code{cluster_method} configuration parameter:
\code{none}, \code{hclust}, \code{agnes}, \code{diana} and \code{pam}.

\code{none} cannot be used when extracting data regarding mutual correlation or
feature expressions.

If not provided explicitly, this parameter is read from settings used at
creation of the underlying \code{familiarModel} objects.}

\item{feature_linkage_method}{The method used for agglomerative clustering in
\code{hclust} and \code{agnes}. These are the same methods as for the
\code{cluster_linkage_method} configuration parameter: \code{average}, \code{single},
\code{complete}, \code{weighted}, and \code{ward}.

If not provided explicitly, this parameter is read from settings used at
creation of the underlying \code{familiarModel} objects.}

\item{sample_cluster_method}{The method used to perform clustering based on
distance between samples. These are the same methods as for the
\code{cluster_method} configuration parameter: \code{hclust}, \code{agnes}, \code{diana} and
\code{pam}.

\code{none} cannot be used when extracting data for feature expressions.

If not provided explicitly, this parameter is read from settings used at
creation of the underlying \code{familiarModel} objects.}

\item{sample_linkage_method}{The method used for agglomerative clustering in
\code{hclust} and \code{agnes}. These are the same methods as for the
\code{cluster_linkage_method} configuration parameter: \code{average}, \code{single},
\code{complete}, \code{weighted}, and \code{ward}.

If not provided explicitly, this parameter is read from settings used at
creation of the underlying \code{familiarModel} objects.}

\item{sample_limit}{(\emph{optional}) Set the upper limit of the number of samples
that are used during evaluation steps. Cannot be less than 20.

This setting can be specified per data element by providing a parameter
value in a named list with data elements, e.g.
\code{list("sample_similarity"=100, "permutation_vimp"=1000)}.

This parameter can be set for the following data elements:
\code{sample_similarity} and \code{ice_data}.}

\item{draw}{(\emph{optional}) Draws the plot if TRUE.}

\item{dir_path}{(\emph{optional}) Path to the directory where created performance
plots are saved to. Output is saved in the \code{feature_similarity}
subdirectory. If \code{NULL} no figures are saved, but are returned instead.}

\item{split_by}{(\emph{optional}) Splitting variables. This refers to column names
on which datasets are split. A separate figure is created for each split.
See details for available variables.}

\item{x_axis_by}{(\emph{optional}) Variable plotted along the x-axis of a plot.
The variable cannot overlap with variables provided to the \code{split_by} and
\code{y_axis_by} arguments (if used), but may overlap with other arguments. Only
one variable is allowed for this argument. See details for available
variables.}

\item{y_axis_by}{(\emph{optional}) Variable plotted along the y-axis of a plot.
The variable cannot overlap with variables provided to the \code{split_by} and
\code{x_axis_by} arguments (if used), but may overlap with other arguments. Only
one variable is allowed for this argument. See details for available
variables.}

\item{facet_by}{(\emph{optional}) Variables used to determine how and if facets of
each figure appear. In case the \code{facet_wrap_cols} argument is \code{NULL}, the
first variable is used to define columns, and the remaining variables are
used to define rows of facets. The variables cannot overlap with those
provided to the \code{split_by} argument, but may overlap with other arguments.
See details for available variables.}

\item{facet_wrap_cols}{(\emph{optional}) Number of columns to generate when facet
wrapping. If NULL, a facet grid is produced instead.}

\item{ggtheme}{(\emph{optional}) \code{ggplot} theme to use for plotting.}

\item{gradient_palette}{(\emph{optional}) Sequential or divergent palette used to
colour the similarity or distance between features in a heatmap.}

\item{gradient_palette_range}{(\emph{optional}) Numerical range used to span the
gradient. This should be a range of two values, e.g. \code{c(0, 1)}. Lower or
upper boundary can be unset by using \code{NA}. If not set, the full
metric-specific range is used.}

\item{outcome_palette}{(\emph{optional}) Sequential (\code{continuous}, \code{count}
outcomes) or qualitative (other outcome types) palette used to show outcome
values. This argument is ignored if the outcome is not shown.}

\item{outcome_palette_range}{(\emph{optional}) Numerical range used to span the
gradient of numeric (\code{continuous}, \code{count}) outcome values. This argument is
ignored for other outcome types or if the outcome is not shown.}

\item{x_label}{(\emph{optional}) Label to provide to the x-axis. If NULL, no label
is shown.}

\item{x_label_shared}{(\emph{optional}) Sharing of x-axis labels between facets.
One of three values:
\itemize{
\item \code{overall}: A single label is placed at the bottom of the figure. Tick
text (but not the ticks themselves) is removed for all but the bottom facet
plot(s).
\item \code{column}: A label is placed at the bottom of each column. Tick text (but
not the ticks themselves) is removed for all but the bottom facet plot(s).
\item \code{individual}: A label is placed below each facet plot. Tick text is kept.
}}

\item{y_label}{(\emph{optional}) Label to provide to the y-axis. If NULL, no label
is shown.}

\item{y_label_shared}{(\emph{optional}) Sharing of y-axis labels between facets.
One of three values:
\itemize{
\item \code{overall}: A single label is placed to the left of the figure. Tick text
(but not the ticks themselves) is removed for all but the left-most facet
plot(s).
\item \code{row}: A label is placed to the left of each row. Tick text (but not the
ticks themselves) is removed for all but the left-most facet plot(s).
\item \code{individual}: A label is placed to the left of each facet plot. Tick text is kept.
}}

\item{legend_label}{(\emph{optional}) Label to provide to the legend. If NULL, the
legend will not have a name.}

\item{outcome_legend_label}{(\emph{optional}) Label to provide to the legend for
outcome data. If NULL, the legend will not have a name. By default, \code{class},
\code{value} and \code{event} are used for \code{binomial} and \code{multinomial}, \code{continuous}
and \code{count}, and \code{survival} outcome types, respectively.}

\item{plot_title}{(\emph{optional}) Label to provide as figure title. If NULL, no
title is shown.}

\item{plot_sub_title}{(\emph{optional}) Label to provide as figure subtitle. If
NULL, no subtitle is shown.}

\item{caption}{(\emph{optional}) Label to provide as figure caption. If NULL, no
caption is shown.}

\item{x_range}{(\emph{optional}) Value range for the x-axis.}

\item{x_n_breaks}{(\emph{optional}) Number of breaks to show on the x-axis of the
plot. \code{x_n_breaks} is used to determine the \code{x_breaks} argument in case it
is unset.}

\item{x_breaks}{(\emph{optional}) Break points on the x-axis of the plot.}

\item{y_range}{(\emph{optional}) Value range for the y-axis.}

\item{y_n_breaks}{(\emph{optional}) Number of breaks to show on the y-axis of the
plot. \code{y_n_breaks} is used to determine the \code{y_breaks} argument in case it
is unset.}

\item{y_breaks}{(\emph{optional}) Break points on the y-axis of the plot.}

\item{rotate_x_tick_labels}{(\emph{optional}) Rotate tick labels on the x-axis by
90 degrees. Defaults to \code{TRUE}. Rotation of x-axis tick labels may also be
controlled through the \code{ggtheme}. In this case, \code{FALSE} should be provided
explicitly.}

\item{show_feature_dendrogram}{(\emph{optional}) Show feature dendrogram around the
main panel. Can be \code{TRUE}, \code{FALSE}, \code{NULL}, or a position, i.e. \code{top},
\code{bottom}, \code{left} and \code{right}.

If a position is specified, it should be appropriate with regard to the
\code{x_axis_by} or \code{y_axis_by} argument. If \code{x_axis_by} is \code{feature} (default),
the only valid positions are \code{top} (default) and \code{bottom}. Alternatively, if
\code{y_axis_by} is \code{feature}, the only valid positions are \code{right} (default) and
\code{left}.

A dendrogram can only be drawn from cluster methods that produce dendrograms,
such as \code{hclust}. A dendrogram can for example not be constructed using the
partitioning around medoids method (\code{pam}).}

\item{show_sample_dendrogram}{(\emph{optional}) Show sample dendrogram around the
main panel. Can be \code{TRUE}, \code{FALSE}, \code{NULL}, or a position, i.e. \code{top},
\code{bottom}, \code{left} and \code{right}.

If a position is specified, it should be appropriate with regard to the
\code{x_axis_by} or \code{y_axis_by} argument. If \code{y_axis_by} is \code{sample} (default),
the only valid positions are \code{right} (default) and \code{left}. Alternatively, if
\code{x_axis_by} is \code{sample}, the only valid positions are \code{top} (default) and
\code{bottom}.

A dendrogram can only be drawn from cluster methods that produce dendrograms,
such as \code{hclust}. A dendrogram can for example not be constructed using the
partitioning around medoids method (\code{pam}).}

\item{show_normalised_data}{(\emph{optional}) Flag that determines whether the data
shown in the main heatmap is normalised using the same settings as within
the analysis (\code{fixed}; default), using a standardisation method
(\code{set_normalisation}) that is applied separately to each dataset, or not at
all (\code{none}), which shows the data at the original scale, albeit with
batch-corrections.

Categorical variables are plotted to span 90\% of the entire numerical value
range, i.e. the levels of categorical variables with 2 levels are
represented at 5\% and 95\% of the range, with 3 levels at 5\%, 50\%, and 95\%,
etc.}

\item{show_outcome}{(\emph{optional}) Show outcome column(s) or row(s) in the
graph. Can be \code{TRUE}, \code{FALSE}, \code{NULL} or a position, i.e. \code{top}, \code{bottom},
\code{left} and \code{right}.

If a position is specified, it should be appropriate with regard to the
\code{x_axis_by} or \code{y_axis_by} argument. If \code{y_axis_by} is \code{sample} (default),
the only valid positions are \code{left} (default) and \code{right}. Alternatively, if
\code{x_axis_by} is \code{sample}, the only valid positions are \code{top} (default) and
\code{bottom}.

The outcome data will be drawn between the main panel and the sample
dendrogram (if any).}

\item{dendrogram_height}{(\emph{optional}) Height of the dendrogram. The height is
1.5 cm by default. Height is expected to be a grid unit (see \code{grid::unit}),
which also allows for specifying relative heights.}

\item{outcome_height}{(\emph{optional}) Height of an outcome data column/row. The
height is 0.3 cm by default. Height is expected to be a grid unit (see
\code{grid::unit}), which also allows for specifying relative heights. In case of
\code{survival} outcome data with multiple \code{evaluation_times}, this height is
multiplied by the number of time points.}

\item{evaluation_times}{(\emph{optional}) Times at which the event status of
time-to-event survival outcomes is determined. Only used for \code{survival}
outcome. If not specified, the values used when creating the underlying
\code{familiarData} objects are used.}

\item{width}{(\emph{optional}) Width of the plot. A default value is derived from
the number of facets.}

\item{height}{(\emph{optional}) Height of the plot. A default value is derived from
the number of features and the number of facets.}

\item{units}{(\emph{optional}) Plot size unit. Either \code{cm} (default), \code{mm} or \verb{in}.}

\item{export_collection}{(\emph{optional}) Exports the collection if TRUE.}

\item{verbose}{Flag to indicate whether feedback should be provided for the
plotting.}

\item{...}{
  Arguments passed on to \code{\link[=as_familiar_collection]{as_familiar_collection}}, \code{\link[ggplot2:ggsave]{ggplot2::ggsave}}, \code{\link[=extract_feature_expression]{extract_feature_expression}}
  \describe{
    \item{\code{familiar_data_names}}{Names of the dataset(s). Only used if the \code{object} parameter
is one or more \code{familiarData} objects.}
    \item{\code{collection_name}}{Name of the collection.}
    \item{\code{filename}}{File name to create on disk.}
    \item{\code{plot}}{Plot to save, defaults to last plot displayed.}
    \item{\code{device}}{Device to use. Can either be a device function
(e.g. \link{png}), or one of "eps", "ps", "tex" (pictex),
"pdf", "jpeg", "tiff", "png", "bmp", "svg" or "wmf" (windows only).}
    \item{\code{path}}{Path of the directory to save plot to: \code{path} and \code{filename}
are combined to create the fully qualified file name. Defaults to the
working directory.}
    \item{\code{scale}}{Multiplicative scaling factor.}
    \item{\code{dpi}}{Plot resolution. Also accepts a string input: "retina" (320),
"print" (300), or "screen" (72). Applies only to raster output types.}
    \item{\code{limitsize}}{When \code{TRUE} (the default), \code{ggsave()} will not
save images larger than 50x50 inches, to prevent the common error of
specifying dimensions in pixels.}
    \item{\code{bg}}{Background colour. If \code{NULL}, uses the \code{plot.background} fill value
from the plot theme.}
    \item{\code{feature_similarity}}{Table containing pairwise distance between
sample. This is used to determine cluster information, and indicate which
samples are similar. The table is created by the
\code{extract_sample_similarity} method.}
    \item{\code{data}}{A \code{dataObject} object, \code{data.table} or \code{data.frame} that
constitutes the data that are assessed.}
    \item{\code{feature_similarity_metric}}{Metric to determine pairwise similarity
between features. Similarity is computed in the same manner as for
clustering, and \code{feature_similarity_metric} therefore has the same options
as \code{cluster_similarity_metric}: \code{mcfadden_r2}, \code{cox_snell_r2},
\code{nagelkerke_r2}, \code{spearman}, \code{kendall} and \code{pearson}.

If not provided explicitly, this parameter is read from settings used at
creation of the underlying \code{familiarModel} objects.}
    \item{\code{sample_similarity_metric}}{Metric to determine pairwise similarity
between samples. Similarity is computed in the same manner as for
clustering, but \code{sample_similarity_metric} has different options that are
better suited to computing distance between samples instead of between
features: \code{gower}, \code{euclidean}.

The underlying feature data is scaled to the \eqn{[0, 1]} range (for
numerical features) using the feature values across the samples. The
normalisation parameters required can optionally be computed from feature
data with the outer 5\% (on both sides) of feature values trimmed or
winsorised. To do so append \verb{_trim} (trimming) or \verb{_winsor} (winsorising) to
the metric name. This reduces the effect of outliers somewhat.

If not provided explicitly, this parameter is read from settings used at
creation of the underlying \code{familiarModel} objects.}
    \item{\code{message_indent}}{Number of indentation steps for messages shown during
computation and extraction of various data elements.}
  }}
}
\value{
\code{NULL} or list of plot objects, if \code{dir_path} is \code{NULL}.
}
\description{
This method creates a heatmap based on data stored in a
\code{familiarCollection} object. Features in the heatmap are ordered so that
more similar features appear together.
}
\details{
This function generates heatmaps of feature expression, in which features and
samples are ordered according to their clustering.

Available splitting variables are: \code{fs_method}, \code{learner}, and \code{data_set}.
By default, the data is split by \code{fs_method}, \code{learner} and \code{data_set},
since the number of samples will typically differ between data sets, even
for the same feature selection method and learner.

The \code{x_axis_by} and \code{y_axis_by} arguments determine what data are shown
along which axis. Each argument takes one of \code{feature} and \code{sample}, and
both arguments should be unique. By default, features are shown along the
x-axis and samples along the y-axis.

Note that similarity is determined based on the underlying data. Hence the
ordering of features may differ between facets, and tick labels are
maintained for each panel.

Available palettes for \code{gradient_palette} are those listed by
\code{grDevices::palette.pals()} (requires R >= 4.0.0), \code{grDevices::hcl.pals()}
(requires R >= 3.6.0) and \code{rainbow}, \code{heat.colors}, \code{terrain.colors},
\code{topo.colors} and \code{cm.colors}, which correspond to the palettes of the same
name in \code{grDevices}. If not specified, a default palette based on palettes
in Tableau is used. You may also specify your own palette by using colour
names listed by \code{grDevices::colors()} or through hexadecimal RGB strings.

Labeling methods such as \code{set_fs_method_names} or \code{set_data_set_names} can
be applied to the \code{familiarCollection} object to update labels, and order
the output in the figure.
}
