% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cluster_k.R
\name{mt_cluster_k}
\alias{mt_cluster_k}
\title{Estimate optimal number of clusters.}
\usage{
mt_cluster_k(data, use = "sp_trajectories", dimensions = c("xpos",
  "ypos"), kseq = 2:15, compute = c("stability", "gap", "jump",
  "slope"), method = "hclust", weights = rep(1, length(dimensions)),
  pointwise = TRUE, minkowski_p = 2, hclust_method = "ward.D",
  kmeans_nstart = 10, n_bootstrap = 10, model_based = FALSE,
  n_gap = 10, na_rm = FALSE, verbose = FALSE)
}
\arguments{
\item{data}{a mousetrap data object created using one of the mt_import
functions (see \link{mt_example} for details). Alternatively, a trajectory
array can be provided directly (in this case \code{use} will be ignored).}

\item{use}{a character string specifying which trajectory data should be
used.}

\item{dimensions}{a character vector specifying which trajectory variables 
should be used. Can be of length 2 or 3, for two-dimensional or 
three-dimensional trajectories respectively.}

\item{kseq}{a numeric vector specifying the set of candidate values for
\code{k}. Defaults to 2:15, implying that all values of \code{k} within that
range are compared using the metrics specified in \code{compute}.}

\item{compute}{character vector specifying the measures to be computed. Can
be any subset of \code{c("stability","gap","jump","slope")}.}

\item{method}{character string specifying the type of clustering procedure
for the stability-based method. Either \code{hclust} or \code{kmeans}.}

\item{weights}{numeric vector specifying the relative importance of the 
variables specified in \code{dimensions}. Defaults to a vector of 1s 
implying equal importance. Technically, each variable is rescaled so that
the standard deviation matches the corresponding value in \code{weights}.
To use the original variables, set \code{weights = NULL}.}

\item{pointwise}{boolean specifying the way in which dissimilarity between
the trajectories is measured. If \code{TRUE} (the default),
\code{mt_distmat} measures the average dissimilarity and then sums the
results. If \code{FALSE}, \code{mt_distmat} measures dissimilarity once
(by treating the various points as independent dimensions). This is only
relevant if \code{method} is "hclust". See \link{mt_distmat} for further
details.}

\item{minkowski_p}{an integer specifying the distance metric for the cluster
solution. \code{minkowski_p = 1} computes the city-block distance,
\code{minkowski_p = 2} (the default) computes the Euclidean distance,
\code{minkowski_p = 3} the cubic distance, etc. Only relevant if
\code{method} is "hclust". See \link{mt_distmat} for further details.}

\item{hclust_method}{character string specifying the linkage criterion used. 
Passed on to the \code{method} argument of \link[stats]{hclust}. Default is
set to \code{ward.D}. Only relevant if \code{method} is "hclust".}

\item{kmeans_nstart}{integer specifying the number of reruns of the kmeans
procedure. Larger numbers reduce the risk of ending up in a local minimum. Passed
on to the \code{nstart} argument of \link[stats]{kmeans}. Only relevant if 
\code{method} is "kmeans".}

\item{n_bootstrap}{an integer specifying the number of bootstrap comparisons
used by \code{stability}. See \link[cstab]{cStability}.}

\item{model_based}{boolean specifying whether the model-based or the
model-free approach should be used by \code{stability} when \code{method} is
\code{kmeans}. See \link[cstab]{cStability} and Haslbeck & Wulff (2016).}

\item{n_gap}{integer specifying the number of simulated datasets used by
\code{gap}. See Tibshirani et al. (2001).}

\item{na_rm}{logical specifying whether trajectory points containing NAs 
should be removed. Removal is done column-wise. That is, if any trajectory 
has a missing value at, e.g., the 10th recorded position, the 10th position
is removed for all trajectories. This is necessary to compute distance
between trajectories.}

\item{verbose}{logical indicating whether the function should report its
progress.}
}
\value{
A list containing two lists that store the results of the different
  methods. \code{kopt} contains the estimated \code{k} for each of the
  methods specified in \code{compute}. \code{paths} contains the values for
  each \code{k} in \code{kseq} as computed by each of the methods specified
  in \code{compute}. The values in \code{kopt} are optima for each of the
  vectors in \code{paths}.
}
\description{
Estimates the optimal number of clusters (\code{k}) using various methods.
}
\details{
\code{mt_cluster_k} estimates the number of clusters (\code{k}) using four
commonly used k-selection methods (specified via \code{compute}): cluster
stability (\code{stability}), the gap statistic (\code{gap}), the jump
statistic (\code{jump}), and the slope statistic (\code{slope}).

Cluster stability methods select \code{k} as the number of clusters for which
the assignment of objects to clusters is most stable across bootstrap
samples. This function implements the model-based and model-free methods
described by Haslbeck & Wulff (2016). See references.

The remaining three methods select \code{k} as the value that optimizes the
gap statistic (Tibshirani, Walther, & Hastie, 2001), the jump statistic
(Sugar & James, 2003), and the slope statistic (Fujita, Takahashi, &
Patriota, 2014), respectively.

For clustering trajectories, it is often useful that the endpoints of all
trajectories share the same direction, e.g., that all trajectories end in the
top-left corner of the coordinate system (\link{mt_remap_symmetric} or
\link{mt_align} can be used to achieve this). Furthermore, it is recommended
to use spatialized trajectories (see \link{mt_spatialize}; Wulff et al., in
press; Haslbeck et al., 2018).
}
\examples{

\dontrun{
# Spatialize trajectories
KH2017 <- mt_spatialize(KH2017)

# Find k
results <- mt_cluster_k(KH2017, use="sp_trajectories")

# Retrieve results
results$kopt
results$paths
}

}
\references{
Haslbeck, J., & Wulff, D. U. (2016). Estimating the Number of
  Clusters via Normalized Cluster Instability. \emph{arXiv preprint}
  arXiv:1608.07494.

  Wulff, D. U., Haslbeck, J. M. B., Kieslich, P. J., Henninger, F., &
  Schulte-Mecklenbeck, M. (in press). Mouse-tracking: Detecting types in
  movement trajectories. In M. Schulte-Mecklenbeck, A. Kühberger, & J. G.
  Johnson (Eds.), \emph{A Handbook of Process Tracing Methods}. New York, NY:
  Routledge.

  Haslbeck, J. M. B., Wulff, D. U., Kieslich, P. J., Henninger, F., &
  Schulte-Mecklenbeck, M. (2018). \emph{Advanced mouse- and hand-tracking
  analysis: Detecting and visualizing clusters in movement trajectories}.
  Manuscript in preparation.
  
  Tibshirani, R., Walther, G., & Hastie, T. (2001). Estimating the number of
  clusters in a data set via the gap statistic. \emph{Journal of the Royal
  Statistical Society: Series B (Statistical Methodology), 63}(2), 411-423.

  Sugar, C. A., & James, G. M. (2003). Finding the number of clusters in a
  dataset: An information-theoretic approach. \emph{Journal of the American
  Statistical Association, 98}(463), 750-763.

  Fujita, A., Takahashi, D. Y., & Patriota, A. G. (2014). A non-parametric
  method to estimate the number of clusters. \emph{Computational Statistics &
  Data Analysis, 73}, 27-39.
}
\seealso{
\link{mt_distmat} for more information about how the distance matrix
  is computed when the hclust method is used.

  \link{mt_cluster} for performing trajectory clustering with a specified
  number of clusters.
}
\author{
Dirk U. Wulff (\email{dirk.wulff@gmail.com})

Jonas M. B. Haslbeck (\email{jonas.haslbeck@gmail.com})
}
