% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/convex_clustering.R
\name{convex_clustering}
\alias{convex_clustering}
\title{Find a target number of clusters in the data using convex clustering}
\usage{
convex_clustering(
  X,
  W,
  target_low,
  target_high = NULL,
  max_iter_phase_1 = 2000,
  max_iter_phase_2 = 20,
  lambda_init = 0.01,
  factor = 0.025,
  tau = 0.001,
  center = TRUE,
  scale = TRUE,
  eps_conv = 1e-06,
  burnin_iter = 25,
  max_iter_conv = 5000,
  save_clusterpath = FALSE,
  verbose = 0
)
}
\arguments{
\item{X}{An \eqn{n} x \eqn{p} numeric matrix. This function assumes that each
row represents an object with \eqn{p} attributes.}

\item{W}{A \code{sparseweights} object, see \link{sparse_weights}.}

\item{target_low}{Lower bound on the number of clusters that should be
searched for. If \code{target_high = NULL}, this is the exact number of
clusters that is searched for.}

\item{target_high}{Upper bound on the number of clusters that should be
searched for. Default is \code{NULL}, in which case it is set equal to
\code{target_low}.}

\item{max_iter_phase_1}{Maximum number of iterations to find an upper and
lower bound for the value for lambda for which the desired number of clusters
is attained. Default is 2000.}

\item{max_iter_phase_2}{Maximum number of iterations to refine the upper
and lower bounds for lambda. Default is 20.}

\item{lambda_init}{The first value for lambda other than 0 to use for convex
clustering. Default is 0.01.}

\item{factor}{The percentage by which to increase lambda in each step.
Default is 0.025.}

\item{tau}{Parameter to compute the threshold to fuse clusters. Default is
0.001.}

\item{center}{If \code{TRUE}, center \code{X} so that each column has mean
zero. Default is \code{TRUE}.}

\item{scale}{If \code{TRUE}, scale the loss function to ensure that the
cluster solution is invariant to the scale of \code{X}. Default is
\code{TRUE}. Not recommended to set to \code{FALSE} unless comparing to
algorithms that minimize the unscaled convex clustering loss function.}

\item{eps_conv}{Parameter for determining convergence of the minimization.
Default is 1e-6.}

\item{burnin_iter}{Number of updates of the loss function that are done
without step doubling. Default is 25.}

\item{max_iter_conv}{Maximum number of iterations for minimizing the loss
function. Default is 5000.}

\item{save_clusterpath}{If \code{TRUE}, store the solution that minimized
the loss function for each lambda. This is required for drawing the
clusterpath. Default is \code{FALSE}. Storing the clusterpath coordinates
requires \eqn{n} x \eqn{p} x (number of lambdas) values, which may require
too much memory for large data sets.}

\item{verbose}{Verbosity of the information printed during clustering.
Default is 0, no output.}
}
\value{
A \code{cvxclust} object containing the following
\item{\code{info}}{A dataframe containing for each value for lambda: the
number of different clusters, and the value of the loss function at the
minimum.}
\item{\code{merge}}{The merge table containing the order in which the
observations in \code{X} are clustered.}
\item{\code{height}}{The value for lambda at which each reduction in the
number of clusters occurs.}
\item{\code{order}}{The order of the observations in \code{X} in order to
draw a dendrogram without conflicting branches.}
\item{\code{elapsed_time}}{The number of seconds that elapsed while
running the code. Note that this does not include the time required for
input checking and possibly scaling and centering \code{X}.}
\item{\code{coordinates}}{The clusterpath coordinates. Only part of the
output if \code{save_clusterpath = TRUE}.}
\item{\code{lambdas}}{The values for lambda for which a clustering was
found.}
\item{\code{eps_fusions}}{The threshold for cluster fusions that was used by
the algorithm.}
\item{\code{phase_1_instances}}{The number of instances of the loss function
that were minimized while finding an upper and lower bound for lambda. The
sum \code{phase_1_instances + phase_2_instances} gives the total number of
instances solved.}
\item{\code{phase_2_instances}}{The number of instances of the loss function
that were minimized while refining the value for lambda. The sum
\code{phase_1_instances + phase_2_instances} gives the total number of
instances solved.}
\item{\code{num_clusters}}{The different numbers of clusters that have been
found.}
\item{\code{n}}{The number of observations in \code{X}.}
}
\description{
\code{convex_clustering} attempts to find the number of clusters
specified by the user by means of convex clustering. The algorithm looks for
each number of clusters between \code{target_low} and \code{target_high}. If
\code{target_low} = \code{target_high}, the algorithm searches for a single
clustering. It is recommended to specify a range around the desired number of
clusters, as not every number of clusters between 1 and \code{nrow(X)} may be
attainable due to numerical inaccuracies.
}
\examples{
# Load the example data and split off the third column
data(two_half_moons)
moons <- as.matrix(two_half_moons)
X <- moons[, -3]
y <- moons[, 3]

# Sparse weights in dictionary of keys format, using k = 5 and phi = 8
W <- sparse_weights(X, 5, 8.0)

# Convex clustering with a target range of 2 to 5 clusters
res1 <- convex_clustering(X, W, target_low = 2, target_high = 5)

# Draw the clusterings for 2 to 5 clusters in a 2 x 2 grid, keeping the
# previous graphical parameters so that they can be restored afterwards
oldpar <- par(mfrow = c(2, 2))
for (n_clusters in c(2, 3, 4, 5)) {
    plot(X, col = clusters(res1, n_clusters),
         main = paste(n_clusters, "clusters"), pch = 19)
}

# A more generalized approach to plotting the results of a range of clusters
res2 <- convex_clustering(X, W, target_low = 2, target_high = 7)

# Lay out one panel per number of clusters that was found, in a grid that is
# as close to square as possible
found <- res2$num_clusters
n_panels <- length(found)
n_cols <- ceiling(sqrt(n_panels))
par(mfrow = c(ceiling(n_panels / n_cols), n_cols))

for (idx in seq_along(found)) {
    labels <- clusters(res2, found[idx])
    n_found <- length(unique(labels))

    plot(X, col = labels, main = paste(n_found, "clusters"), pch = 19)
}

# Restore the graphical parameters that were active before plotting
par(oldpar)

}
\seealso{
\link{convex_clusterpath}, \link{sparse_weights}
}
