% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/summ_classmetric.R
\name{summ_classmetric}
\alias{summ_classmetric}
\alias{summ_classmetric_df}
\title{Summarize pair of distributions with classification metric}
\usage{
summ_classmetric(f, g, threshold, method = "F1")

summ_classmetric_df(f, g, threshold, method = "F1")
}
\arguments{
\item{f}{A pdqr-function of any \link[=meta_type]{type} and
\link[=meta_class]{class}. Represents distribution of "true negative" values.}

\item{g}{A pdqr-function of any type and class. Represents distribution of
"true positive" values.}

\item{threshold}{A numeric vector of classification threshold(s).}

\item{method}{Method of classification metric (might be a vector for
\code{summ_classmetric_df()}). Should be one of "TPR", "TNR", "FPR", "FNR",
"PPV", "NPV", "FDR", "FOR", "LR+", "LR-", "Acc", "ER", "GM", "F1", "OP",
"MCC", "YI", "MK", "Jaccard", "DOR" (with possible aliases, see Details).}
}
\value{
\code{summ_classmetric()} returns a numeric vector, of the same length as
\code{threshold}, representing classification metrics for different threshold
values.

\code{summ_classmetric_df()} returns a data frame with rows corresponding to
\code{threshold} values. First column is "threshold" (with \code{threshold} values),
and all other columns represent classification metrics, one for every input
method (see Examples).
}
\description{
Compute metric of the following one-dimensional binary classification setup:
any \code{x} value not more than \code{threshold} value is classified as "negative"; if
strictly greater - "positive". Classification metrics are computed based on
two pdqr-functions: \code{f}, which represents the distribution of values which
\emph{should be} classified as "negative" ("true negative"), and \code{g} - the same
for "positive" ("true positive").
}
\details{
The binary classification setup used here to compute metrics is a
simplified version of the most common one, in which there is a finite set of
already classified objects. Usually, there are \code{N} objects which are truly
"negative" and \code{P} truly "positive" ones. Values \code{N} and \code{P} can vary, which
often results in class imbalance. However, in current setup both \code{N} and
\code{P} are equal to 1 (total probability of \code{f} and \code{g}).

In common setup, classification of all \code{N + P} objects results in the
following values: "TP" (number of truly "positive" values classified as
"positive"), "TN" (number of negatives classified as "negative"), "FP"
(number of negatives falsely classified as "positive"), and "FN" (number of
positives falsely classified as "negative"). In current setup all those
values are equal to respective "rates" (because \code{N} and \code{P} are both equal to
1).

Both \code{summ_classmetric()} and \code{summ_classmetric_df()} allow aliases to some
classification metrics (for readability purposes).

Following classification metrics are available:
\itemize{
\item Simple metrics:
\itemize{
\item \emph{True positive rate}, \code{method} "TPR" (aliases: "TP", "sensitivity",
"recall"): proportion of actual positives correctly classified as such.
Computed as \code{1 - as_p(g)(threshold)}.
\item \emph{True negative rate}, \code{method} "TNR" (aliases: "TN", "specificity"):
proportion of actual negatives correctly classified as such. Computed as
\code{as_p(f)(threshold)}.
\item \emph{False positive rate}, \code{method} "FPR" (aliases: "FP", "fall-out"):
proportion of actual negatives falsely classified as "positive". Computed
as \code{1 - as_p(f)(threshold)}.
\item \emph{False negative rate}, \code{method} "FNR" (aliases: "FN", "miss_rate"):
proportion of actual positives falsely classified as "negative". Computed
as \code{as_p(g)(threshold)}.
\item \emph{Positive predictive value}, \code{method} "PPV" (alias: "precision"):
proportion of output positives that are actually "positive". Computed as
\code{TP / (TP + FP)}.
\item \emph{Negative predictive value}, \code{method} "NPV": proportion of output
negatives that are actually "negative". Computed as \code{TN / (TN + FN)}.
\item \emph{False discovery rate}, \code{method} "FDR": proportion of output positives
that are actually "negative". Computed as \code{FP / (TP + FP)}.
\item \emph{False omission rate}, \code{method} "FOR": proportion of output negatives
that are actually "positive". Computed as \code{FN / (TN + FN)}.
\item \emph{Positive likelihood ratio}, \code{method} "LR+": measures how much
the odds of being "positive" increase when value is classified as "positive".
Computed as \code{TPR / (1 - TNR)}.
\item \emph{Negative likelihood ratio}, \code{method} "LR-": measures how much
the odds of being "positive" decrease when value is classified as "negative".
Computed as \code{(1 - TPR) / TNR}.
}
\item Combined metrics (for all, except "error rate", bigger value represents
better classification performance):
\itemize{
\item \emph{Accuracy}, \code{method} "Acc" (alias: "accuracy"): proportion of total
number of input values that were correctly classified. Computed as
\code{(TP + TN) / 2} (here 2 is used because of special classification setup,
\code{TP + TN + FP + FN = 2}).
\item \emph{Error rate}, \code{method} "ER" (alias: "error_rate"): proportion of
total number of input values that were incorrectly classified. Computed
as \code{(FP + FN) / 2}.
\item \emph{Geometric mean}, \code{method} "GM": geometric mean of TPR and TNR.
Computed as \code{sqrt(TPR * TNR)}.
\item \emph{F1 score}, \code{method} "F1": harmonic mean of PPV and TPR. Computed as
\code{2*TP / (2*TP + FP + FN)}.
\item \emph{Optimized precision}, \code{method} "OP": accuracy, penalized for
imbalanced class performance. Computed as
\code{Acc - abs(TPR - TNR) / (TPR + TNR)}.
\item \emph{Matthews correlation coefficient}, \code{method} "MCC" (alias: "corr"):
correlation between the observed and predicted classifications. Computed
as \code{(TP*TN - FP*FN) / sqrt((TP+FP) * (TN+FN))} (here equalities
\code{TP+FN = 1} and \code{TN+FP = 1} are used to simplify formula).
\item \emph{Youden’s index}, \code{method} "YI" (aliases: "youden", "informedness"):
evaluates the discriminative power of the classification setup. Computed
as \code{TPR + TNR - 1}.
\item \emph{Markedness}, \code{method} "MK" (alias: "markedness"): evaluates the
predictive power of the classification setup. Computed as
\code{PPV + NPV - 1}.
\item \emph{Jaccard}, \code{method} "Jaccard": accuracy ignoring correct classification
of negatives. Computed as \code{TP / (TP + FP + FN)}.
\item \emph{Diagnostic odds ratio}, \code{method} "DOR" (alias: "odds_ratio"): ratio
between positive and negative likelihood ratios. Computed as
\code{"LR+" / "LR-"}.
}
}
}
\examples{
d_unif <- as_d(dunif)
d_norm <- as_d(dnorm)
t_vec <- c(0, 0.5, 0.75, 1.5)

summ_classmetric(d_unif, d_norm, threshold = t_vec, method = "F1")
summ_classmetric(d_unif, d_norm, threshold = t_vec, method = "Acc")

summ_classmetric_df(
  d_unif, d_norm, threshold = t_vec, method = c("F1", "Acc")
)

# Using method aliases
summ_classmetric_df(
  d_unif, d_norm, threshold = t_vec, method = c("TPR", "sensitivity")
)

}
\seealso{
\link{summ_separation} for computing optimal separation threshold (which
is symmetrical with respect to \code{f} and \code{g}).

Other summary functions: 
\code{\link{summ_center}()},
\code{\link{summ_distance}()},
\code{\link{summ_entropy}()},
\code{\link{summ_hdr}()},
\code{\link{summ_interval}()},
\code{\link{summ_moment}()},
\code{\link{summ_order}()},
\code{\link{summ_prob_true}()},
\code{\link{summ_pval}()},
\code{\link{summ_quantile}()},
\code{\link{summ_roc}()},
\code{\link{summ_separation}()},
\code{\link{summ_spread}()}
}
\concept{summary functions}
