% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ip_knn_cad.R
\name{IpKnnCad}
\alias{IpKnnCad}
\title{Incremental processing KNN-based Conformal Anomaly Detector (KNN-CAD)}
\usage{
IpKnnCad(data, n.train, threshold = 1, l = 19, k = 27,
  ncm.type = "ICAD", reducefp = TRUE, to.next.iteration = NULL)
}
\arguments{
\item{data}{Numerical vector with training and test dataset.}

\item{n.train}{Number of points of the dataset that correspond to the
training set.}

\item{threshold}{Anomaly threshold.}

\item{l}{Window length.}

\item{k}{Number of neighbours to take into account.}

\item{ncm.type}{Non Conformity Measure to use: "ICAD" or "LDCD".}

\item{reducefp}{If TRUE reduces false positives.}

\item{to.next.iteration}{list with the necessary parameters to execute in
the next iteration.}
}
\value{
Dataset composed of the following columns:

  \item{is.anomaly}{1 if the value is anomalous, 0 otherwise.}
  \item{anomaly.score}{Probability of anomaly.}
  \item{to.next.iteration}{Last result returned by the algorithm. It is a list
  containing the following items.}
  \itemize{
     \item \code{training.set} Last training set values used
     in the previous iteration and required for the next run.
     \item \code{calibration.set} Last calibration set values used
     in the previous iteration and required for the next run.
     \item \code{sigma} Last covariance matrix calculated in the previous
     iteration and required for the next run.
     \item \code{alphas} Last calibration alpha values calculated
     in the previous iteration and required for the next run.
     \item \code{last.data} Last values of the dataset converted into
     multi-dimensional vectors.
     \item \code{pred} Parameter that is used to reduce false positives. Only
     necessary when \code{reducefp} is TRUE.
     \item \code{record.count} Number of observations that have been
     processed up to the last iteration.
 }
}
\description{
\code{IpKnnCad} allows the calculation of anomalies using KNN-CAD in an
incremental processing mode. KNN-CAD is a model-free anomaly
detection method for univariate time-series which adapts itself to
non-stationarity in the data stream and provides probabilistic abnormality
scores based on the conformal prediction paradigm.
}
\details{
\code{data} must be a numerical vector without NA values.
\code{threshold} must be a numeric value between 0 and 1. If the anomaly
score obtained for an observation is greater than the \code{threshold}, the
observation will be considered abnormal. \code{l} must be a numerical value
between 1 and \code{n}; \code{n} being the length of the training data.
Take into account that the value of l has a direct impact on the
computational cost, so very high values will make the execution time longer.
\code{k} parameter must be a numerical value less than the \code{n.train}
value. \code{ncm.type} determines the non-conformity measurement to be used.
ICAD calculates dissimilarity as the sum of the distances of the nearest k
neighbours and LDCD as the average. \code{to.next.iteration}
is the last result returned by some previous execution of this algorithm.
The first time the algorithm is executed its value is NULL. However, to run a
new batch of data without having to include it in the old dataset and restart
the process, only this parameter returned by the last run is needed.

This algorithm can be used for both classical and incremental processing.
It should be noted that in case of having a finite dataset, the
\code{\link{CpKnnCad}} algorithm is faster.
Incremental processing can be used in two ways. 1) Processing all available
data and saving the returned \code{to.next.iteration} list for future runs
with new data. 2) Using the
\href{https://CRAN.R-project.org/package=stream}{stream} library when
there is too much data to fit into memory. An example of this use case is
provided in the examples.
}
\examples{
## EXAMPLE 1: ----------------------
## Batch-style usage: the complete series is passed in a single call, in the
## same way as with CpKnnCad.

## Build the series: random values in 1:100, with higher values injected at
## positions 70 to 90 and two isolated spikes at positions 25 and 320
set.seed(100)
n.obs <- 500
series <- sample(1:100, n.obs, replace = TRUE)
series[70:90] <- sample(110:115, 21, replace = TRUE)
series[25] <- 200
series[320] <- 170
observations <- data.frame(timestamp = 1:n.obs, value = series)

## Detector configuration
knn.args <- list(threshold = 1, n.train = 50, l = 19, k = 17)

## Run the detector over the whole series
detection <- IpKnnCad(
  data = observations$value,
  n.train = knn.args[["n.train"]],
  threshold = knn.args[["threshold"]],
  l = knn.args[["l"]],
  k = knn.args[["k"]],
  ncm.type = "ICAD",
  reducefp = TRUE
)

## Attach the anomaly flags to the data and plot them
flagged <- cbind(observations, is.anomaly = detection$is.anomaly)
PlotDetections(flagged, print.time.window = FALSE, title = "KNN-CAD ANOMALY DETECTOR")

## EXAMPLE 2: ----------------------
## You can use it in an incremental way. This is an example using the stream
## library. This library allows the simulation of streaming operation.
\donttest{
# install.packages("stream")
library("stream")

## Generate data: random values in 1:100, with higher values injected at
## positions 70 to 90 and two isolated spikes at positions 25 and 320
set.seed(100)
n <- 500
x <- sample(1:100, n, replace = TRUE)
x[70:90] <- sample(110:115, 21, replace = TRUE)
x[25] <- 200
x[320] <- 170
df <- data.frame(timestamp = 1:n, value = x)
dsd_df <- DSD_Memory(df)

## Initialize parameters for the loop
last.res <- NULL  # result of the previous call; NULL before the first batch
res <- NULL       # detections accumulated over all batches
nread <- 100      # number of points requested from the stream per iteration
numIter <- n\%/\%nread

## Set parameters
params.KNN <- list(threshold = 1, n.train = 50, l = 19, k = 17)

## Calculate anomalies
## seq_len() gives an empty loop when numIter is 0, whereas 1:numIter would
## iterate over c(1, 0).
for (i in seq_len(numIter)) {
  # read new data
  newRow <- get_points(dsd_df, n = nread, outofpoints = "ignore")
  # run the detector on this batch; on the first pass last.res is NULL, so
  # to.next.iteration is NULL as well
  last.res <- IpKnnCad(
    data = newRow$value,
    n.train = params.KNN$n.train,
    threshold = params.KNN$threshold,
    l = params.KNN$l,
    k = params.KNN$k,
    ncm.type = "ICAD",
    reducefp = TRUE,
    to.next.iteration = last.res$to.next.iteration
  )
  # append the flags of this batch to the accumulated result
  if (!is.null(last.res$is.anomaly)) {
    res <- rbind(res, cbind(newRow, is.anomaly = last.res$is.anomaly))
  }
}

## Plot results
PlotDetections(res, title = "KNN-CAD ANOMALY DETECTOR")
}


}
\references{
V. Ishimtsev, I. Nazarov, A. Bernstein and E. Burnaev. Conformal
k-NN Anomaly Detector for Univariate Data Streams. arXiv e-prints, Jun. 2017.
}
