% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jaccard.R
\name{jaccard}
\alias{jaccard}
\title{Compute Jaccard Dissimilarity from a Sparse Matrix.}
\usage{
jaccard(x, weighted = TRUE, threads = 1)
}
\arguments{
\item{x}{A \link[Matrix]{sparseMatrix}.}

\item{weighted}{A logical value, to use abundances (\code{weighted = TRUE}) or presence/absence (\code{weighted = FALSE}) (default: TRUE).}

\item{threads}{A whole number, the number of threads to use in \link[RcppParallel]{setThreadOptions} (default: 1).}
}
\value{
A column x column \link[stats]{dist} object.
}
\description{
Calculates the Jaccard dissimilarity of a \link[Matrix]{sparseMatrix} pairwise for each column.
}
\details{
The weighted Jaccard dissimilarity between two samples \eqn{A} and \eqn{B}, each of length \eqn{n}, is defined as:

\eqn{d(A,B) = 1 - \frac{ \sum_{i=1}^{n} \min(A_i, B_i) }{ \sum_{i=1}^{n} \max(A_i, B_i) }}

where \eqn{A_i} and \eqn{B_i} are the abundances of the \eqn{i}-th feature in sample \eqn{A} and \eqn{B}, respectively.
When \code{weighted = FALSE}, abundances are reduced to presence/absence, i.e. every present feature is set to 1 (classical Jaccard for binary data).
}
\references{
Jaccard, P. (1912) The distribution of the flora in the alpine zone. New Phytologist, 11(2), 37–50.
}
\examples{
library("OmicFlow")

metadata_file <- system.file("extdata", "metadata.tsv", package = "OmicFlow")
counts_file <- system.file("extdata", "counts.tsv", package = "OmicFlow")
features_file <- system.file("extdata", "features.tsv", package = "OmicFlow")
tree_file <- system.file("extdata", "tree.newick", package = "OmicFlow")

taxa <- metagenomics$new(
metaData = metadata_file,
countData = counts_file,
featureData = features_file,
treeData = tree_file
)

taxa$feature_subset(Kingdom == "Bacteria")
taxa$normalize()

jaccard(taxa$countData)
}
