% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dist_wasserstein.R
\name{wasserstein}
\alias{wasserstein}
\alias{wassersteinD}
\title{Wasserstein Distance via Linear Programming}
\usage{
wasserstein(X, Y, p = 2, wx = NULL, wy = NULL)

wassersteinD(D, p = 2, wx = NULL, wy = NULL)
}
\arguments{
\item{X}{an \eqn{(M\times P)} matrix of row observations.}

\item{Y}{an \eqn{(N\times P)} matrix of row observations.}

\item{p}{an exponent for the order of the distance (default: 2).}

\item{wx}{a length-\eqn{M} marginal density that sums to \eqn{1}. If \code{NULL} (default), uniform weights are used.}

\item{wy}{a length-\eqn{N} marginal density that sums to \eqn{1}. If \code{NULL} (default), uniform weights are used.}

\item{D}{an \eqn{(M\times N)} distance matrix \eqn{d(x_m, y_n)} between two sets of observations.}
}
\value{
a named list containing\describe{
\item{distance}{\eqn{W_p} distance value.}
\item{plan}{an \eqn{(M\times N)} nonnegative matrix for the optimal transport plan.}
}
}
\description{
Given two empirical measures
\deqn{\mu = \sum_{m=1}^M \mu_m \delta_{X_m}\quad\textrm{and}\quad \nu = \sum_{n=1}^N \nu_n \delta_{Y_n},}
the \eqn{p}-Wasserstein distance for \eqn{p\geq 1} is defined through the following optimization problem
\deqn{
  W_p^p(\mu, \nu) = \min_{\pi \in \Pi(\mu, \nu)} \sum_{m=1}^M \sum_{n=1}^N \pi_{mn} \|X_m - Y_n\|^p,
}
where \eqn{\Pi(\mu, \nu)} denotes the set of joint distributions (transport plans) with marginals \eqn{\mu} and \eqn{\nu}. 
This function solves the above problem with linear programming, which is a standard approach for 
exact computation of the empirical Wasserstein distance. Please see the sections below
for a detailed description of how to use each function.
}
\section{Using \code{wasserstein()} function}{

We assume the empirical measures are defined on the Euclidean space \eqn{\mathcal{X}=\mathbb{R}^P},
\deqn{\mu = \sum_{m=1}^M \mu_m \delta_{X_m}\quad\textrm{and}\quad \nu = \sum_{n=1}^N \nu_n \delta_{Y_n}}
and the distance used here is the standard Euclidean distance \eqn{d(x,y) = \|x-y\|}. Here, the
marginals \eqn{(\mu_1,\mu_2,\ldots,\mu_M)} and \eqn{(\nu_1,\nu_2,\ldots,\nu_N)} correspond to
\code{wx} and \code{wy}, respectively.
}

\section{Using \code{wassersteinD()} function}{

If other distance measures or underlying spaces are of interest, users may instead provide
a distance matrix \code{D} rather than the observation matrices, where
\deqn{D = (D_{mn}) \in \mathbb{R}^{M\times N},\quad D_{mn} = d(X_m, Y_n)}
for arbitrary distance metrics beyond the \eqn{\ell_2} norm.
}

\examples{
#-------------------------------------------------------------------
#  Wasserstein Distance between Samples from Two Bivariate Normals
#
# * class 1 : samples from Gaussian with mean=(-1, -1)
# * class 2 : samples from Gaussian with mean=(+1, +1)
#-------------------------------------------------------------------
## SMALL EXAMPLE : two point clouds, one observation per row
m <- 20
n <- 10
X <- matrix(rnorm(2 * m, mean = -1), ncol = 2)  # m obs. for X
Y <- matrix(rnorm(2 * n, mean = +1), ncol = 2)  # n obs. for Y

## COMPUTE WITH DIFFERENT ORDERS p = 1, 2, 5
out1 <- wasserstein(X, Y, p = 1)
out2 <- wasserstein(X, Y, p = 2)
out5 <- wasserstein(X, Y, p = 5)

## VISUALIZE : SHOW THE PLAN AND DISTANCE
#  each panel title reports the order and the rounded distance
pm1 <- paste0("Order p=1\n distance=", round(out1$distance, 2))
pm2 <- paste0("Order p=2\n distance=", round(out2$distance, 2))
pm5 <- paste0("Order p=5\n distance=", round(out5$distance, 2))

opar <- par(no.readonly = TRUE)   # remember the current graphics state
par(mfrow = c(1, 3), pty = "s")   # three square panels side by side
image(out1$plan, axes = FALSE, main = pm1)
image(out2$plan, axes = FALSE, main = pm2)
image(out5$plan, axes = FALSE, main = pm5)
par(opar)                         # restore the graphics state

\dontrun{
## COMPARE WITH ANALYTIC RESULTS
#  For two Gaussians with the same covariance, the 2-Wasserstein
#  distance is known, so the simulated values can be compared with it.

niter <- 1000            # number of iterations
vdist <- numeric(niter)  # one computed distance per iteration
for (i in seq_len(niter)) {
  # sample sizes are drawn at random between 30 and 50
  mm <- sample(30:50, 1)
  nn <- sample(30:50, 1)

  X <- matrix(rnorm(2 * mm, mean = -1), ncol = 2)
  Y <- matrix(rnorm(2 * nn, mean = +1), ncol = 2)

  vdist[i] <- wasserstein(X, Y, p = 2)$distance

  # progress message every 10th iteration
  if ((i \%\% 10) == 0) {
    print(paste0("iteration ", i, "/", niter, " complete."))
  }
}

# Visualize : histogram of the distances with the reference value in red
opar <- par(no.readonly = TRUE)
hist(vdist, main = "Monte Carlo Simulation")
abline(v = sqrt(8), lwd = 2, col = "red")
par(opar)
}

}
\references{
\insertRef{peyre_2019_ComputationalOptimalTransport}{T4transport}
}
\concept{dist}
