% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Index_calculations.r, R/ia_jackknife.R
\name{ia}
\alias{ia}
\alias{pair.ia}
\alias{resample.ia}
\alias{jack.ia}
\title{Index of Association}
\usage{
ia(gid, sample = 0, method = 1, quiet = FALSE, missing = "ignore",
  plot = TRUE, hist = TRUE, index = "rbarD", valuereturn = FALSE)

pair.ia(gid, sample = 0L, quiet = FALSE, plot = TRUE, low = "blue",
  high = "red", limits = NULL, index = "rbarD", method = 1L)

resample.ia(gid, n = NULL, reps = 999, quiet = FALSE,
  use_psex = FALSE, ...)

jack.ia(gid, n = NULL, reps = 999, quiet = FALSE)
}
\arguments{
\item{gid}{a \code{\link{genind}} or \code{\link{genclone}} object.}

\item{sample}{an integer indicating the number of permutations desired (e.g.
999).}

\item{method}{an integer from 1 to 4 indicating the sampling method desired. 
See \code{\link{shufflepop}} for details.}

\item{quiet}{Should the function print anything to the screen while it is 
performing calculations?

\code{TRUE} prints nothing.

\code{FALSE} (default) will print the population name and progress bar.}

\item{missing}{a character string. See \code{\link{missingno}} for details.}

\item{plot}{When \code{TRUE} (default), a heatmap of the values per locus
pair will be plotted (for \code{pair.ia()}). When \code{sample > 0}, 
different things happen with \code{ia()} and \code{pair.ia()}. For 
\code{ia()}, a histogram for the data set is plotted. For \code{pair.ia()},
p-values are added as text on the heatmap.}

\item{hist}{\code{logical} Deprecated. Use plot.}

\item{index}{\code{character} either "Ia" or "rbarD". If \code{plot = TRUE}, 
this indicates which index you want represented in the plot (default:
"rbarD").}

\item{valuereturn}{\code{logical} if \code{TRUE}, the index values from the 
reshuffled data are returned. If \code{FALSE} (default), the index is 
returned with associated p-values in a 4 element numeric vector.}

\item{low}{(for pair.ia) a color to use for low values when \code{plot =
TRUE}}

\item{high}{(for pair.ia) a color to use for high values when \code{plot =
TRUE}}

\item{limits}{(for pair.ia) the limits to be used for the color scale. 
Defaults to \code{NULL}. If you want to use a custom range, supply two
numbers between -1 and 1, (e.g. \code{limits = c(-0.15, 1)})}

\item{n}{an integer specifying the number of samples to be drawn. Defaults to
\code{NULL}, which then uses the number of multilocus genotypes.}

\item{reps}{an integer specifying the number of replicates to perform. 
Defaults to 999.}

\item{use_psex}{a logical. If \code{TRUE}, the samples will be weighted by the value 
of psex. Defaults to \code{FALSE}.}

\item{...}{arguments passed on to \code{\link{psex}}}
}
\value{
\subsection{for \code{pair.ia}}{
  A matrix with two columns and choose(nLoc(gid), 2) rows representing the
  values for Ia and rbarD per locus pair.
  }
  \subsection{If no sampling has occurred:}{ 
  A named number vector of length 2 giving the Index of Association, "Ia";
  and the Standardized Index of Association, "rbarD" 
  }
  \subsection{If there is sampling:}{ A named numeric vector of length 4
  with the following values:
  \itemize{
    \item{Ia - }{numeric. The index of association.} 
    \item{p.Ia - }{A number indicating the p-value resulting from a
    one-sided permutation test based on the number of samples indicated in
    the original call.}
    \item{rbarD - }{numeric. The standardized index of association.}
    \item{p.rD - }{A number indicating the p-value resulting from a
    one-sided permutation test based on the number of samples indicated in
    the original call.} 
  }
  }
  \subsection{If there is sampling and valuereturn = TRUE}{ 
  A list with the following elements:
  \itemize{ 
    \item{index }{The above vector}
    \item{samples }{An s by 2 data frame, where s is the
    number of samples defined. The columns are for the values of Ia and
    rbarD, respectively.}
  }
  }

\subsection{resample.ia()}{a data frame with the index of association and standardized index of
association in columns. Number of rows represents the number of reps.}
}
\description{
Calculate the Index of Association and Standardized Index of Association.
\itemize{
  \item \code{ia()} calculates the index of association over all loci in
  the data set.
  \item \code{pair.ia()} calculates the index of association in a pairwise
  manner among all loci.
  \item  \code{resample.ia()} calculates the index of association on a
  reduced data set multiple times to create a distribution, showing the
  variation of values observed at a given sample size (previously 
  \code{jack.ia}).
}
}
\details{
The index of association was originally developed by A.H.D. Brown while
  analyzing population structure of wild barley (Brown, 1980). It has been widely 
  used as a tool to detect clonal reproduction within populations. 
  Populations whose members are undergoing sexual reproduction, whether it be
  selfing or out-crossing, will produce gametes via meiosis, and thus have a 
  chance to shuffle alleles in the next generation. Populations whose members
  are undergoing clonal reproduction, however, generally do so via mitosis. 
  This means that the most likely mechanism for a change in genotype is via 
  mutation. The rate of mutation varies from species to species, but it is 
  rarely sufficiently high to approximate a random shuffling of alleles. The 
  index of association is a calculation based on the ratio of the variance of
  the raw number of differences between individuals and the sum of those 
  variances over each locus. You can also think of it as the observed 
  variance over the expected variance. If they are the same, then the index 
  is zero after subtracting one (from Maynard-Smith, 1993): \deqn{I_A = 
  \frac{V_O}{V_E}-1}{Ia = (Vo/Ve) - 1} Since the distance is more or less a binary 
  distance, any sort of marker can be used for this analysis. In the 
  calculation, phase is not considered, and any difference increases the 
  distance between two individuals. Remember that each column represents a 
  different allele and that each entry in the table represents the fraction 
  of the genotype made up by that allele at that locus. Notice also that the 
  sum of the rows all equal one. Poppr uses this to calculate distances by 
  simply taking the sum of the absolute values of the differences between 
  rows.
  
  The calculation for the distance between two individuals at a single locus 
  with \emph{a} allelic states and a ploidy of \emph{k} is as follows (except
  for Presence/Absence data): \deqn{ d = \displaystyle 
  \frac{k}{2}\sum_{i=1}^{a} \mid A_{i} - B_{i}\mid }{d(A,B) = (k/2)*sum(abs(Ai - Bi))} 
  To find the total number of differences 
  between two individuals over all loci, you just take \emph{d} over \emph{m}
  loci, a value we'll call \emph{D}:
  
  \deqn{D = \displaystyle \sum_{i=1}^{m} d_i }{D = sum(di)}
  
  These values are calculated over all possible combinations of individuals 
  in the data set, \eqn{{n \choose 2}}{choose(n, 2)} after which you end up 
  with \eqn{{n \choose 2}\cdot{}m}{choose(n, 2) * m} values of \emph{d} and 
  \eqn{{n \choose 2}}{choose(n, 2)} values of \emph{D}. Calculating the 
  observed variances is fairly straightforward (modified from Agapow and 
  Burt, 2001):
  
  \deqn{ V_O = \frac{\displaystyle \sum_{i=1}^{n \choose 2} D_{i}^2 - 
  \frac{(\displaystyle\sum_{i=1}^{n \choose 2} D_{i})^2}{{n \choose 2}}}{{n 
  \choose 2}}}{Vo = var(D)}
  
  Calculating the expected variance is the sum of each of the variances of 
  the individual loci. The calculation at a single locus, \emph{j}, is the 
  same as the previous equation, substituting values of \emph{d} for 
  \emph{D}:
  
  \deqn{ var_j = \frac{\displaystyle \sum_{i=1}^{n \choose 2} d_{i}^2 - 
  \frac{(\displaystyle\sum_{i=1}^{n \choose 2} d_i)^2}{{n \choose 2}}}{{n 
  \choose 2}} }{Varj = var(dj)}
  
  The expected variance is then the sum of all the variances over all 
  \emph{m} loci:
  
  \deqn{ V_E = \displaystyle \sum_{j=1}^{m} var_j }{Ve = sum(var(dj))}
  
  Agapow and Burt showed that \eqn{I_A}{Ia} increases steadily with the
  number of loci, so they came up with an approximation that is widely used,
  \eqn{\bar r_d}{rbarD}. For the derivation, see the manual for
  \emph{multilocus}.
  
  \deqn{ \bar r_d = \frac{V_O - V_E} {2\displaystyle 
  \sum_{j=1}^{m}\displaystyle \sum_{k > j}^{m}\sqrt{var_j\cdot{}var_k}} 
  }{rbarD = (Vo - Ve)/(2*sum(sum(sqrt(var(dj)*var(dk)))))}
}
\note{
\code{jack.ia()} is deprecated as the name was misleading. Please use
  \code{resample.ia()}
}
\examples{
data(nancycats)
ia(nancycats)

# Pairwise over all loci:
data(partial_clone)
res <- pair.ia(partial_clone)
plot(res, low = "black", high = "green", index = "Ia")

# Resampling
data(Pinf)
resample.ia(Pinf, reps = 99)

\dontrun{

# Pairwise IA with p-values (this will take about a minute)
res <- pair.ia(partial_clone, sample = 999)
head(res)

# Plot the results of resampling rbarD. 
library("ggplot2")
Pinf.resamp <- resample.ia(Pinf, reps = 999)
ggplot(Pinf.resamp[2], aes(x = rbarD)) +
  geom_histogram() +
  geom_vline(xintercept = ia(Pinf)[2]) +
  geom_vline(xintercept = ia(clonecorrect(Pinf))[2], linetype = 2) +
  xlab(expression(bar(r)[d]))

# Get the indices back and plot the distributions.
nansamp <- ia(nancycats, sample = 999, valuereturn = TRUE)

plot(nansamp, index = "Ia")
plot(nansamp, index = "rbarD")

# You can also adjust the parameters for how large to display the text
# so that it's easier to export it for publication/presentations.
library("ggplot2")
plot(nansamp, labsize = 5, linesize = 2) +
  theme_bw() +                                      # adding a theme
  theme(text = element_text(size = rel(5))) +       # changing text size
  theme(plot.title = element_text(size = rel(4))) + # changing title size
  ggtitle("Index of Association of nancycats")      # adding a new title

# Get the index for each population.
lapply(seppop(nancycats), ia)
# With sampling
lapply(seppop(nancycats), ia, sample = 999)

# Plot pairwise ia for all populations in a grid with cowplot
# Set up the library and data
library("cowplot")
data(monpop)
splitStrata(monpop) <- ~Tree/Year/Symptom
setPop(monpop)      <- ~Tree

# Need to set up a list in which to store the plots.
plotlist        <- vector(mode = "list", length = nPop(monpop))
names(plotlist) <- popNames(monpop)

# Loop through the populations, calculate pairwise ia, plot, and then
# capture the plot in the list
for (i in popNames(monpop)){
  x <- pair.ia(monpop[pop = i], limits = c(-0.15, 1)) # subset, calculate, and plot
  plotlist[[i]] <- ggplot2::last_plot() # save the last plot
}

# Use the plot_grid function to plot.
plot_grid(plotlist = plotlist, labels = paste("Tree", popNames(monpop)))

}
}
\references{
Paul-Michael Agapow and Austin Burt. Indices of multilocus 
  linkage disequilibrium. \emph{Molecular Ecology Notes}, 1(1-2):101-102, 
  2001.
  
  A.H.D. Brown, M.W. Feldman, and E. Nevo. Multilocus structure of natural 
  populations of \emph{Hordeum spontaneum}. \emph{Genetics}, 96(2):523-536, 1980.
  
  J M Smith, N H Smith, M O'Rourke, and B G Spratt. How clonal are bacteria? 
  \emph{Proceedings of the National Academy of Sciences}, 90(10):4384-4388, 1993.
}
\seealso{
\code{\link{poppr}}, \code{\link{missingno}}, 
  \code{\link{import2genind}}, \code{\link{read.genalex}}, 
  \code{\link{clonecorrect}}, \code{\link{win.ia}}, \code{\link{samp.ia}}
}
\author{
Zhian N. Kamvar
}
