% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/weight.R
\name{weight}
\alias{weight}
\alias{weight.DNAbin}
\alias{weight.AAbin}
\alias{weight.list}
\alias{weight.dendrogram}
\alias{weight.default}
\title{Sequence weighting}
\usage{
weight(x, ...)

\method{weight}{DNAbin}(x, method = "Gerstein", k = 5, ...)

\method{weight}{AAbin}(x, method = "Gerstein", k = 5, ...)

\method{weight}{list}(x, method = "Gerstein", k = 5, residues = NULL,
  gap = "-", ...)

\method{weight}{dendrogram}(x, method = "Gerstein", ...)

\method{weight}{default}(x, method = "Gerstein", k = 5, residues = NULL,
  gap = "-", ...)
}
\arguments{
\item{x}{an object of class \code{"dendrogram"}, or a list or matrix of
sequences (possibly a \code{"DNAbin"} or \code{"AAbin"} object) from which
to derive a dendrogram using the \code{\link[kmer]{cluster}} function in the
\code{\link[kmer]{kmer}} package.}

\item{...}{additional arguments to be passed between methods.}

\item{method}{a character string indicating the weighting method to be used.
Currently only that of Gerstein et al. (1994) is supported
(\code{method = "Gerstein"}).}

\item{k}{integer representing the k-mer size to be used in tree-based
sequence weighting. Defaults to 5. Note that higher
values of k may be slow to compute and use excessive memory due to
the large numbers of calculations required.}

\item{residues}{either \code{NULL} (default; emitted residues are
automatically detected from the sequences), a case-sensitive character vector
specifying the residue alphabet, or one of the character strings
\code{"RNA"}, \code{"DNA"}, \code{"AA"}, \code{"AMINO"}. Note that the default
option can be slow for large lists of character vectors. Furthermore, the
default setting \code{residues = NULL} will not detect rare residues that are
not present in the sequences, and thus will not assign them emission
probabilities in the model. Specifying the residue alphabet is therefore
recommended unless \code{x} is a \code{"DNAbin"} or \code{"AAbin"} object.}

\item{gap}{the character used to represent gaps in the alignment matrix
(if applicable). Ignored for \code{"DNAbin"} or \code{"AAbin"} objects.
Defaults to \code{"-"} otherwise.}
}
\value{
a named vector of weights, the sum of which is equal to
   the total number of sequences (average weight = 1).
}
\description{
Weight sequences based on a tree.
}
\details{
This is a generic function that uses the agglomerative method of
  Gerstein et al. (1994) to weight sequences based on their relatedness
  as derived from a phylogenetic tree. Methods are available for
  \code{"dendrogram"} objects, \code{"DNAbin"} and \code{"AAbin"}
  sequence objects (as lists or matrices) and sequences in standard
  ASCII character format provided either as lists or matrices.

  For further details on sequence weighting see Durbin et al.
  (1998), chapter 5.8.
}
\examples{
  ## weight the sequences in the woodmouse dataset from the ape package
  library(ape)
  data(woodmouse)
  woodmouse.weights <- weight(woodmouse)
  woodmouse.weights
}
\references{
Durbin R, Eddy SR, Krogh A, Mitchison G (1998) Biological
  sequence analysis: probabilistic models of proteins and nucleic acids.
  Cambridge University Press, Cambridge, United Kingdom.

  Gerstein M, Sonnhammer ELL, Chothia C (1994) Volume changes in protein evolution.
  \emph{Journal of Molecular Biology}, \strong{236}, 1067-1078.
}
\seealso{
\code{\link[kmer]{cluster}}
}
\author{
Shaun Wilkinson
}
