\name{hyphen}
\alias{hyphen}
\title{Automatic hyphenation}
\usage{
  hyphen(words, hyph.pattern = NULL, min.length = 3,
    rm.hyph = TRUE, corp.rm.class = "nonpunct",
    corp.rm.tag = c(), quiet = FALSE, cache = TRUE)
}
\arguments{
  \item{words}{Either an object of class
  \code{\link[koRpus]{kRp.tagged-class}},
  \code{\link[koRpus]{kRp.txt.freq-class}} or
  \code{\link[koRpus]{kRp.analysis-class}}, or a character
  vector with words to be hyphenated.}

  \item{hyph.pattern}{Either an object of class
  \code{\link[koRpus]{kRp.hyph.pat-class}}, or a valid
  character string naming the language of the patterns to
  be used. See details.}

  \item{min.length}{Integer, minimum number of letters a word
  must have to be considered for hyphenation.}

  \item{rm.hyph}{Logical, whether hyphens already present in
  words should be removed before pattern matching.}

  \item{corp.rm.class}{A character vector with word classes
  which should be ignored. The default value
  \code{"nonpunct"} has special meaning and will cause the
  result of \code{kRp.POS.tags(lang, c("punct","sentc"),
  list.classes=TRUE)} to be used. Relevant only if
  \code{words} is a valid koRpus object.}

  \item{corp.rm.tag}{A character vector with POS tags which
  should be ignored. Relevant only if \code{words} is a
  valid koRpus object.}

  \item{quiet}{Logical. If \code{FALSE}, short status
  messages will be shown.}

  \item{cache}{Logical. \code{hyphen()} can cache results
  to speed up the process. If this option is set to
  \code{TRUE}, the current cache will be queried and new
  tokens will be added to it. Caches are language-specific
  and reside in an environment, i.e., they are discarded at
  the end of a session.}
}
\value{
  An object of class \code{\link[koRpus]{kRp.hyphen-class}}
}
\description{
  This function implements word hyphenation, based on
  Liang's algorithm.
}
\details{
  For this to work the function must be told which pattern
  set it should use to find the right hyphenation spots. If
  \code{words} is already a tagged object, its language
  definition might be used. Otherwise, in addition to the
  words to be processed you must specify
  \code{hyph.pattern}. You have two options: If you want to
  use one of the built-in language patterns, just set it to
  the corresponding language abbreviation. As of this version
  valid choices are: \itemize{ \item {\code{"de"}} {---
  German (new spelling, since 1996)} \item
  {\code{"de.old"}} {--- German (old spelling, 1901--1996)}
  \item {\code{"en"}} {--- English (UK)} \item
  {\code{"en.us"}} {--- English (US)} \item {\code{"es"}}
  {--- Spanish} \item {\code{"it"}} {--- Italian} \item
  {\code{"ru"}} {--- Russian} } Alternatively, if you'd rather
  use your own pattern set, \code{hyph.pattern} can be an
  object of class \code{kRp.hyph.pat}.

  The built-in hyphenation patterns were derived from the
  patterns available on CTAN[1] under the terms of the
  LaTeX Project Public License[2], see
  \code{\link[koRpus:hyph.XX]{hyph.XX}} for detailed
  information.
}
\references{
  Liang, F.M. (1983). \emph{Word Hy-phen-a-tion by
  Com-put-er}.  Dissertation, Stanford University, Dept. of
  Computer Science.

  [1]
  \url{http://tug.ctan.org/tex-archive/language/hyph-utf8/tex/generic/hyph-utf8/patterns/}

  [2]
  \url{http://www.ctan.org/tex-archive/macros/latex/base/lppl.txt}
}
\seealso{
  \code{\link[koRpus:read.hyph.pat]{read.hyph.pat}},
  \code{\link[koRpus:manage.hyph.pat]{manage.hyph.pat}}
}
\keyword{hyphenation}

