% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cooccurrences.R
\docType{methods}
\name{cooccurrences}
\alias{cooccurrences}
\alias{cooccurrences,corpus-method}
\alias{cooccurrences,character-method}
\alias{cooccurrences,slice-method}
\alias{cooccurrences,partition-method}
\alias{cooccurrences,subcorpus-method}
\alias{cooccurrences,context-method}
\alias{cooccurrences,partition_bundle-method}
\alias{cooccurrences,Cooccurrences-method}
\alias{cooccurrences,remote_corpus-method}
\alias{cooccurrences,remote_subcorpus-method}
\title{Get cooccurrence statistics.}
\usage{
cooccurrences(.Object, ...)

\S4method{cooccurrences}{corpus}(
  .Object,
  query,
  cqp = is.cqp,
  p_attribute = getOption("polmineR.p_attribute"),
  boundary = NULL,
  left = getOption("polmineR.left"),
  right = getOption("polmineR.right"),
  stoplist = NULL,
  positivelist = NULL,
  regex = FALSE,
  keep = NULL,
  cpos = NULL,
  method = "ll",
  mc = getOption("polmineR.mc"),
  verbose = FALSE,
  progress = FALSE,
  ...
)

\S4method{cooccurrences}{character}(
  .Object,
  query,
  cqp = is.cqp,
  p_attribute = getOption("polmineR.p_attribute"),
  boundary = NULL,
  left = getOption("polmineR.left"),
  right = getOption("polmineR.right"),
  stoplist = NULL,
  positivelist = NULL,
  regex = FALSE,
  keep = NULL,
  cpos = NULL,
  method = "ll",
  mc = getOption("polmineR.mc"),
  verbose = FALSE,
  progress = FALSE,
  ...
)

\S4method{cooccurrences}{slice}(
  .Object,
  query,
  cqp = is.cqp,
  left = getOption("polmineR.left"),
  right = getOption("polmineR.right"),
  p_attribute = getOption("polmineR.p_attribute"),
  boundary = NULL,
  stoplist = NULL,
  positivelist = NULL,
  keep = NULL,
  method = "ll",
  mc = FALSE,
  progress = TRUE,
  verbose = FALSE,
  ...
)

\S4method{cooccurrences}{partition}(
  .Object,
  query,
  cqp = is.cqp,
  left = getOption("polmineR.left"),
  right = getOption("polmineR.right"),
  p_attribute = getOption("polmineR.p_attribute"),
  boundary = NULL,
  stoplist = NULL,
  positivelist = NULL,
  keep = NULL,
  method = "ll",
  mc = FALSE,
  progress = TRUE,
  verbose = FALSE,
  ...
)

\S4method{cooccurrences}{subcorpus}(
  .Object,
  query,
  cqp = is.cqp,
  left = getOption("polmineR.left"),
  right = getOption("polmineR.right"),
  p_attribute = getOption("polmineR.p_attribute"),
  boundary = NULL,
  stoplist = NULL,
  positivelist = NULL,
  keep = NULL,
  method = "ll",
  mc = FALSE,
  progress = TRUE,
  verbose = FALSE,
  ...
)

\S4method{cooccurrences}{context}(.Object, method = "ll", verbose = FALSE)

\S4method{cooccurrences}{partition_bundle}(
  .Object,
  query,
  verbose = FALSE,
  mc = getOption("polmineR.mc"),
  ...
)

\S4method{cooccurrences}{Cooccurrences}(.Object, query)

\S4method{cooccurrences}{remote_corpus}(.Object, ...)

\S4method{cooccurrences}{remote_subcorpus}(.Object, ...)
}
\arguments{
\item{.Object}{A \code{partition} object, or a \code{character} vector with a CWB corpus.}

\item{...}{Further parameters that will be passed into bigmatrix (applies only if big = TRUE).}

\item{query}{A query, either a character vector to match a token, or a CQP query.}

\item{cqp}{Defaults to \code{is.cqp}-function, or provide
\code{TRUE}/\code{FALSE}; relevant only if query is not \code{NULL}.}

\item{p_attribute}{The p-attribute of the tokens/the query.}

\item{boundary}{If provided, it will be checked that the corpus positions of
windows do not extend beyond the left and right boundaries of the region
defined by the s-attribute where the match occurs.}

\item{left}{A single \code{integer} value defining the number of tokens to the
left of the query match to include in the context. Advanced usage: (a) If
\code{left} is a length-one \code{character} vector stating an s-attribute, the
context will be expanded to the (left) boundary of the region where the
match occurs. (b) If \code{left} is a named length-one \code{integer} vector, this
value is the number of regions of the structural attribute referred to by the
vector's name to the left of the query match that are included in the
context.}

\item{right}{A single \code{integer} value, a length-one \code{character} vector or a
named length-one \code{integer} value, with equivalent effects to argument
\code{left}.}

\item{stoplist}{Exclude a query hit from analysis if stopword(s) is/are in
context (relevant only if query is not \code{NULL}).}

\item{positivelist}{Character vector or numeric vector: include a query hit
only if a token in \code{positivelist} is present. If \code{positivelist} is
a character vector, it is assumed to provide regular expressions (matching
may take incredibly long if the list is long). Relevant only if query is not
\code{NULL}.}

\item{regex}{A \code{logical} value, whether stoplist/positivelist are
interpreted as regular expressions.}

\item{keep}{list with tokens to keep}

\item{cpos}{integer vector with corpus positions, defaults to NULL - then the
corpus positions for the whole corpus will be used}

\item{method}{The statistical test(s) to use (defaults to "ll").}

\item{mc}{whether to use multicore}

\item{verbose}{A \code{logical} value, whether to be verbose.}

\item{progress}{A \code{logical} value, whether to output progress bar.}
}
\value{
a cooccurrences-class object
}
\description{
Get cooccurrence statistics.
}
\examples{
# activate the corpora used in the examples below
use("polmineR")
use(pkg = "RcppCWB", corpus = "REUTERS")

# cooccurrences of a query within a partition
merkel <- partition("GERMAPARLMINI", interjection = "speech", speaker = ".*Merkel", regex = TRUE)
merkel <- enrich(merkel, p_attribute = "word")
cooc <- cooccurrences(merkel, query = "Deutschland")

# use subset-method to filter results step by step; each step narrows
# down the result of the previous one
a <- cooccurrences("REUTERS", query = "oil")
b <- subset(a, !is.na(ll))
c <- subset(b, !word \%in\% tm::stopwords("en"))
d <- subset(c, count_coi >= 5)
# (10.83 is presumably the chi-squared critical value for p = 0.001, df = 1)
e <- subset(d, ll >= 10.83)
format(e)

# using pipe operator with subset (same filters as above, chained)
cooccurrences("REUTERS", query = "oil") \%>\%
  subset(!is.na(ll)) \%>\%
  subset(!word \%in\% tm::stopwords("en")) \%>\%
  subset(count_coi >= 5) \%>\%
  subset(ll >= 10.83) \%>\%
  format()

# generate datatables htmlwidget with buttons for export (Excel & more)
# (alternatively use openxlsx::write.xlsx())
\donttest{
interactive_table <- cooccurrences("REUTERS", query = "oil") \%>\%
  format() \%>\%
  DT::datatable(
    extensions = "Buttons",
    options = list(dom = 'Btip', buttons = c("excel", "pdf", "csv"))
  )
if (interactive()) show(interactive_table)
}

# compute cooccurrences for a set of partitions
# (example not run by default to save time on test machines)
\donttest{
pb <- partition_bundle("GERMAPARLMINI", s_attribute = "speaker")
# keep only the partitions with at least 25 matches for the query
ps <- count(pb, query = "Deutschland")[Deutschland >= 25][["partition"]]
pb_min <- pb[ps]
y <- cooccurrences(pb_min, query = "Deutschland")
if (interactive()) y[[1]]
if (interactive()) y[[2]]

# the same kind of analysis, starting from a corpus object and splitting
# a subcorpus by speaker
y2 <- corpus("GERMAPARLMINI") \%>\%
  subset(speaker \%in\% c("Hubertus Heil", "Angela Dorothea Merkel")) \%>\%
  split(s_attribute = "speaker") \%>\%
  cooccurrences(query = "Deutschland")
}
}
\references{
Baker, Paul (2006): \emph{Using Corpora in Discourse Analysis}. London: Continuum, pp. 95-120 (ch. 5).

Manning, Christopher D.; Schuetze, Hinrich (1999): \emph{Foundations of Statistical Natural Language
Processing}. MIT Press: Cambridge, Mass., pp. 151-189 (ch. 5).
}
\seealso{
See the documentation for the \code{\link{ll}}-method for an
explanation of the computation of the log-likelihood statistic.
}
\author{
Andreas Blaette
}
