% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/corpus_reshape.R
\name{corpus_reshape}
\alias{corpus_reshape}
\title{Recast the document units of a corpus}
\usage{
corpus_reshape(x, to = c("sentences", "paragraphs", "documents"),
  use_docvars = TRUE, ...)
}
\arguments{
\item{x}{corpus whose document units will be reshaped}

\item{to}{new document units in which the corpus will be recast}

\item{use_docvars}{if \code{TRUE}, repeat the docvar values for each 
segmented text; if \code{FALSE}, drop the docvars in the segmented corpus. 
Dropping the docvars might be useful in order to conserve space or if these
are not desired for the segmented corpus.}

\item{...}{additional arguments passed to \code{\link{tokens}}, since the
syntactic segmenter uses this function}
}
\value{
A corpus object with the documents defined as the new units,
  including document-level meta-data identifying the original documents.
}
\description{
For a corpus, reshape (or recast) the documents to a different level of aggregation.  
Units of aggregation can be defined as documents, paragraphs, or sentences.
Because the corpus object records its current "units" status, it is possible
to move from recast units back to original units, for example from documents
to sentences, and then back to documents (possibly after modifying the sentences).
}
\examples{
# build a small two-document corpus with docvars and a corpus-level note
corp <- corpus(
    c(textone = "This is a sentence.  Another sentence.  Yet another.",
      textwo = "Premiere phrase.  Deuxieme phrase."),
    docvars = data.frame(country = c("UK", "USA"), year = c(1990, 2000)),
    metacorpus = list(notes = "Example showing how corpus_reshape() works.")
)
summary(corp)
# recast the documents as sentence units and summarise the result
summary(corpus_reshape(corp, to = "sentences"), showmeta = TRUE)

# inaugural speeches with Year greater than 2004, recast as paragraph units
(corp2 <- corpus_subset(data_corpus_inaugural, Year > 2004))
corp2_para <- corpus_reshape(corp2, to = "paragraphs")
corp2_para
summary(corp2_para, 100, showmeta = TRUE)
## Note that Bush 2005 is recorded as a single paragraph because that text
## used a single \\n to mark the end of a paragraph.
}
\keyword{corpus}
