% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/01_prepare.R
\name{fst_prepare}
\alias{fst_prepare}
\title{Read in and format survey text responses}
\usage{
fst_prepare(
  data,
  question,
  id,
  model = "ftb",
  stopword_list = "nltk",
  language = "fi",
  weights = NULL,
  add_cols = NULL,
  manual = FALSE,
  manual_list = ""
)
}
\arguments{
\item{data}{A dataframe of survey responses which contains an open-ended
question.}

\item{question}{The column in the dataframe which contains the open-ended
question.}

\item{id}{The column in the dataframe which contains the ids for the
responses.}

\item{model}{A language model available for the \pkg{udpipe} package.
\code{"ftb"} (default) or \code{"tdt"} are recognised as shorthand for
\code{"finnish-ftb"} and \code{"finnish-tdt"}. The full list is available in
the \pkg{udpipe} documentation or via \code{fst_print_available_models()}.}

\item{stopword_list}{A valid stopword list, known as 'source' in
\code{stopwords::stopwords}. The default is \code{"nltk"}; \code{"manual"}
can be used to indicate that a manual list will be provided, or
\code{"none"} if you don't want to remove stopwords.}

\item{language}{Two-letter ISO code for the language of the stopword list,
default is \code{"fi"}.}

\item{weights}{Optional, the column of the dataframe which contains the
respective weights for each response.}

\item{add_cols}{Optional, a column (or columns) from the dataframe which
contain other information you'd like to retain (for instance, dimension
columns for splitting the data for comparison plots).}

\item{manual}{An optional boolean to indicate that a manual list will be
provided; \code{stopword_list = "manual"} can be used as well or instead.}

\item{manual_list}{A manual list of stopwords.}
}
\value{
A dataframe of text in CoNLL-U format.
}
\description{
Creates a dataframe in CoNLL-U format from a dataframe containing text,
using the \pkg{udpipe} package and a language model, plus any additional
columns that are included such as \code{weights} or columns added through
\code{add_cols}. Stopwords and punctuation are removed unless the
\code{stopword_list} argument is \code{"none"}.
}
\details{
\code{fst_prepare()} produces a dataframe containing survey text
responses in CoNLL-U format with stopwords optionally removed.
}
\examples{
\dontrun{
i <- "fsd_id"
cb <- child
dev <- dev_coop
fst_prepare(data = cb, question = "q7", id = 'fsd_id', weights = 'paino')
fst_prepare(data = dev, question = "q11_2", id = i, add_cols = c('gender'))
fst_prepare(data = dev, question = "q11_3", id = i, add_cols = 'gender')
fst_prepare(data = child, question = "q7", id = i, model = 'swedish-lines')
unlink("finnish-ftb-ud-2.5-191206.udpipe")
unlink("finnish-tdt-ud-2.5-191206.udpipe")
unlink("swedish-lines-ud-2.5-191206.udpipe")
}
}
