% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/inpdfr_PRO_extractTxt.R
\name{getPDF}
\alias{getPDF}
\title{Extract text from PDF files and return a word-occurrence data.frame}
\usage{
getPDF(myPDFs, minword = 1, maxword = 20, minFreqWord = 1,
  pathToPdftotext = "")
}
\arguments{
\item{myPDFs}{A character vector containing PDF file names.}

\item{minword}{An integer specifying the minimum number of letters per word
in the returned data.frame.}

\item{maxword}{An integer specifying the maximum number of letters per
word in the returned data.frame.}

\item{minFreqWord}{An integer specifying the minimum word frequency in the
returned data.frame.}

\item{pathToPdftotext}{A character string containing an alternative path to
the XPDF \code{pdftotext} program; see the Details section.}
}
\value{
A list of lists with a word-occurrence data.frame and the file name.
}
\description{
\code{getPDF} returns a word-occurrence data.frame from PDF files.
It needs \code{XPDF} in order to run (\url{http://www.foolabs.com/xpdf/download.html}),
and uses \code{parallel} to perform parallel computation.
}
\details{
\code{getPDF} uses the XPDF \code{pdftotext} program to extract the
  content of PDF files into a TXT file. If \code{pdftotext} is not in the
  \code{PATH}, an alternative is to provide the full path to the program via
  the \code{pathToPdftotext} parameter.
}
\examples{
\dontrun{
getPDF(myPDFs = "mypdf.pdf")
}
}

