\name{fdt}
\alias{fdt}
\alias{fdt.default}
\alias{fdt.data.frame}
\alias{fdt.matrix}

\title{
  Frequency distribution table for numerical data
}

\description{
  An S3 generic and its methods to easily build a frequency distribution table (\samp{fdt}) from
  \code{vector}, \code{data.frame} and \code{matrix} objects.
}

\usage{
## S3 generic
fdt(x, \dots)

## S3 methods
\method{fdt}{default}(x,
    k,
    start,
    end,
    h,
    breaks=c('Sturges', 'Scott', 'FD'),
    right=FALSE,
    na.rm=FALSE, \dots)

\method{fdt}{data.frame}(x,
    k,
    by,
    breaks=c('Sturges', 'Scott', 'FD'),
    right=FALSE,
    na.rm=FALSE, \dots)

\method{fdt}{matrix}(x,
    k,
    breaks=c('Sturges', 'Scott', 'FD'),
    right=FALSE,
    na.rm=FALSE, \dots)
}

\arguments{
  \item{x}{a \code{vector}, \code{data.frame} or \code{matrix} object.
    If \samp{x} is \code{data.frame} or \code{matrix} it must contain at least 
    one numeric column.}
  \item{k}{number of class intervals.}
  \item{start}{left endpoint of the first class interval.}
  \item{end}{right endpoint of the last class interval.}
  \item{h}{class interval width.}
  \item{by}{categorical variable used for grouping each numeric variable,
    useful only on \code{data.frame}.}
  \item{breaks}{method used to determine the number of interval classes,
    c(\dQuote{Sturges}, \dQuote{Scott}, \dQuote{FD}).}
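  \item{right}{logical. If \code{TRUE}, the class intervals are closed on the
    right (and open on the left); if \code{FALSE} (the default), they are closed
    on the left and open on the right.}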
  \item{na.rm}{logical. Should missing values be removed? (default = \code{FALSE}).}
  \item{\dots}{potential further arguments (required by generic).}
}

\details{
  The simplest way to run \samp{fdt} is to supply only the \samp{x}
  object, for example: \code{nm <- fdt(x)}. In this case the default values
  of \samp{breaks} (\dQuote{Sturges}) and \samp{right} (\code{FALSE})
  will be used.

  The following combinations of arguments can also be provided:
  \itemize{
    \item \samp{x} and \samp{k} (number of class intervals);
    \item \samp{x}, \samp{start} (left endpoint of the first class interval) and
    \samp{end} (right endpoint of the last class interval); or
    \item \samp{x}, \samp{start}, \samp{end} and \samp{h} (class interval width).
  }
  These options make the \samp{fdt} very easy and flexible.

  The \samp{fdt} object stores information to be used by methods \code{summary},
  \code{print}, \code{plot}, \code{mean}, \code{median} and \code{mfv}. The result of plot is a histogram.
  The methods \code{summary}, \code{print} and \code{plot} provide a reasonable
  set of parameters to format and plot the \samp{fdt} object in a pretty
  (and publishable) way.
}

\value{
  For \code{fdt} the method \code{fdt.default} returns a list of class \code{fdt.default} with the slots:
  \item{\samp{table}}{A \code{data.frame} storing the \samp{fdt};}
  \item{\samp{breaks}}{A \code{vector} of length 4 storing \samp{start}, \samp{end},
    \samp{h} and \samp{right} of the \samp{fdt} generated by this method;}
  \item{\samp{data}}{A vector of the data \samp{x} provided.}

  The methods \code{fdt.data.frame} and \code{fdt.matrix}
  return a list of class \code{fdt.multiple}.
  This \code{list} has one slot for each numeric (\code{fdt})
  variable of the \samp{x} provided. Each slot, corresponding to each numeric
  variable, stores the same slots of the \code{fdt.default} described above.
}

\author{
  Faria, J. C. \cr
  Allaman, I. B. \cr
  Jelihovschi, E. G.
}

\seealso{
  \code{\link[graphics]{hist}} provided by \pkg{graphics} and
  \code{\link[base]{table}}, \code{\link[base]{cut}} both provided by \pkg{base}.
}

\examples{
library(fdth)

#========
# Vector
#========
x <- rnorm(n=1e3,
           mean=5,
           sd=1)

str(x)

# x
(ft <- fdt(x))

# x, alternative breaks
(ft <- fdt(x,
           breaks='Scott'))

# x, k
(ft <- fdt(x,
           k=10))

# x, start, end
range(x)

(ft <- fdt(x,
           start=floor(min(x)),
           end=floor(max(x) + 1)))

# x, start, end, h
(ft <- fdt(x,
           start=floor(min(x)),
           end=floor(max(x) + 1),
           h=1))

# Effect of right
sort(x <- rep(1:3, 3))

(ft <- fdt(x,
           start=1,
           end=4,
           h=1))

(ft <- fdt(x,
           start=0,
           end=3,
           h=1,
           right=TRUE))

#================================================
# Data.frame: multivariate with two categorical
#================================================
mdf <- data.frame(c1=sample(LETTERS[1:3], 1e2, TRUE),
                  c2=as.factor(sample(1:10, 1e2, TRUE)),
                  n1=c(NA, NA, rnorm(96, 10, 1), NA, NA),
                  n2=rnorm(100, 60, 4),
                  n3=rnorm(100, 50, 4),
                  stringsAsFactors=TRUE)

head(mdf)

#(ft <- fdt(mdf))  # Error message due to presence of NA values

(ft <- fdt(mdf,
           na.rm=TRUE))

str(mdf)

# By factor
(ft <- fdt(mdf,
           k=5,
           by='c1',
           na.rm=TRUE))

# Choose the FD criterion
(ft <- fdt(mdf,
           breaks='FD',
           by='c1',
           na.rm=TRUE))

# k
(ft <- fdt(mdf,
           k=5,
           by='c2',
           na.rm=TRUE))

(ft <- fdt(iris,
           k=10))

(ft <- fdt(iris,
           k=5,
           by='Species'))

#=========================
# Matrices: multivariate
#=========================
(ft <-fdt(state.x77))

summary(ft,
        format=TRUE)

summary(ft,
        format=TRUE,
        pattern='\%.2f')
}

\keyword{fdt}
\keyword{fdt_cat}
\keyword{frequency}
\keyword{distribution}
\keyword{table}
