% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fuzzyforest_fit.R
\name{ff}
\alias{ff}
\title{Fits fuzzy forest algorithm.}
\usage{
ff(X, y, Z = NULL, module_membership,
  screen_params = screen_control(min_ntree = 5000),
  select_params = select_control(min_ntree = 5000), final_ntree = 5000,
  num_processors = 1, nodesize, test_features = NULL, test_y = NULL)
}
\arguments{
\item{X}{A data.frame.
Each column corresponds to a feature vector.}

\item{y}{Response vector.  For classification, y should be a
factor.  For regression, y should be
numeric.}

\item{Z}{A data.frame. Additional features that are not to be
screened out at the screening step.}

\item{module_membership}{A character vector giving the module membership of
each feature.}

\item{screen_params}{Parameters for screening step of fuzzy forests.
See \code{\link[fuzzyforest]{screen_control}} for
details. \code{screen_params} is an object of type
\code{screen_control}.}

\item{select_params}{Parameters for selection step of fuzzy forests.
See \code{\link[fuzzyforest]{select_control}} for details.
\code{select_params} is an object of type
\code{select_control}.}

\item{final_ntree}{Number of trees grown in the final random forest.
This random forest contains all selected features.}

\item{num_processors}{Number of processors used to fit random forests.}

\item{nodesize}{Minimum size of terminal nodes.  The default is 1 for
classification and 5 for regression.  If the sample size is very large,
the trees will be grown extremely deep.
This may lead to issues with memory usage and may
lead to significant increases in the time it takes
the algorithm to run.  In this case,
it may be useful to increase \code{nodesize}.}

\item{test_features}{A data.frame containing features from a test set.
The data.frame should contain the features in both
\code{X} and \code{Z}.}

\item{test_y}{The responses for the test set.}
}
\value{
An object of type \code{\link[fuzzyforest]{fuzzy_forest}}.  This
object is a list containing useful output of fuzzy forests.
In particular, it contains a data.frame listing the selected features.
It also includes the random forest fit using the selected features.
}
\description{
Fits the fuzzy forest algorithm and returns a
fuzzy forest object.
}
\note{
This work was partially funded by NSF IIS 1251151 and AMFAR 8721SC.
}
\examples{
# ff() requires that the partition of the covariates into modules has
# already been determined. This makes ff() handy when the user wants to
# try out multiple settings of WGCNA before running fuzzy forests.
library(WGCNA)
library(randomForest)
library(fuzzyforest)
data(ctg)
y <- ctg$NSP
X <- ctg[, 2:22]

# Run WGCNA with the chosen tuning parameters.
net <- blockwiseModules(X, power = 6, minModuleSize = 1, nThreads = 1)

# Extract the module membership of each covariate.
module_membership <- net$colors

# Tuning parameters shared by the screening and selection steps.
mtry_factor <- 1
min_ntree <- 500
drop_fraction <- 0.5
ntree_factor <- 1

# Control object for the screening step.
screen_params <- screen_control(
  drop_fraction = drop_fraction,
  keep_fraction = 0.25,
  min_ntree = min_ntree,
  ntree_factor = ntree_factor,
  mtry_factor = mtry_factor
)

# Control object for the selection step.
select_params <- select_control(
  drop_fraction = drop_fraction,
  number_selected = 5,
  min_ntree = min_ntree,
  ntree_factor = ntree_factor,
  mtry_factor = mtry_factor
)

# Fit fuzzy forests.
\donttest{
ff_fit <- ff(
  X, y,
  module_membership = module_membership,
  screen_params = screen_params,
  select_params = select_params,
  final_ntree = 500
)

# Extract the variable importance rankings.
vims <- ff_fit$feature_list

# Plot the results.
modplot(ff_fit)
}
}
\references{
Leo Breiman (2001). Random Forests. Machine Learning, 45(1), 5-32.

Daniel Conn, Tuck Ngun, Christina M. Ramirez (2015). Fuzzy Forests: a New
WGCNA Based Random Forest Algorithm for Correlated, High-Dimensional Data,
Journal of Statistical Software, Manuscript in progress.

Bin Zhang and Steve Horvath (2005) "A General Framework for Weighted Gene
Co-Expression Network Analysis", Statistical Applications in Genetics and
Molecular Biology: Vol. 4: No. 1, Article 17.
}

