% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/apply-parallelize.R
\name{big_apply}
\alias{big_apply}
\title{Split-Apply-Combine}
\usage{
big_apply(X, a.FUN, a.combine, ind = cols_along(X), ncores = 1,
  block.size = block_size(nrow(X), ncores), ...)
}
\arguments{
\item{X}{A \link[=FBM-class]{FBM}.}

\item{a.FUN}{The function to be applied to each subset matrix.
It must take a \link[=FBM-class]{Filebacked Big Matrix} as first argument and
\code{ind}, a vector of indices used to split the data, as second argument.
For example, if you want to apply a function to \code{X[ind.row, ind.col]},
you may use \code{X[ind.row, ind.col[ind]]} in \code{a.FUN}.}

\item{a.combine}{Function that is used by \link{foreach} to process the tasks'
results as they are generated. This can be specified as either a function or a
non-empty character string naming the function. Specifying 'c' is useful
for concatenating the results into a vector, for example.
The values 'cbind' and 'rbind' can combine vectors into a matrix.
The values '+' and '*' can be used to process numeric data.
By default, the results are returned in a list.}

\item{ind}{Initial vector of subsetting indices.
Default is the vector of all column indices.}

\item{ncores}{Number of cores used. Default doesn't use parallelism.
You may use \link{nb_cores}.}

\item{block.size}{Maximum number of columns (or rows, depending on how you
use \code{ind} for subsetting) read at once. Default uses \link{block_size}.}

\item{...}{Extra arguments to be passed to \code{a.FUN}.}
}
\value{
The result of \link{foreach}.
}
\description{
A Split-Apply-Combine strategy to apply common R functions to a
Filebacked Big Matrix.
}
\details{
This function splits indices into parts, then applies a given function to each
subset matrix and finally combines the results. If parallelization is used,
this function first splits indices into parts for parallelization, then splits
them again on each core, applies a given function to each part and finally
combines the results (on each cluster and then from each cluster).
}
\examples{
# attach the example data used throughout these examples
X <- big_attachExtdata()

# get the means of each column:
# `a.FUN` receives the FBM and the column indices of the current block,
# and the per-block vectors are concatenated with 'c'
colMeans_sub <- function(X, ind) colMeans(X[, ind])
str(colmeans <- big_apply(X, a.FUN = colMeans_sub, a.combine = 'c'))

# get the (Euclidean) norms of each column, block by block
colNorms_sub <- function(X, ind) sqrt(colSums(X[, ind]^2))
str(colnorms <- big_apply(X, colNorms_sub, a.combine = 'c'))

# get the sums of each row
# split along rows: `ind` must be changed from its default (all column
# indices) to all row indices; each block returns the sums of a subset of
# rows (at most `block.size` rows at once), so blocks are concatenated with 'c'
str(rowsums <- big_apply(X, a.FUN = function(X, ind) rowSums(X[ind, ]),
                         ind = rows_along(X), a.combine = 'c',
                         block.size = 100))
# it is usually preferred to split along columns
# because matrices are stored by column;
# each block then returns partial row sums over a subset of columns,
# so blocks are added together with '+'
str(rowsums2 <- big_apply(X, a.FUN = function(X, ind) rowSums(X[, ind]),
                          a.combine = '+'))

## Every extra parameter to `a.FUN` should be passed to `big_apply`
# get the crossproduct between X and a matrix A
# (print the body of the function to see how it uses `big_apply`)
# note that we don't explicitly pass `ind.col` to `a.FUN`
body(big_cprodMat)

# get the product between X and a matrix B
# (again, print the body of the function to see how it uses `big_apply`)
# here, we must explicitly pass `ind.col` to `a.FUN`
# because the right matrix also needs to be subsetted.
body(big_prodMat)
}
\seealso{
\link{big_parallelize}
}
