% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/solarAssoc.R
\name{solarAssoc}
\alias{solarAssoc}
\title{Run association analysis.}
\usage{
solarAssoc(formula, data, dir, kinship, traits, covlist = "1", snpformat,
  snpdata, snpcovdata, snpmap, snplist, snpind, genocov.files, snplists.files,
  snpmap.files, mga.files, plink.ped, plink.map, plink.raw,
  assoc.outformat = c("df", "outfile", "outfile.gz"), assoc.outdir,
  assoc.options = "", cores = getOption("cores"), batch.size = 1000, ...,
  verbose = 0)
}
\arguments{
\item{formula}{an object of class \code{formula} or one that can be coerced to that class.
It is a symbolic description of fixed effects (covariates) to be fitted.}

\item{data}{A data frame containing the variables in the model,
including ID fields needed to construct random effects: genetic and household (both optional).
Other classes such as list, environment or object coercible by \code{as.data.frame} to a data frame
are not supported.}

\item{dir}{an optional character string, the name of the directory
where SOLAR performs the analysis.
If it is given, the analysis, together with the related input/output files, is
conducted in the given folder instead of a temporary one
(the default work flow).}

\item{kinship}{A matrix of the kinship coefficients (custom kinship matrix).
The IDs are required to be in row and column names.}

\item{traits}{a vector of characters to specify trait(s) in the model. It is an alternative to the formula interface.}

\item{covlist}{a vector of characters to specify fixed effects (covariates) in the model.
It is an alternative to the formula interface.
The default value is \code{"1"}.}

\item{snpformat}{a character, the format of SNP data passed by \code{snpdata} argument.
Currently, this argument is not used.}

\item{snpdata}{A matrix of SNP data.
SNPs are given in the columns, and individuals correspond to the rows.
The IDs of individuals are required to be in row names; column names identify the SNPs.}

\item{snpcovdata}{A matrix of SNP data, which are converted to covariates (numeric format).
SNPs are given in the columns, and individuals correspond to the rows.
The IDs of individuals are required to be in row names; column names identify the SNPs.}

\item{snpmap}{A data.frame of annotation for SNPs.}

\item{snplist}{a vector of characters, the names of SNPs to be used in the analysis.
This argument may be used when a subset of SNPs is of interest.}

\item{snpind}{a vector of positive integers, the indices of SNPs to be used in the analysis.
This argument may be used when a subset of SNPs is of interest.}

\item{genocov.files}{A vector of characters, the file paths to \code{genocov} SOLAR files.}

\item{snplists.files}{A vector of characters, the file paths to \code{snplists} SOLAR files.}

\item{snpmap.files}{A vector of characters (optional), the file paths to \code{snpmap} SOLAR files.}

\item{mga.files}{A list with 2-3 elements, where each element is a vector of characters.
This argument is an alternative to the other three
\code{genocov.files}, \code{snplists.files} and \code{snpmap.files}.
The element 3 of the list is optional.}

\item{plink.ped}{A character, the file path to genotype data in plink .ped format.
Two columns are used per genotype.}

\item{plink.map}{A character, the file path to genotype annotation data in plink .map format.}

\item{plink.raw}{A character, the file path to genotype data in allele-dosage plink format
(an example plink command: \code{plink --noweb --file dat50  --recodeA}).
One column is used per genotype.}

\item{assoc.outformat}{A character, the output format.
Possible values are \code{"df"}, \code{"outfile"} and \code{"outfile.gz"}.
Currently, the only supported output format is \code{"df"}.
That means the table of results is stored in \code{snpf} slot of a returned object.}

\item{assoc.outdir}{a character, the path to the output directory.
Currently, this argument is not used.}

\item{assoc.options}{A character, specific options to be passed to \code{mga} SOLAR command.}

\item{cores}{A positive integer, the number of cores for parallel computing.
The default value is taken from \code{getOption("cores")}.
If the default value is \code{NULL} then the number of cores is \code{1}.}

\item{batch.size}{An integer, the number of SNPs per batch for parallel computation.
The default value is \code{1000}.}

\item{...}{Arguments to be passed to \code{\link{solarPolygenic}} function.
For example, one such argument may be \code{polygenic.settings = "option EnableDiscrete 0"}.
Arguments of \code{solarAssoc},
which are also passed to \code{\link{solarPolygenic}},
include \code{formula}, \code{data}, \code{dir},
\code{kinship}, \code{traits} and \code{covlist}.}

\item{verbose}{A non-negative integer specifying the verbose level.
   The default value is \code{0}.}
}
\description{
The association analysis is conducted in the following sequence:
parse input files of SNP markers,
export data to a directory by \code{\link{df2solar}} function,
run the polygenic analysis in a directory,
run the association analysis on top of the polygenic analysis,
parse output files and
store results in an object of \code{solarAssoc} class (see \code{\link{solarAssocClass}}).
}
\note{
\code{solarAssoc} function accepts input genetic data in three formats:
 SOLAR (\code{genocov.files}, \code{snplists.files}, \code{snpmap.files} and \code{mga.files} arguments),
 R data frame or matrix (\code{snpdata}, \code{snpcovdata} and \code{snpmap} arguments),
 and plink (\code{plink.ped}, \code{plink.map} and \code{plink.raw} arguments).

 For large-size problems, the user is recommended to prepare the genetic data in SOLAR format
 and to split them into batches of size, for example, 1,000 markers.
 The use of the other two formats, R and plink, is not optimized for large-scale scenarios.
}
\examples{
### load data
data(dat50)
dim(phenodata)
dim(kin)
dim(genodata)

\dontrun{
### basic (univariate) association model with a custom kinship
mod <- solarAssoc(trait~age+sex, phenodata,
  kinship = kin, snpdata = genodata)
mod$snpf # table of results for 50 SNPs
}
}

