\name{mbmdr}
\Rdversion{1.1}
\alias{mbmdr}
\alias{print.mbmdr}

\title{
	Model Based Multifactor Dimensionality Reduction
}

\description{
  \code{mbmdr} implements the Model Based Multifactor Dimensionality Reduction (MB-MDR) method
  proposed by Calle et al. (2008) as a dimension reduction method for 
  exploring gene-gene interactions.
}
   
\usage{
  mbmdr(y, data, order, covar=NULL, exclude=NA, risk.threshold=0.1, 
        output=NULL, adjust=c("none","covariables","main effects","both"), 
        first.model=NULL, list.models=NULL, use.logistf=TRUE, 
        printStep1=FALSE, ...)
}

\arguments{

  \item{y}{
  	Vector containing the dependent variable.
  }

  \item{data}{
  	A data.frame (or object coercible by as.data.frame to a data frame) 
  	containing the SNP information with values 0,1,2. \cr 
  	For example: 0 = common homozygous genotype, 1 = heterozygous genotype, 
  	2 = variant homozygous genotype.
  }
 
  \item{order}{
  	Single integer that specifies the order of interactions to be analyzed.\cr
  	If \code{list.models = NULL} (value by default) all possible interactions of 
  	the specified order are analyzed.
  }
  
  \item{covar}{
  	\emph{(Optional)} A data.frame or object coercible by as.data.frame to a 
  	data frame containing the covariables for adjusting regression models. \cr 
  	Only used if \code{adjust="covariables"} or \code{adjust="both"}.
  }
 
  \item{exclude}{
  	\emph{(Optional)} Value/s of missing data. If missing values in data are coded 
  	differently than \code{NA}, this should be specified. 
  	For example, \code{exclude=c(NA,-1)} specifies that both \code{NA} and \code{-1}
  	indicate a missing value.
  }

  \item{risk.threshold}{
  	Threshold used at the first MB-MDR stage for defining the risk category of a multilocus
  	genotype. It should be a conservative value. 
  	Value by default is \code{risk.threshold=0.1}.
  }

  \item{output}{
  	\emph{(Optional)} Output file name for storing \code{mbmdr} results as file, 
  	or \code{NULL} (default) for output as R object. If the number of models to be
    analyzed is too large, it is preferable to store the output in a file. This allows
    exploring partial results while \code{mbmdr} is still running and prevents losing all
    the information if R or \code{mbmdr} crashes midway.
  }
 
  \item{adjust}{
  	Type of regressions adjustment. Options are \code{"none", "covariables", "main effects"}
  	or \code{"both"}. \cr
  	By default no adjustment is performed; \code{adjust="none"}.
  }
      	
  \item{first.model}{
  	\emph{(Optional)} Numerical vector of length equal to \code{order} 
    for specifying initial interaction model for \code{mbmdr}; previous models will not be evaluated. 
    This is useful for continuing \code{mbmdr} computation after a stop. \cr
    Note that, by default, \code{mbmdr} explores all possible interactions of a specified order. 
    If there are, for example, 50 SNPs in data and \code{order=3}, \code{mbmdr} will start analyzing 
    the model (50,49,48), that is, the interaction between SNPs 50, 49 and 48 (column positions 
    in the \code{data} data frame). The second model that \code{mbmdr} will analyze is (50,49,47). 
    After model (50,49,1), next model will be (50,48,47), and the last model will be (3,2,1).\cr
 	  For example, if \code{mbmdr} stopped after the analysis of model (30,21,14), you can continue 
 	  the process by specifying \code{first.model=c(30,21,13)}.
	  Ids of SNPs must be in descending order.
    If \code{first.model=NULL} (by default) all models will be analyzed.
  }
   
  \item{list.models}{
  	\emph{(Optional)} Exhaustive list of models to be analyzed. Only models in the list will be analyzed.
	  It can be: a vector of length \code{order} specifying a single model, a matrix (\code{n x order}) 
	  containing models by rows, or a string specifying a file with models by rows 
	  (all models must be of the same interaction order).\cr
    A \code{NULL} value (by default) indicates that all possible interactions will be analyzed.
  }

  \item{use.logistf}{
  	Boolean value indicating whether or not to use the \code{logistf} package 
    for regressions when the phenomenon of separation is observed. By default \code{TRUE}.\cr
    It only has an effect if logistic regression (\code{family=binomial(link = "logit")}) is specified
    (see \code{logistf} help for details).
  }

  \item{printStep1}{
  	Boolean. If true, the details of \code{mbmdr} step 1 are printed for every model.
    This slows the process, so it is only advisable when the number of models to analyze is small.\cr 
    By default \code{printStep1=FALSE}.
  }
  
  \item{...}{
  	For regression arguments: arguments to be passed to \code{glm} calls.\cr
  	Mainly to specify the error distribution and link function to be used in the regression models.\cr
  	For example, use \code{family=binomial(link=logit)} for specifying logistic regression or 
  	\code{family=gaussian(link = "identity")} for normal regression. \cr
  	(See \code{\link{family}} for details of family functions and \code{\link{glm}} for more options
  	of \code{glm} function).
  }

}

\details{
	MB-MDR is a method for identifying multi-locus genotypes that are associated with 
	phenotype, and allows adjusting for marginal effects and confounders.\cr

  The exploration of interactions is performed in three steps:\cr

  \emph{Step1}\cr
  Each genotype is tested for association with response and classified as high risk, low risk
  or not significant, and all genotypes of the same class are merged. The threshold for considering
  significant evidence is the value specified in \code{risk.threshold} (by default \code{risk.threshold=0.1}).\cr
  If \code{printStep1=TRUE}, the \code{mbmdr} function prints this classification.
  
  \emph{Step2}\cr
  For each risk category (high and low), a new association test is performed. The result 
  provides a Wald statistic for the high and for the low categories.
  
  \emph{Step3}\cr
  The significance is explored through a permutation test on the maximum Wald statistics.

} 


\value{
  \code{mbmdr} returns an object of class \code{mbmdr} with the following attributes:
  
  \item{call}{
  The matched call.
  }
  \item{y}{
  The outcome used.
  }
  \item{data}{
  The SNPs data used.
  }
  \item{covar}{
  The covariable data used.
  }
  
  \item{result}{
  Data frame with those interactions that have at least one significant genotype. 
  For each interaction (rows), the following information is returned:\cr
         
  \tabular{lll}{
    \code{SNP1...SNPx}\tab  \tab Names of snps in interaction.                                    \cr
    \code{NH   }      \tab  \tab Number of significant High risk genotypes in the interaction.    \cr
    \code{WH   }      \tab  \tab Wald statistic for High risk category.                           \cr
    \code{PH   }      \tab  \tab P-value of the Wald test for the High risk category.             \cr
    \code{NL   }      \tab  \tab Number of significant Low risk genotypes in the interaction.     \cr
    \code{WL   }      \tab  \tab Wald statistic for Low risk category.                            \cr
    \code{PL   }      \tab  \tab P-value of the Wald test for the Low risk category.              \cr
    \code{MIN.P}      \tab  \tab Minimum p-value (\code{min(PH,PL)}) for the interaction model.   \cr
  }
  }

  If \code{printStep1} argument is set to \code{TRUE}, the result of the first step in 
  \code{mbmdr} is printed for each genotype with the following information:\cr
  
    \tabular{lll}{
      \code{...}      \tab  \tab Genotype.                                          \cr
      \code{cases}    \tab  \tab (only for case/control outcome) Number of cases with the specific genotype.        \cr
      \code{controls} \tab  \tab (only for case/control outcome) Number of controls with the specific genotype.     \cr
      \code{beta}     \tab  \tab Regression coefficient for this genotype.          \cr
      \code{p.value}     \tab  \tab Wald test p-value for this genotype.               \cr
      \code{category} \tab  \tab Predicted risk category for this genotype.         \cr
    }

}


\references{
  Calle M.L., Urrea V., Vellalta G., Malats N., Steen K.V. (2008) \emph{Improving strategies for 
  detecting genetic patterns of disease susceptibility in association studies.} 
  Statistics in Medicine 27, 6532-6546.
}


\author{
  Victor Urrea, Malu Calle, Kristel Van Steen, Nuria Malats
}


\examples{

#---  Case-control study  -------------

# Load the example data set used for the case-control analyses below.
data(simSNP)

# Complete MB-MDR analysis of all second-order interactions among the SNPs
# in columns 3 to 12, using logistic regression.
# Left commented out because it takes some time to run.
#fit <- mbmdr(y=simSNP$Y,data=simSNP[,3:12],order=2,family=binomial(link=logit))
#print(fit)

# Analysis restricted to the single model (2,1), i.e. the interaction between
# the SNPs in column positions 2 and 1 of the data passed to mbmdr.
# printStep1=TRUE additionally prints the step 1 details for this model.
fit <- mbmdr(y=simSNP$Y, data=simSNP[,3:12], order=2, list.models=c(2,1),
             family=binomial(link=logit), printStep1=TRUE)
print(fit)


# Same model, adjusting the regressions for a covariate.
# The argument name is written out in full (covar) instead of relying on
# partial matching of an abbreviated name.
fit <- mbmdr(y=simSNP$Y, data=simSNP[,3:12], order=2, list.models=c(2,1),
             covar=simSNP$X, adjust="covariables", family=binomial(link=logit))
print(fit)



#---  Continuous outcome  --------------

# Load the example data set used for the continuous-outcome analyses below.
data(simSNPcont)

# Complete MB-MDR analysis of all second-order interactions among the SNPs
# in columns 2 to 11; no family is passed in this call.
# Left commented out because it takes some time to run.
#fit <- mbmdr(y=simSNPcont$Y,data=simSNPcont[,2:11],order=2)
#print(fit)

# Analysis restricted to the single model (2,1), printing the step 1 details.
fit <- mbmdr(y=simSNPcont$Y, data=simSNPcont[,2:11], order=2,
             list.models=c(2,1), printStep1=TRUE)
print(fit)


}
