% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/anovaglm.R
\name{h2o.anovaglm}
\alias{h2o.anovaglm}
\title{H2O ANOVAGLM is used to calculate Type III SS which is used to evaluate the contributions of individual predictors 
and their interactions to a model.  Predictors or interactions with negligible contributions to the model will have 
high p-values while those with more contributions will have low p-values.}
\usage{
h2o.anovaglm(
  x,
  y,
  training_frame,
  model_id = NULL,
  seed = -1,
  ignore_const_cols = TRUE,
  score_each_iteration = FALSE,
  offset_column = NULL,
  weights_column = NULL,
  family = c("AUTO", "gaussian", "binomial", "fractionalbinomial", "quasibinomial",
    "poisson", "gamma", "tweedie", "negativebinomial"),
  tweedie_variance_power = 0,
  tweedie_link_power = 1,
  theta = 0,
  solver = c("AUTO", "IRLSM", "L_BFGS", "COORDINATE_DESCENT_NAIVE",
    "COORDINATE_DESCENT", "GRADIENT_DESCENT_LH", "GRADIENT_DESCENT_SQERR"),
  missing_values_handling = c("MeanImputation", "Skip", "PlugValues"),
  plug_values = NULL,
  compute_p_values = TRUE,
  standardize = TRUE,
  non_negative = FALSE,
  max_iterations = 0,
  link = c("family_default", "identity", "logit", "log", "inverse", "tweedie",
    "ologit"),
  prior = 0,
  alpha = NULL,
  lambda = c(0),
  lambda_search = FALSE,
  stopping_rounds = 0,
  stopping_metric = c("AUTO", "deviance", "logloss", "MSE", "RMSE", "MAE", "RMSLE",
    "AUC", "AUCPR", "lift_top_group", "misclassification", "mean_per_class_error",
    "custom", "custom_increasing"),
  early_stopping = FALSE,
  stopping_tolerance = 0.001,
  balance_classes = FALSE,
  class_sampling_factors = NULL,
  max_after_balance_size = 5,
  max_runtime_secs = 0,
  save_transformed_framekeys = FALSE,
  highest_interaction_term = 0,
  nparallelism = 4,
  type = 0
)
}
\arguments{
\item{x}{(Optional) A vector containing the names or indices of the predictor variables to use in building the model.
If x is missing, then all columns except y are used.}

\item{y}{The name or column index of the response variable in the data. 
The response must be either a numeric or a categorical/factor variable. 
If the response is numeric, then a regression model will be trained, otherwise it will train a classification model.}

\item{training_frame}{Id of the training data frame.}

\item{model_id}{Destination id for this model; auto-generated if not specified.}

\item{seed}{Seed for random numbers (affects certain parts of the algo that are stochastic and those might or might not be enabled by default).
Defaults to -1 (time-based random number).}

\item{ignore_const_cols}{\code{Logical}. Ignore constant columns. Defaults to TRUE.}

\item{score_each_iteration}{\code{Logical}. Whether to score during each iteration of model training. Defaults to FALSE.}

\item{offset_column}{Offset column. This will be added to the combination of columns before applying the link function.}

\item{weights_column}{Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it from
the dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice. Negative
weights are not allowed. Note: Weights are per-row observation weights and do not increase the size of the
data frame. This is typically the number of times a row is repeated, but non-integer values are supported as
well. During training, rows with higher weights matter more, due to the larger loss function pre-factor. If
you set weight = 0 for a row, the returned prediction frame at that row is zero and this is incorrect. To get
an accurate prediction, remove all rows with weight == 0.}

\item{family}{Family. Use binomial for classification with logistic regression, others are for regression problems. Must be
one of: "AUTO", "gaussian", "binomial", "fractionalbinomial", "quasibinomial", "poisson", "gamma", "tweedie",
"negativebinomial". Defaults to AUTO.}

\item{tweedie_variance_power}{Tweedie variance power. Defaults to 0.}

\item{tweedie_link_power}{Tweedie link power. Defaults to 1.}

\item{theta}{Theta. Defaults to 0.}

\item{solver}{AUTO will set the solver based on given data and the other parameters. IRLSM is fast on problems with small
number of predictors and for lambda-search with L1 penalty, L_BFGS scales better for datasets with many
columns. Must be one of: "AUTO", "IRLSM", "L_BFGS", "COORDINATE_DESCENT_NAIVE", "COORDINATE_DESCENT",
"GRADIENT_DESCENT_LH", "GRADIENT_DESCENT_SQERR". Defaults to IRLSM.}

\item{missing_values_handling}{Handling of missing values. Either MeanImputation, Skip or PlugValues. Must be one of: "MeanImputation",
"Skip", "PlugValues". Defaults to MeanImputation.}

\item{plug_values}{Plug Values (a single row frame containing values that will be used to impute missing values of the
training/validation frame, use in conjunction with missing_values_handling = PlugValues).}

\item{compute_p_values}{\code{Logical}. Request p-values computation. P-values work only with the IRLSM solver and no regularization.
Defaults to TRUE.}

\item{standardize}{\code{Logical}. Standardize numeric columns to have zero mean and unit variance. Defaults to TRUE.}

\item{non_negative}{\code{Logical}. Restrict coefficients (not intercept) to be non-negative. Defaults to FALSE.}

\item{max_iterations}{Maximum number of iterations. Defaults to 0.}

\item{link}{Link function. Must be one of: "family_default", "identity", "logit", "log", "inverse", "tweedie", "ologit".
Defaults to family_default.}

\item{prior}{Prior probability for y==1. To be used only for logistic regression iff the data has been sampled and the mean
of response does not reflect reality. Defaults to 0.}

\item{alpha}{Distribution of regularization between the L1 (Lasso) and L2 (Ridge) penalties. A value of 1 for alpha
represents Lasso regression, a value of 0 produces Ridge regression, and anything in between specifies the
amount of mixing between the two. Default value of alpha is 0 when SOLVER = 'L-BFGS'; 0.5 otherwise.}

\item{lambda}{Regularization strength. Defaults to c(0.0).}

\item{lambda_search}{\code{Logical}. Use lambda search starting at lambda max; the given lambda is then interpreted as lambda min.
Defaults to FALSE.}

\item{stopping_rounds}{Early stopping based on convergence of stopping_metric. Stop if simple moving average of length k of the
stopping_metric does not improve for k:=stopping_rounds scoring events (0 to disable). Defaults to 0.}

\item{stopping_metric}{Metric to use for early stopping (AUTO: logloss for classification, deviance for regression and anomaly_score
for Isolation Forest). Note that custom and custom_increasing can only be used in GBM and DRF with the Python
client. Must be one of: "AUTO", "deviance", "logloss", "MSE", "RMSE", "MAE", "RMSLE", "AUC", "AUCPR",
"lift_top_group", "misclassification", "mean_per_class_error", "custom", "custom_increasing". Defaults to
AUTO.}

\item{early_stopping}{\code{Logical}. Stop early when there is no more relative improvement on train or validation (if provided).
Defaults to FALSE.}

\item{stopping_tolerance}{Relative tolerance for metric-based stopping criterion (stop if relative improvement is not at least this
much). Defaults to 0.001.}

\item{balance_classes}{\code{Logical}. Balance training data class counts via over/under-sampling (for imbalanced data). Defaults to
FALSE.}

\item{class_sampling_factors}{Desired over/under-sampling ratios per class (in lexicographic order). If not specified, sampling factors will
be automatically computed to obtain class balance during training. Requires balance_classes.}

\item{max_after_balance_size}{Maximum relative size of the training data after balancing class counts (can be less than 1.0). Requires
balance_classes. Defaults to 5.0.}

\item{max_runtime_secs}{Maximum allowed runtime in seconds for model training. Use 0 to disable. Defaults to 0.}

\item{save_transformed_framekeys}{\code{Logical}. Set to TRUE to save the keys of transformed predictors and interaction column. Defaults to FALSE.}

\item{highest_interaction_term}{Limit the number of interaction terms: 2 means interactions between 2 columns only, 3 means three columns, and
so on. Defaults to 0.}

\item{nparallelism}{Number of models to build in parallel. Adjust according to your system. Defaults to 4.}

\item{type}{Refers to the SS type: 1, 2, 3, or 4. Only type 3 is currently supported. Defaults to 0.}
}
\description{
H2O ANOVAGLM is used to calculate Type III SS which is used to evaluate the contributions of individual predictors 
and their interactions to a model.  Predictors or interactions with negligible contributions to the model will have 
high p-values while those with more contributions will have low p-values.
}
\examples{
\dontrun{
h2o.init()

# Run ANOVA GLM of VOL ~ CAPSULE + RACE
prostate_path <- system.file("extdata", "prostate.csv", package = "h2o")
prostate <- h2o.uploadFile(path = prostate_path)
prostate$CAPSULE <- as.factor(prostate$CAPSULE)
model <- h2o.anovaglm(y = "VOL", x = c("CAPSULE","RACE"), training_frame = prostate)

}
}
