
### attach the package under test; library() stops immediately when the
### package is missing, whereas require() would merely return FALSE and
### let the script fail later with a less obvious error
library("mboost")

### fix the RNG state so that the script is reproducible
set.seed(290875)

### for boosting hat matrix checks: Gaussian regression family whose
### offset function is replaced by one that always returns zero
fm <- GaussReg()
fm@offset <- function(y, w) 0

### a simple two-dimensional example from `gamboost.Rd':
### stopping distance as a smooth function of speed
data("cars")
cars.gb <- gamboost(dist ~ speed, data = cars, df = 4, family = fm,
                    control = boost_control(mstop = 50))
print(cars.gb)
aic <- AIC(cars.gb, method = "corrected")
print(aic)

### plot fit: boosted fit at the iteration reported by mstop() for the
### AIC object (red) and a smoothing spline (green) on top of the data
plot(dist ~ speed, data = cars)
best_iter <- mstop(AIC(cars.gb))
lines(cars$speed, predict(cars.gb[best_iter]), col = "red")
spline_fit <- smooth.spline(cars$speed, cars$dist)
lines(cars$speed, predict(spline_fit, cars$speed)$y, col = "green")

#### check boosting hat matrix and subsetting / predict:
#### the hat matrix stored with the AIC object, applied to the response,
#### has to reproduce the predictions of the corresponding model
hat_times_dist <- function(aic_obj) {
    drop(attr(aic_obj, "hatmat") %*% cars$dist)
}
stopifnot(isTRUE(all.equal(hat_times_dist(aic),
                           as.vector(predict(cars.gb)))))

#### same check for the model subsetted to 25 iterations, against both
#### predict() and fitted()
gb25 <- cars.gb[25]
hat25 <- hat_times_dist(AIC(gb25))
stopifnot(isTRUE(all.equal(hat25, as.vector(predict(gb25)))))
stopifnot(isTRUE(all.equal(hat25, as.vector(fitted(gb25)))))

### check boosting hat matrix with multiple independent variables
### and (random) observation weights
data("bodyfat", package = "mboost")
bffm <- DEXfat ~ age + waistcirc + hipcirc + elbowbreadth + kneebreadth +
      anthro3a + anthro3b + anthro3c + anthro4 - 1

### subtract the column mean from every variable except the response
indep <- setdiff(names(bodyfat), "DEXfat")
bodyfat[indep] <- lapply(bodyfat[indep], function(v) v - mean(v))

obs_weights <- runif(nrow(bodyfat)) * 10
bf_gam <- gamboost(bffm, data = bodyfat,
                   control = boost_control(mstop = 10),
                   weights = obs_weights)
aic <- AIC(bf_gam)

off <- bf_gam$offset
u <- bf_gam$ustart

### hat matrix applied to `ustart', shifted by the offset, has to agree
### with both predict() and fitted()
hat_fit <- drop(attr(aic, "hatmat") %*% u + off)
stopifnot(isTRUE(all.equal(hat_fit, as.vector(predict(bf_gam)))))
stopifnot(isTRUE(all.equal(hat_fit, as.vector(fitted(bf_gam)))))


### compare `gamboost' with `lm' in cases where this is actually possible:
### simulate a 100 x 10 design with entries uniform on [0, 10] and a
### balanced four-level factor
set.seed(290875)
n_obs <- 100
n_var <- 10
x <- matrix(10 * runif(n_obs * n_var), ncol = n_var)
xf <- gl(4, nrow(x) / 4)

### Assert that two numeric fits agree up to a small absolute tolerance.
### OK, we need to allow for some small differences (larger mstop values
### would fix this), hence no exact comparison.
###   x, y: numeric vectors (or one-column matrices) of equal length
###   tol:  largest tolerated absolute elementwise difference is
###         anything strictly below `tol' (default 0.1)
### Stops with an error if the lengths differ or if any absolute
### difference is not below `tol'; returns NULL invisibly otherwise.
stopin <- function(x, y, tol = 0.1) {
    ## without this guard `x - y' would silently recycle the shorter
    ## argument and a length mismatch could go unnoticed
    stopifnot(length(x) == length(y))
    stopifnot(max(abs(x - y)) < tol)
}

### univariate linear model: the response is exactly 3 * x.2, fitted
### without intercept by both `gamboost' and `lm'
df <- data.frame(y = 3 * x[, 2], x = x)
ctrl <- boost_control(mstop = 100, nu = 1)
ga <- gamboost(y ~ x.2 - 1, data = df, control = ctrl)
ls_fit <- lm(y ~ x.2 - 1, data = df)
stopin(fitted(ls_fit), fitted(ga))

### univariate model involving sin transformation: `lm' is given the
### transformed covariate sin(x.1), `gamboost' only the raw x.1
df <- data.frame(y = sin(x[, 1]), x = x)
ctrl <- boost_control(mstop = 100, nu = 1)
ga <- gamboost(y ~ x.1 - 1, data = df, control = ctrl)
ls_fit <- lm(y ~ sin(x.1) - 1, data = df)
stopin(fitted(ls_fit), fitted(ga))

### bivariate model: linear and sin; the same least-squares reference
### fit is compared with two boosted fits
df <- data.frame(y = sin(x[, 1]) + 3 * x[, 2], x = x)
ctrl <- boost_control(mstop = 100, nu = 1)
ls_fitted <- fitted(lm(y ~ sin(x.1) + x.2 - 1, data = df))

### default base learner settings
ga <- gamboost(y ~ x.1 + x.2 - 1, data = df, control = ctrl)
stopin(ls_fitted, fitted(ga))

### explicit `dfbase', one value per covariate
### (presumably degrees of freedom of each base learner -- TODO confirm)
ga <- gamboost(y ~ x.1 + x.2 - 1, data = df, dfbase = c(4, 1),
               control = ctrl)
stopin(ls_fitted, fitted(ga))

### ANCOVA model: linear effect of x.2 plus a level shift given by the
### factor `xf'; note that `xf' is not a column of `df' and is therefore
### looked up outside the data frame by both fitting functions
df <- data.frame(y = 3 * x[, 2] + (1:4)[xf], x = x)
ctrl <- boost_control(mstop = 100, nu = 1)

### factor + linear term, no intercept
ga <- gamboost(y ~ xf + x.2 - 1, data = df, control = ctrl)
ls_fit <- lm(y ~ xf + x.2 - 1, data = df)
stopin(fitted(ls_fit), fitted(ga))

### additional sin(x.1) term, with intercept and explicit `dfbase'
ga <- gamboost(y ~ xf + sin(x.1) + x.2, data = df,
               dfbase = c(1, 1, 4, 1), control = ctrl)
ls_fit <- lm(y ~ xf + sin(x.1) + x.2, data = df)
stopin(fitted(ls_fit), fitted(ga))


### check centering: a fit with `center = TRUE' on the raw covariate is
### compared with a default fit on the covariate centered by hand
y <- rnorm(20)
xn <- rnorm(20)
xnm <- xn - mean(xn)
xf <- gl(2, 10)
gc <- gamboost(y ~ xn + xf, control = boost_control(center = TRUE))
g <- gamboost(y ~ xnm + xf)

### coefficients have to agree once their names are dropped (the names
### differ because the covariate is called `xn' in one model and `xnm'
### in the other)
cgc <- setNames(coef(gc), NULL)
cg <- setNames(coef(g), NULL)
stopifnot(all.equal(cgc, cg))

### predictions have to agree as well: centered model without newdata,
### centered model with the training data passed as newdata, and the
### model fitted to the hand-centered covariate
train_copy <- data.frame(xn = xn, xf = xf)
pc1 <- predict(gc)
pc2 <- predict(gc, newdata = train_copy)
pc3 <- predict(g)
stopifnot(all.equal(pc1, pc2),
          all.equal(pc2, pc3))
