# Monday, March 25, 2013
#
# Regression III: LASSO -- fitting the LASSO path on the prostate data
# (Elements of Statistical Learning, Section 3.4) with glmnet and lars.

# Data preparation: load the ElemStatLearn prostate data, split into the
# train/test partitions flagged by the 'train' column, and standardize
# using the TRAINING set's center and scale only (no test-set leakage).
# (rm(list = ls()) removed -- clearing the global workspace inside a
# script is an anti-pattern; run the script in a fresh R session instead.)
library(ElemStatLearn)
data(prostate)
# Columns 1:8 are predictors, column 9 (lpsa) is the response;
# column 10 is the train/test indicator.
data <- prostate[, 1:9]
train <- subset(prostate, train == TRUE)[, 1:9]  # TRUE, not T (T is reassignable)
test <- subset(prostate, train != TRUE)[, 1:9]
# scale() subtracts the column mean and divides by the std dev (df = N - 1)
train.s <- scale(train)
center <- attr(train.s, "scaled:center")
scale.sd <- attr(train.s, "scaled:scale")  # renamed: 'scale' shadowed base::scale()
Y <- as.numeric(train.s[, 9])   # scaled training response
X <- as.matrix(train.s[, -9])   # scaled training predictors
# Scale the testing data with the training center and scale
test.s <- t((t(test) - center) / scale.sd)
Y1 <- as.numeric(test.s[, 9])
X1 <- as.matrix(test.s[, -9])
# Unscaled versions, kept for the "let the package standardize" experiments
Y0 <- as.numeric(train[, 9])
X0 <- as.matrix(train[, -9])
Y01 <- as.numeric(test[, 9])
X01 <- as.matrix(test[, -9])
#################################################################################
########### Fit LASSO regression with the glmnet package ########################
#################################################################################
library(glmnet)
# Way 1: manually pre-scaled data, no intercept. This is the fit that
# reproduces the book's results and is the one used below.
lasso.glm <- glmnet(X, Y,
                    family = "gaussian",
                    alpha = 1,                    # alpha = 1 => pure LASSO penalty
                    standardize = FALSE,
                    intercept = FALSE,
                    standardize.response = FALSE)
# Way 2 (did NOT reproduce the expected results): let glmnet standardize.
# BUG FIX: stored under its own name -- the original reassigned lasso.glm,
# so every downstream computation silently used this "NOT WORK" fit.
lasso.glm.std <- glmnet(X0, Y0,
                        family = "gaussian",
                        alpha = 1,
                        standardize = TRUE,
                        intercept = TRUE,
                        standardize.response = TRUE)
# Way 3 (did NOT reproduce the expected results): glmnet defaults.
lasso.glm.default <- glmnet(X0, Y0,
                            family = "gaussian",
                            alpha = 1)
# Conclusion from these experiments: scale the data manually first (Way 1).
names(lasso.glm)
plot(lasso.glm, xvar = "norm", label = TRUE)
abline(h = 0)

lasso.glm$a0       # intercepts (all zero here: intercept = FALSE)
lasso.glm$beta     # coefficient path, one column per lambda
lasso.glm$lambda   # the lambda sequence
# Fitted values on training and (scaled) test predictors; one column per lambda
y <- predict(lasso.glm, X, type = "link")
y1 <- predict(lasso.glm, X1, type = "link")

# Training and test MSE per lambda -- vectorized with colMeans; the
# column-wise recycling of Y/Y1 replaces the original element-wise loop.
lasso.mse <- colMeans((Y - y)^2)
lasso1.mse <- colMeans((Y1 - y1)^2)
############################################################################
###################   Figure 3.10 on Page 70   #############################
############################################################################
# Training MSE (points) and test MSE (line) against lambda.
plot(lasso.glm$lambda, lasso.mse)
lines(lasso.glm$lambda, lasso1.mse)

# Ten-fold cross-validation on the pre-scaled data, measuring MSE.
lasso.cv <- cv.glmnet(X, Y,
                      family = "gaussian",
                      alpha = 1,
                      nfolds = 10,
                      standardize = FALSE,
                      intercept = FALSE,
                      standardize.response = FALSE,
                      type.measure = "mse")
names(lasso.cv)
# CV error curve; the second call flips the sign of log(lambda) on the x-axis.
plot(lasso.cv)
plot(lasso.cv, sign.lambda = -1)
lasso.cv$lambda.1se   # largest lambda within one SE of the minimum CV error
lasso.cv$cvm          # mean cross-validated error for each lambda
lasso.cv$cvsd         # its standard error
lasso.cv$glmnet.fit   # the underlying full-data glmnet fit
# Predictions on the training predictors at the 1-SE lambda.
y.cv <- predict(lasso.cv, X, s = "lambda.1se")
###########################################################################
############# Fit LASSO regression with the lars package ##################
###########################################################################
library(lars)
# lars computes the whole LASSO path; cross-validation is available via
# cv.lars() in this package.
# Scaled version: data already standardized, so disable normalization.
# (Renamed from 'model.ridge' -- these are LASSO fits, not ridge; the
# original also clobbered this fit with the unscaled one below.)
model.lasso.s <- lars(X, Y, type = "lasso",
                      trace = TRUE,
                      normalize = FALSE,
                      intercept = FALSE)
# Unscaled version: let lars normalize and fit an intercept.
model.lasso <- lars(X0, Y0, type = "lasso",
                    trace = TRUE,
                    normalize = TRUE,
                    intercept = TRUE)
summary(model.lasso)
model.lasso
plot(model.lasso,
     xvar = "arc.length",
     breaks = TRUE,
     plottype = "coefficients")
# BUG FIX: the original predicted on 'X.test', which is never defined.
# The unscaled fit expects the unscaled test predictors X01.
predict(model.lasso,
        X01,
        type = "fit",
        mode = "step")
coef(model.lasso, s = 5.6, mode = "step")
# Always scale X and Y manually before applying these functions.

# No comments:
#
# Post a Comment