# Load the ESL prostate cancer data and build scaled / unscaled
# train and test design matrices. Column 9 (lpsa) is the response.
# NOTE: dropped rm(list = ls()) — never wipe the global env in a script;
# start a fresh R session instead.
library(ElemStatLearn)
data(prostate)
data <- prostate[, 1:9]
train <- subset(prostate, train == TRUE)[, 1:9]
test <- subset(prostate, train != TRUE)[, 1:9]

# scale() subtracts the column mean and divides by the sample
# std dev (denominator N - 1).
train.s <- scale(train)
center <- attr(train.s, "scaled:center")
# Renamed from `scale` so it no longer shadows base::scale().
scale.sd <- attr(train.s, "scaled:scale")
Y <- as.numeric(train.s[, 9])
X <- as.matrix(train.s[, -9])

# Scale the TEST data with the TRAINING center/spread (no test-set leakage).
test.s <- t((t(test) - center) / scale.sd)
Y1 <- as.numeric(test.s[, 9])
X1 <- as.matrix(test.s[, -9])

# Unscaled versions of the same splits.
Y0 <- as.numeric(train[, 9])
X0 <- as.matrix(train[, -9])
Y01 <- as.numeric(test[, 9])
X01 <- as.matrix(test[, -9])
#################################################################################
########### fit LASSO regression by glmnet package ##############################
#################################################################################
library(glmnet)

# Way 1 — the model used in the rest of the script: data were standardized
# manually above, so turn off glmnet's own standardization and the intercept.
lasso.glm <- glmnet(X, Y,
  family = "gaussian",
  alpha = 1,                    # alpha = 1 selects the lasso penalty
  standardize = FALSE,
  intercept = FALSE,
  standardize.response = FALSE
)

# Way 2 — does NOT reproduce the book's figure: glmnet standardizes
# internally. Kept under its own name so it no longer overwrites the
# model above (in the original, Way 3 silently became the model that
# was then predicted with the *scaled* X — a bug).
lasso.glm.w2 <- glmnet(X0, Y0,
  family = "gaussian",
  alpha = 1,
  standardize = TRUE,
  intercept = TRUE,
  standardize.response = TRUE
)

# Way 3 — does NOT work either: glmnet defaults on the raw data.
lasso.glm.w3 <- glmnet(X0, Y0,
  family = "gaussian",
  alpha = 1
)
# Based on my experiments, the data have to be scaled first.
names(lasso.glm)
plot(lasso.glm, xvar = "norm", label = TRUE)
abline(h = 0)
lasso.glm$a0      # intercepts (all zero: fitted without an intercept)
lasso.glm$beta    # coefficient path, one column per lambda
lasso.glm$lambda  # the lambda sequence chosen by glmnet

# In-sample and out-of-sample predictions, one column per lambda.
y <- predict(lasso.glm, X, type = "link")
y1 <- predict(lasso.glm, X1, type = "link")

# Train/test MSE along the regularization path.
# Preallocated as plain numeric vectors (was matrix(0, n, 1));
# seq_len() is safe even if the path were empty.
n.lambda <- length(lasso.glm$lambda)
lasso.mse <- numeric(n.lambda)
lasso1.mse <- numeric(n.lambda)
for (i in seq_len(n.lambda)) {
  lasso.mse[i] <- mean((Y - y[, i])^2)
  lasso1.mse[i] <- mean((Y1 - y1[, i])^2)
}
############################################################################
################### Figure 3.10 on Page 70 ################################
############################################################################
# MSE versus lambda: training error as points, test error overlaid as a line.
lam <- lasso.glm$lambda
plot(lam, lasso.mse)
lines(lam, lasso1.mse)
# 10-fold cross-validation along the lasso path.
# Fold assignment is random — fix the seed so the CV curve is reproducible.
set.seed(1)
lasso.cv <- cv.glmnet(X, Y,
  family = "gaussian",
  alpha = 1,
  nfolds = 10,
  standardize = FALSE,   # data already standardized manually
  intercept = FALSE,
  standardize.response = FALSE,
  type.measure = "mse"
)
names(lasso.cv)
plot(lasso.cv)                    # CV error curve with +/- 1 SE bars
plot(lasso.cv, sign.lambda = -1)  # same curve, x-axis reversed (was positional -1)
lasso.cv$lambda.1se  # largest lambda within 1 SE of the CV minimum
lasso.cv$cvm         # mean cross-validated error per lambda
lasso.cv$cvsd        # its standard error
lasso.cv$glmnet.fit  # the full-data glmnet fit underlying the CV
y.cv <- predict(lasso.cv, X, s = "lambda.1se")
###########################################################################
############# fit LASSO regression by lars package ########################
###########################################################################
library(lars)
# lars() computes the full lasso path; cross-validation is available
# in this package via cv.lars().
# Renamed from `model.ridge` — type = "lasso" is the penalty being fitted.

# Scaled version: data already standardized, so disable lars' own
# normalization and the intercept. Given its own name so it is no longer
# silently clobbered by the unscaled fit below.
model.lasso.s <- lars(X, Y, type = "lasso",
  trace = TRUE,
  normalize = FALSE,
  intercept = FALSE
)

# Unscaled version (the one inspected below): let lars normalize
# internally and fit an intercept.
model.lasso <- lars(X0, Y0, type = "lasso",
  trace = TRUE,
  normalize = TRUE,
  intercept = TRUE
)
summary(model.lasso)
model.lasso
plot(model.lasso,
  xvar = "arc.length",
  breaks = TRUE,
  plottype = "coefficients"
)
# Predict on the unscaled test predictors (matches the X0 training data).
# BUG FIX: the original referenced an undefined object `X.test`.
predict(model.lasso,
  X01,
  type = "fit",
  mode = "step"
)
coef(model.lasso, s = 5.6, mode = "step")
# always scale X and Y manually before applying functions
# (blog page footer, not part of the script):
# No comments:
# Post a Comment