Sunday, March 24, 2013

Regression II: Ridge Regression

rm(list=ls())
library(ElemStatLearn)
data(prostate)
data = prostate[,1:9]
train = subset(prostate, train==T)[,1:9]
test = subset(prostate, train!=T)[,1:9]
# the scale function in R subtracts the mean
# and divides by the std dev (with df = N-1)
train.s = scale(train)
center = attributes(train.s)$'scaled:center'
scale = attributes(train.s)$'scaled:scale'
Y = as.numeric(train.s[,9])
X = as.matrix(train.s[,-9])
# scale testing data based on center and scale of training data
test.s = t((t(test) - center)/scale)
Y1 = as.numeric(test.s[,9])
X1 = as.matrix(test.s[,-9])
# unscaled data
Y0 = as.numeric(train[,9])
X0 = as.matrix(train[,-9])
Y01 = as.numeric(test[,9])
X01 = as.matrix(test[,-9])
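# Optional sanity check (my addition, not in the original post): the training
# columns should now have mean 0 and sd 1, while the test columns are only
# approximately centered/scaled because they use the *training* center/scale.
round(colMeans(train.s), 10)
apply(train.s, 2, sd)
round(colMeans(test.s), 2)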
##########################################################################
###################   fit ridge regression manually   ###################
##########################################################################
# ridge coefficient paths: beta(lambda) = (X'X + lambda*I)^{-1} X'y
# (no intercept term, since X and Y have been centered)
lambdas = exp(seq(log(0.01), log(10000), l=100))
betar.path = matrix(0, length(lambdas), ncol(X))
for (i in 1:length(lambdas)){
  betar.path[i,] = solve(t(X)%*%X + diag(rep(lambdas[i], ncol(X))))%*%t(X)%*%Y
}
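# Quick check (my addition): as lambda -> 0 the ridge fit should approach the
# OLS solution on the standardized data.
beta.ols = solve(t(X) %*% X) %*% t(X) %*% Y
max(abs(betar.path[1,] - beta.ols))  # small, since lambdas[1] = 0.01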
X.d = svd(X)$d  # singular values of X
# effective degrees of freedom: df(lambda) = sum_j d_j^2 / (d_j^2 + lambda)
df = matrix(0, length(lambdas), 1)
for (i in 1:length(lambdas)){
  df[i] = sum(X.d^2/(X.d^2 + lambdas[i]))
}
png(file = "fig1.png", width = 600, height = 600)
plot(range(df)*1.1, range(betar.path)*1.1, type='n', ylab='Coefficients', xlab='Degrees of Freedom')
for (j in c(1:8)){
  lines(df, betar.path[,j], col=j)
  # nudge the svi and pgg45 labels vertically so they do not overlap
  y.off = switch(names(train)[j], svi = 1.08, pgg45 = 0.9, 1)
  text(1.075*df[1], y.off*betar.path[1,j], names(train)[j])
}
abline(h=0)
dev.off()

# Remark: this recreates Figure 3.8 in the book
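# Hedged extension (my addition): test mean squared error along the manual
# ridge path, using the test set scaled by the training center/scale above.
mse = apply(betar.path, 1, function(b) mean((Y1 - X1 %*% b)^2))
plot(df, mse, type='l', xlab='Degrees of Freedom', ylab='Test MSE')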

##########################################################################
##############   fit ridge regression by glmnet package   ###############
##########################################################################
library(glmnet)
lambdas = exp(seq(log(0.01), log(10000), l=100))
# Way 1: supply our own lambda sequence on the pre-standardized data
ridge.glm = glmnet(X, Y,
                   family = "gaussian",
                   lambda = 2*lambdas,
                   alpha = 0,
                   standardize = FALSE,
                   intercept = TRUE)
# Way 2: let glmnet choose its own lambda sequence
ridge.glm = glmnet(X,Y,family = "gaussian", alpha = 0)
# Way 3: raw data, letting glmnet standardize internally
# (this last fit is the one used below)
ridge.glm = glmnet(X0,Y0,
                   family = "gaussian",
                   lambda = 2*lambdas,
                   alpha = 0,
                   standardize = TRUE,
                   intercept = TRUE
                   )
names(ridge.glm)
print(ridge.glm)
# plot(ridge.glm, xvar = "lambda", label = TRUE)
X.d = svd(X)$d
df = matrix(0, length(ridge.glm$lambda), 1)
for (i in 1:length(ridge.glm$lambda)){
  df[i] = sum(X.d^2/(X.d^2 + ridge.glm$lambda[i]))
}
plot(range(df)*1.1, range(ridge.glm$beta)*1.1, type='n', ylab='Coefficients', xlab='Degrees of Freedom')
for (j in c(1:8)){
  lines(df, ridge.glm$beta[j,])
}
# Remark: I cannot recreate Figure 3.8 in the book with the calls above
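# A possible explanation (my note, not from the original post): for alpha = 0,
# glmnet minimizes (1/(2N))*RSS + lambda*||beta||^2/2, so its lambda lives on
# a per-observation scale, and it also standardizes y internally. A commonly
# cited mapping to the textbook penalty is lambda_glmnet = lambda*sd(y)/N,
# with sd computed using the 1/N formula. A sketch under that assumption:
n = nrow(X)
sd.y = sqrt(var(Y) * (n - 1) / n)
ridge.exact = glmnet(X, Y, family = "gaussian", alpha = 0,
                     lambda = lambdas * sd.y / n,
                     standardize = FALSE, intercept = FALSE,
                     thresh = 1e-12)
# note glmnet sorts lambda in decreasing order, so its coefficient columns
# run in the opposite direction to the rows of betar.path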
# cross-validation and prediction are also available, e.g.:
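# Minimal cv.glmnet sketch (my addition): pick lambda by 10-fold CV, then
# predict on the scaled test predictors.
cv.ridge = cv.glmnet(X, Y, alpha = 0)
cv.ridge$lambda.min
pred = predict(cv.ridge, newx = X1, s = "lambda.min")
mean((Y1 - pred)^2)  # test MSE on the standardized scale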
##########################################################################
###############   fit ridge regression by MASS package   ################
##########################################################################
library(MASS)
train.s = as.data.frame(train.s)
train = as.data.frame(train)
# scaled version (not run):
# model.ridge = lm.ridge(lpsa ~ .,
#                        train.s,
#                        lambda = exp(seq(log(0.01), log(10000), l=100))
# )
# unscaled version: lm.ridge centers and scales the data internally
model.ridge = lm.ridge(lpsa ~ .,
                       train,
                       x = TRUE,
                       y = TRUE,
                       lambda = exp(seq(log(0.01), log(10000), l=100))
)
model.ridge$coef
model.ridge$lambda
X.d = svd(X)$d
df = matrix(0, length(model.ridge$lambda), 1)
for (i in 1:length(model.ridge$lambda)){
  df[i] = sum(X.d^2/(X.d^2 + model.ridge$lambda[i]))
}
plot(range(df)*1.1, range(model.ridge$coef)*1.1, type='n', ylab='Coefficients', xlab='Degrees of Freedom')
for (j in c(1:8)){
  lines(df, model.ridge$coef[j,])
}
# inspect the fitted object
summary(model.ridge)
names(model.ridge)
model.ridge
model.ridge$Inter    # intercept flag
model.ridge$ym       # mean of y used for centering
model.ridge$xm       # column means of X
model.ridge$scales   # column scales of X
model.ridge$GCV      # generalized cross-validation score per lambda
model.ridge$coef
plot(model.ridge)    # coefficient paths against lambda
select(model.ridge)  # HKB, L-W and GCV choices of lambda
# Remark: lm.ridge centers and scales the data itself, so there is no need to
# scale X and Y beforehand, and it recreates Figure 3.8
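# Prediction sketch (my addition): choose lambda by GCV, then predict on the
# raw test set. coef() on a ridgelm object returns the intercept and the
# coefficients on the original scale, one row per lambda.
best = which.min(model.ridge$GCV)
beta.best = coef(model.ridge)[best, ]
pred = as.numeric(cbind(1, X01) %*% beta.best)
mean((Y01 - pred)^2)  # test MSE on the original lpsa scale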
##########################################################################
##############   fit ridge regression by ridge package   ################
##########################################################################
library(ridge)
train.s = as.data.frame(train.s)
train = as.data.frame(train)
# scaled version
# model.ridge = linearRidge(lpsa ~ .,
#                           train.s,
#                           lambda = exp(seq(log(0.01), log(10000), l=100))
# )
# unscaled version
model.ridge = linearRidge(lpsa ~ .,
                          train,
                          scaling = "scale",
                          lambda = exp(seq(log(0.01), log(10000), l=100))
)
names(model.ridge)
X.d = svd(X)$d
df = matrix(0, length(model.ridge$lambda), 1)
for (i in 1:length(model.ridge$lambda)){
  df[i] = sum(X.d^2/(X.d^2 + model.ridge$lambda[i]))
}
plot(range(df)*1.1, range(model.ridge$coef)*1.1, type='n', ylab='Coefficients', xlab='Degrees of Freedom')
for (j in c(1:8)){
  lines(df, model.ridge$coef[j,])
}
# Remark: this also recreates Figure 3.8
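# Prediction sketch (my addition, assuming the package's predict method for
# linearRidge fits): with the default automatic lambda, the fitted model can
# predict on a new data frame.
model.auto = linearRidge(lpsa ~ ., train)  # lambda chosen automatically
pred = predict(model.auto, newdata = test)
mean((test$lpsa - pred)^2)  # test MSE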
##########################################################################
##############################   Conclusion   ###########################
##########################################################################
# I still do not know how to make glmnet reproduce Figure 3.8 exactly (the
# lambda-scaling note above is my best guess); MASS::lm.ridge and
# ridge::linearRidge both recreate it.
