Monday, March 25, 2013

Logistic and LASSO Regression

# The following code is for the book The Elements of Statistical Learning, chapter 4
# Example: South African Heart Disease (Page: 122)
# load data
# NOTE: the original script began with rm(list = ls()). It was dropped because
# wiping the caller's workspace is a surprising side effect, and every object
# used below is (re)assigned by this script anyway.
library(ElemStatLearn)
data(SAheart)

# Keep the eight columns used in this example. After this subset the family
# history factor sits in column 4 and the response chd in column 8.
data <- SAheart[, c(1:3, 5, 7:10)]

# Convert the family-history factor into a 0/1 dummy variable
# (1 = "Present", 0 = otherwise). The comparison is vectorized, so no
# row-by-row loop is needed.
temp <- data.frame(famhist = as.numeric(data[, 4] == "Present"))

# Same column order as `data`, with the factor replaced by its numeric dummy.
data.new <- cbind(data[, 1:3], temp, data[, 5:8])

# glmnet works on matrices, not data frames:
# Y is the response (column 8, chd) as a one-column matrix,
# X holds the remaining seven predictors as a numeric matrix.
Y <- as.matrix(data.new[, 8])
X <- as.matrix(data.new[, -8])

# Full logistic regression of chd on all predictors.
logit <- glm(chd ~ ., family = binomial("logit"), data = data.new,
             na.action = na.exclude)
# table 4.2 on page 122
summary(logit)

# Figure 4.12: scatterplot matrix of all variables.
pairs(data.new, main = "South African heart disease data", pch = 21)























# reduced model: stepwise (same as the linear regression)
# Only tobacco, ldl, famhist and age are kept as predictors.
logit.reduced <- glm(chd ~ tobacco + ldl + famhist + age,
                     family = binomial("logit"), data = data.new,
                     na.action = na.exclude)
# table 4.3 on page 124
summary(logit.reduced)

library(glmnet)
# glmnet needs a numeric matrix: it does not work if X has a factor variable
# (which is why famhist was turned into a 0/1 dummy before building X).
# This is the only package I know to apply lasso for binomial response.

# Does NOT reproduce the figure: with standardize = TRUE glmnet standardizes
# internally but reports the coefficients back on the original scale of each
# predictor. Kept under its own name for comparison so that it no longer
# overwrites the fit that is plotted below.
lasso.raw <- glmnet(X, Y, family = "binomial", alpha = 1,
                    standardize = TRUE, intercept = TRUE)

# Works: scale the predictors up front and switch glmnet's own standardization
# off, so the coefficient paths are expressed for the standardized predictors.
lasso <- glmnet(scale(X), Y, family = "binomial", alpha = 1,
                standardize = FALSE, intercept = TRUE)

# FIGURE 4.13 on page 126
# Coefficient paths against the L1 norm of the coefficient vector.
plot(lasso, xvar = "norm", label = TRUE)
abline(h = 0)

No comments:

Post a Comment