Sunday, March 31, 2013

Spline Regression

Chapter 5
 

rm(list=ls())
library(ElemStatLearn)
library(splines)
# Basic Examples
set.seed(0)
x = seq(0, 4*pi, length.out=50)
y = cos(x) + 0.3*rnorm(length(x))
par(mfrow=c(2,3))
# linear, no interior knots
plot(x,y,type="p", main = "Deg = 1, Df = 1")
m1 = lm(y ~ bs(x,degree=1,df=1))
lines(x, fitted(m1))
# linear, 1 interior knot
plot(x,y,type="p", main = "Deg = 1, Df = 2")
m2 = lm(y ~ bs(x,degree=1,df=2))
lines(x, fitted(m2))
# linear, 2 interior knots
plot(x, y,type="p", main = "Deg = 1, Df = 3")
m3 = lm(y ~ bs(x,degree=1,df=3))
lines(x, fitted(m3))
# quadratic, no interior knots
plot(x, y,type="p", main = "Deg = 2, Df = 2")
m4 = lm( y ~ bs(x,degree=2,df=2))
lines(x, fitted(m4))
# quadratic, 1 interior knot
plot(x, y,type="p", main = "Deg = 2, Df = 3")
m5 = lm(y ~ bs(x,degree=2,df=3))
lines(x, fitted(m5))
# quadratic, 2 interior knots
plot(x, y,type="p", main = "Deg = 2, Df = 4")
m6 = lm(y ~ bs(x,degree=2,df=4))
lines(x, fitted(m6))
par(mfrow=c(1,1))

##########################################################################
##########################################################################
################   South African Heart Disease   #########################
##########################################################################
##########################################################################

form="chd~ns(sbp,df=4)+ns(tobacco,df=4)+ns(ldl,df=4)+famhist+ns(obesity,df=4)+ns(alcohol,df=4)+ns(age,df=4)"

form = formula(form)
sbp.ns = ns(SAheart$sbp,df=4)
attr(sbp.ns,"knots")
summary(SAheart$sbp)
# Natural Cubic Splines
model.ncs = glm(form, data=SAheart, family=binomial("logit"))
summary(model.ncs)
names(model.ncs)
# model.ncs$coefficients
# # model.ncs$effects
# model.ncs$xlevels
# model.ncs$method
# # model.ncs$data
# model.ncs$terms # knots
# # model.ncs$model
# recreate Table 5.1
drop1( model.ncs, scope=form, test="Chisq" )
# create the model in Table 5.1
library(MASS)
stepAIC(model.ncs, scope=form, k=2)

##########################################################################
##########################################################################
#######################   Phoneme Recognition   ##########################
############  Example: Filtering and Feature Extraction  #################
##########################################################################
##########################################################################
rm(list=ls())
data(phoneme)
set.seed(0)
aa_indx = which(phoneme$g == "aa")
ao_indx = which(phoneme$g == "ao")
AA_data = phoneme[aa_indx[1:15], 1:256]
AO_data = phoneme[ao_indx[1:15], 1:256]
min_l = min(c(min(AA_data), min(AO_data)))
max_l = max(c(max(AA_data), max(AO_data)))
# Figure 5.5 on page 149
ii=1
plot( as.double(AA_data[ii,]),
      ylim=c(min_l,max_l), type="l", col="red",
      xlab="Frequency", ylab="Log-periodogram" )
for( ii in 2:dim(AA_data)[1] ){
  lines( as.double(AA_data[ii,]), col="red" )
}
for( ii in 1:dim(AO_data)[1] ){
  lines( as.double(AO_data[ii,]), col="blue" )
}

rm(list=ls())
aa_indx = which(phoneme$g == "aa")
ao_indx = which(phoneme$g == "ao")
###########################################################
AA_data = phoneme[aa_indx,]
train_inds = grep("^train", AA_data$speaker)
test_inds = grep("^test", AA_data$speaker)
AA_data_train = AA_data[train_inds, 1:256]
AA_data_test = AA_data[test_inds, 1:256]
#####################################
###### call AA class 1  #############
#####################################
AA_data_train$Y = rep(1, dim(AA_data_train)[1])
AA_data_test$Y = rep(1, dim(AA_data_test)[1])
###########################################################
AO_data = phoneme[ao_indx,]
train_inds = grep("^train", AO_data$speaker)
test_inds = grep("^test", AO_data$speaker)
AO_data_train = AO_data[train_inds, 1:256]
AO_data_test = AO_data[test_inds, 1:256]
#####################################
###### call AO class 1  #############
#####################################
AO_data_train$Y = rep(0, dim(AO_data_train)[1])
AO_data_test$Y = rep(0, dim(AO_data_test)[1])
####################################################
##################  Combine Data  ##################
####################################################
Data_train = rbind(AA_data_train, AO_data_train)
Data_test = rbind(AA_data_test, AO_data_test)

#########################################################################
#############   Logistic Regression Without Intercept  ##################
#########################################################################
form = paste("Y ~ -1 + ", paste(colnames(Data_train)[1:256], collapse="+"))
m = glm(form, family=binomial("logit"), data=Data_train)
mc = as.double(coefficients(m))
m.smooth = smooth.spline(mc,df=15)
plot(mc, ylim=c(-0.8,+0.6),type="l",
     xlab="Frequency",ylab="Logistic Regression Coefficients")
lines(m.smooth, col="blue" )

abline(h=0)
Y_hat_train = predict(m, Data_train[,1:256], type="response")
predicted_class_label_train = as.double(Y_hat_train > 0.5)
Y_hat_test  = predict(m, Data_test[,1:256], type="response")
predicted_class_label_test = as.double(Y_hat_test > 0.5)
err_rate_train1 = mean(Data_train[,257] != predicted_class_label_train)
err_rate_test1  = mean(Data_test[,257] != predicted_class_label_test)
print(sprintf('Logistic Regression (Raw): err_rate_train= %10.6f; err_rate_test= %10.5f',
              err_rate_train1, err_rate_test1))
beta = matrix(m.smooth$y,ncol=1)
logit_train = as.matrix(Data_train[,1:256])%*%beta
Y_smooth_train = exp(logit_train)/(1+exp(logit_train))
predicted_class_smooth_train = as.double(Y_smooth_train > 0.5)
logit_test = as.matrix(Data_test[,1:256])%*%beta
Y_smooth_test = exp(logit_test)/(1+exp(logit_test))
predicted_class_smooth_test = as.double(Y_smooth_test > 0.5)
err_rate_train2 = mean(Data_train[,257] != predicted_class_smooth_train)
err_rate_test2  = mean(Data_test[,257] != predicted_class_smooth_test)
print(sprintf('Logistic Regression (Raw): err_rate_train= %10.6f; err_rate_test= %10.5f',
              err_rate_train1, err_rate_test1))
print(sprintf('Logistic Regression (Regularized): err_rate_train= %10.6f; err_rate_test= %10.5f',
              err_rate_train2, err_rate_test2))
#######################################################################
#############  BMD Example  ############################################
#######################################################################
males = bone$gender == "male"
females = bone$gender == "female"
# df is monotone in lambda for smoothing splines
boneMaleSmooth = smooth.spline( bone[bone$gender == "male","age"],
                                bone[bone$gender == "male","spnbmd"], df=12)
boneFemaleSmooth = smooth.spline( bone[bone$gender == "female","age"],
                                  bone[bone$gender == "female","spnbmd"], df=12)
plot(boneMaleSmooth, ylim=c(-0.05,0.20), col="blue", type="l", xlab="Age", ylab="spnbmd")
points(bone[males,c(2,4)], col="blue", pch=20)
lines(boneFemaleSmooth, ylim=c(-0.05,0.20), col="red")
points(bone[females,c(2,4)], col="red", pch=20)

No comments:

Post a Comment