Thursday, March 14, 2013

Naive Bayes in R

library(e1071)
# NOTE: the workspace is intentionally not cleared here; wiping the caller's
# global environment is a destructive side effect. Run the script in a fresh
# R session when a clean environment is needed.
################################################################################
############# Naive Bayes for continuous variables #############################
################################################################################
# Data: the built-in iris data set.
data(iris)
names(iris)
# Scatterplot matrix of the first four columns, points filled by species.
pairs(iris[1:4],
      main = "Iris Data (red=setosa,yellow=versicolor,blue=virginica)",
      pch = 21,
      bg = c("red", "yellow", "blue")[unclass(iris$Species)])
summary(iris)
# Just show the first 6 records.
head(iris)
# Fit on the whole data set: columns 1-4 are the predictors, column 5 the label.
classifier <- naiveBayes(iris[, 1:4], iris[, 5])
summary(classifier)
names(classifier)
classifier$apriori
# The component is named `tables`; spell it out in full instead of relying on
# partial matching by `$`.
classifier$tables  # 1st column: mean, 2nd column: standard deviation
classifier$levels
classifier$call
# Class-conditional Gaussian densities of Petal.Length, one curve per species.
# The means and standard deviations are read from the fitted model instead of
# being copied in by hand, so the plot stays in sync with `classifier`.
petal.stats <- classifier$tables[["Petal.Length"]]  # rows: species; cols: mean, sd
# Fine grid over the original plotting range [0, 8] so narrow peaks stay smooth.
petal.grid <- seq(0, 8, length.out = 401)
# One density column per species (rows of petal.stats).
petal.dens <- vapply(
  seq_len(nrow(petal.stats)),
  function(i) {
    dnorm(petal.grid, mean = petal.stats[i, 1], sd = petal.stats[i, 2])
  },
  numeric(length(petal.grid))
)
# Same line colours as before, in species (row) order.
petal.cols <- c("red", "blue", "black")
matplot(petal.grid, petal.dens,
        type = "l", lty = 1, col = petal.cols,
        xlab = "Petal length", ylab = "Density",
        main = "Petal length distribution for the 3 different species")
# Legend so each curve can be attributed to its species.
legend("topright", legend = rownames(petal.stats), col = petal.cols, lty = 1)
# Split data into train (2/3 of the rows, rounded up) and test (the rest).
set.seed(2013)  # fixed seed so the split and the results below are reproducible
train.ind <- sample(seq_len(nrow(iris)), ceiling(nrow(iris) * 2 / 3),
                    replace = FALSE)
classifier.train <- naiveBayes(iris[train.ind, 1:4], iris[train.ind, 5])
# Held-out rows, computed once and reused for every prediction below.
iris.test <- iris[-train.ind, ]
# Give the classified results (predicted class labels).
nb.pred <- predict(classifier.train, newdata = iris.test)
# Just show the first 6 predictions.
head(nb.pred)
# Confusion matrix: predicted labels against the true species.
table(predicted = nb.pred, actual = iris.test$Species)

# Give the raw score (per-class probabilities instead of labels).
nb.pred <- predict(classifier.train, newdata = iris.test, type = "raw")
# Just show the first 6 predictions.
head(nb.pred)

################################################################################
############# Naive Bayes for discrete variables ###############################
################################################################################
# HairEyeColor
# mosaicplot(HairEyeColor)
# margin.table(HairEyeColor, 1)
# margin.table(HairEyeColor, 2)
# margin.table(HairEyeColor, 3)
# margin.table(HairEyeColor, c(1,3))
# install.packages("mlbench")
# library(mlbench)
# Load HouseVotes84 from mlbench without attaching the package.
data(HouseVotes84, package = "mlbench")
head(HouseVotes84)
summary(HouseVotes84)
# Split data into train (2/3 of the rows, rounded up) and test (the rest).
set.seed(2013)  # fixed seed so the split and the results below are reproducible
train.ind <- sample(seq_len(nrow(HouseVotes84)),
                    ceiling(nrow(HouseVotes84) * 2 / 3),
                    replace = FALSE)
# Held-out rows, computed once and reused for every prediction below.
votes.test <- HouseVotes84[-train.ind, ]
# Class is the response; every other column is a predictor. No Laplace smoothing.
classifier.train <- naiveBayes(Class ~ ., data = HouseVotes84[train.ind, ],
                               laplace = 0)
summary(classifier.train)
names(classifier.train)
classifier.train$apriori
# The component is named `tables`; spell it out in full instead of relying on
# partial matching by `$`.
classifier.train$tables  # 1st column + 2nd column = 1 (probability)
classifier.train$levels
classifier.train$call
mode(classifier.train$tables)
# Predicted class labels for the held-out rows.
nb.pred <- predict(classifier.train, newdata = votes.test)
head(nb.pred)
# Confusion matrix: predicted labels against the true class.
table(predicted = nb.pred, actual = votes.test$Class)
# Raw scores (per-class probabilities) for the same rows, next to the truth.
nb.pred <- predict(classifier.train, newdata = votes.test, type = "raw")
head(nb.pred)
head(votes.test$Class)

No comments:

Post a Comment