Wednesday, September 4, 2013

Survival Analysis Part 1: Descriptive Methods for Survival Data (Based on the book by Hosmer, Lemeshow, May)

rm(list = ls())
setwd("C:\\ASA")
# import data
WHAS100 = read.table("C:\\ASA\\whas100.DAT", sep = "", header = F)
names(WHAS100) = c("id",
                   "admitdate",
                   "foldate",
                   "los",
                   "lenfol",
                   "fstat",
                   "age",
                   "gender",
                   "bmi")
head(WHAS100)
# Kaplan-Meier estimate
library(survival)
whas100.surv = survfit(Surv(lenfol, fstat) ~ 1, conf.type = "none", data = WHAS100)
# Table 2.3
summary(whas100.surv)
# Figure 2.2
plot(whas100.surv, xlab="Time", ylab="survival probability")

# Life-Table estimator
t1year = floor(WHAS100$lenfol/365)
fstat = WHAS100$fstat
t1 = data.frame(t1year, fstat)
t1
library(nlme)
die = gsummary(t1, sum, groups = t1year)
total = gsummary(t1, length, groups = t1year)
rm(t1year)
ltab.data = cbind(die[,1:2], total[,2])
names(ltab.data) = c("t1year", "censor", "total")
ltab.data
attach(ltab.data)
lt = length(t1year)
t1year[lt + 1] = NA
die = censor
censored = total[,2] - censor
library(KMsurv)
lifetable = lifetab(t1year, 100, censored, die)
# Table 2.4
lifetable[,1:5]
# nlost = censored
# nevent = die
# Figure 2.3
plot(t1year[1:8], lifetable[,5], type="s",
     xlab="Survival Time (Years)", ylab="Estimated Survival Probability")
# Figure 2.4
plot(t1year[1:8], lifetable[,5], type="b",
     xlab="Survival Time (Years)", ylab="Estimated Survival Probability")
################################
## confidence interval  ########
################################

whas100.surv = survfit(Surv(lenfol, fstat) ~ 1, conf.type = "none", data = WHAS100)
library(km.ci)
ci.log = km.ci(whas100.surv, conf.level = 0.95, tl = NA, tu = NA, method = "log")
ci.loglog = km.ci(whas100.surv, conf.level = 0.95, tl = NA, tu = NA, method = "loglog")
# ci.hallwellner = km.ci(whas100.surv, conf.level = 0.95, tl = NA, tu = NA, method = "hall-wellner")
ci.hallwellner = km.ci(whas100.surv, conf.level = 0.95, tl = NA, tu = NA, method = "loghall")
# look like loghall can recreate the Figure 2.7 instead of hall-wellner
plot(whas100.surv)
plot(ci.log)
plot(ci.loglog)
plot(ci.hallwellner)
log.ci = survfit(Surv(lenfol, fstat) ~ 1, conf.type = "log", data = WHAS100)
loglog.ci = survfit(Surv(lenfol, fstat) ~ 1, conf.type = "log-log", data = WHAS100)
# Figure 2.7
par(cex = 0.8)
plot(ci.hallwellner, lty = 3, lwd = 2)
lines(log.ci, lwd = 2, lty = 1)
lines(log.ci, lwd = 2, lty = 6, conf.int = T)
legend(150, 0.4, c("Kaplan-Meier", "Hall-Wellner", "Pointwise"), lty=c(1, 4, 6))
#######################################
########  Comparison  #################
#######################################
# Table 2.12
diff.logrank = survdiff(Surv(lenfol, fstat) ~ gender, data = WHAS100, rho = 0)
print(diff.logrank)
diff.petopeto = survdiff(Surv(lenfol, fstat) ~ gender, data = WHAS100, rho = 1)
print(diff.petopeto)
# Figure 2.9
diff.drug = survfit(Surv(lenfol, fstat) ~ strata(gender), data = WHAS100, conf.type="log-log")
plot(diff.drug, lty = c(1, 2), xlab = "Time", ylab = "Survival Probability")
diff.drug
legend(200,0.4, c("Male", "Female"), lty = c(1,2))
agecat = cut(WHAS100$age, c(0, 59, 69, 79, 100))
agecat
diff.age = survfit(Surv(lenfol, fstat) ~ strata(agecat), data = WHAS100, conf.type="log-log")
# Table 2.13
print(diff.age)
# Table 2.15
survdiff(Surv(lenfol, fstat) ~ agecat, data = WHAS100, rho = 0)
plot(diff.age, lty=c(1, 2, 3, 4), xlab="Time", ylab="Survival Probability")
legend(200,0.6, lty=c(1, 2, 3, 4),
       c("Age<60", "60<=Age<69", "70<=Age<79", "Age>=80"), bg="transparent")

############################################################
#####  aalen  ################################################
############################################################
aalen = survfit(coxph(Surv(lenfol, fstat) ~ 1, data = WHAS100), type = "aalen")
summary(aalen)
H.aalen = -log(aalen$surv)
aalen.est = cbind(time=aalen$time, d=aalen$n.event, n=aalen$n.risk, H.aalen, s1 = aalen$surv)
KM = survfit(Surv(lenfol, fstat) ~ 1, data = WHAS100)
KM.est = cbind(time=KM$time, s2 = KM$surv)
all = merge(data.frame(aalen.est), data.frame(KM.est), by="time")
all
# Figure 2.12
plot(all$time, all$s1, type="s", xlab="Time", ylab="Survival Probability")
points(all$time, all$s1, pch=1)
lines(all$time, all$s2, type="s")
points(all$time, all$s2, pch=3)
legend(200,0.6, c("Nelson-Aalen", "Kaplan-Meier"), pch=c(1,3))
hazard.rate = all$d/all$n
plot(all$time, hazard.rate, type = "p", pch=20, xlab="Time", ylab="Hazard Ratio")
lines(lowess(all$time, hazard.rate, f=0.75, iter=5))

No comments:

Post a Comment