rm(list=ls())
setwd("//Users//yangzou//Desktop//R_Study//BrockwellDavis")
strikes = scan(file = "STRIKES.DAT")
strikes = ts(data = strikes, start = 1951, frequency = 1)
plot(strikes)
points(strikes, pch = 16)
# exponential smoothing by arima
exsmooth = arima(strikes, order = c(0, 1, 1))
exsmooth
strikes.ex = strikes - exsmooth$residuals
plot(strikes.ex, ylab = "STRICK")
points(strikes, pch = 16)
exsmooth = arima(strikes, order = c(0, 1, 1), fixed = -0.6,
include.mean = FALSE)
exsmooth
strikes.ex = strikes - exsmooth$residuals
plot(strikes.ex, ylab = "STRICK")
points(strikes, pch = 16)
# Holt-Winters
deaths = scan(file = "DEATHS.DAT")
deaths = ts(data = deaths, start = c(1973, 1), frequency = 12)
plot(deaths)
points(deaths, pch = 16)
deaths.hw = HoltWinters(deaths, seasonal = "additive")
plot(deaths.hw)
deaths.hw$fitted
plot(deaths.hw$fitted[,3], ylab = "trend")
plot(deaths.hw$fitted[,4], ylab = "seosonality")
points(deaths.hw$fitted[,4], pch = 16)
deaths.hw = HoltWinters(deaths, seasonal = "additive",
alpha = 0.2, beta = 0.2, gamma = 0.2)
plot(deaths.hw)
deaths.hw$fitted
plot(deaths.hw$fitted[,3], ylab = "trend")
plot(deaths.hw$fitted[,4], ylab = "seosonality")
points(deaths.hw$fitted[,4], pch = 16)
Data Science Blog
Wednesday, September 23, 2015
Sunday, September 20, 2015
linear time series models
rm(list=ls())
library(ggplot2)
library(fBasics)
library(fUnitRoots)
setwd("//Users//yangzou//Desktop//R_Study//tsay_3")
# IBM stock data
da = read.table("m-ibm3dx2608.txt", header = T)
head(da)
summary(da)
da$date = as.POSIXct(as.character(da$date), format="%Y%m%d")
basicStats(da$ibmrtn)
ggplot(da, aes(date, ibmrtn)) + geom_line() +
xlab("Time") + ylab("IBM monthly returns") +
scale_x_datetime(breaks = '10 years')
# create a time series object
ibm = ts(da$ibmrtn, frequency = 12, start = c(1926, 1))
plot(ibm, type='l')
acf(ibm, lag = 100)
# simple return
sibm = da$ibmrtn
# log return
libm = log(da$ibmrtn + 1)
par(mfcol=c(2,1))
acf(sibm, lag = 100)
acf(libm, lag = 100)
par(mfcol=c(1,1))
par(mfcol=c(2,1))
acf(sibm, lag = 100, ylim=c(-0.2, 0.2))
acf(libm, lag = 100, ylim=c(-0.2, 0.2))
par(mfcol=c(1,1))
Box.test(sibm, lag = 5, type = 'Ljung-Box')
box.obj = Box.test(sibm, lag = 5, type = 'Ljung-Box')
names(box.obj)
box.obj$statistic
Box.test(sibm, lag = 5, type = 'Box-Pierce')
Box.test(sibm, lag = 10, type = 'Ljung-Box')
# CRSP data
crsp = scan(file="m-ew6299.txt")
crsp1 = ts(crsp, frequency = 12, start = c(1962, 1))
plot(crsp1, type='l')
acf(crsp1, lag = 100)
# Quarterly growth of US real GNP
gnp = read.table("q-gnp4791.txt", header = F)
gnp1 = ts(gnp, frequency = 4, start = c(1947, 2))
plot(gnp1, ylab="Growth")
points(gnp1, pch='*')
abline(h = 0)
acf(gnp, lag=100)
# method = c("yule-walker", "burg", "ols", "mle", "yw")
m1 = ar(gnp[,1], method = "mle")
m1$order
m1$ar
m1$aic
m1$method
m1$n.used
m2 = arima(gnp, order=c(3, 0, 0))
m2
m2$coef
# intercept is the mean of the series
# the constant is obtained below:
(1 - sum(m2$coef[1:length(m2$coef)-1]))*(m2$coef[length(m2$coef)][[1]])
# residual standard error
sqrt(m2$sigma2)
# Characteristic equation
p1 = c(1, -m2$coef[1:length(m2$coef)-1])
p1
# find solutions
roots = polyroot(p1)
roots
# compute the absolute values of the solutions
Mod(roots)
# compute average length of business cycles
roots[1]
Re(roots[1])
Im(roots[1])
k=2*pi/acos(Re(roots[1])/Mod(roots[1]))
k/4
pacf = pacf(gnp[,1])
pacf$acf
# vm
vm = read.table("m-ibm3dx2608.txt", header = T)[,3]
# the average annual simple gross returns
t1 = prod(vm + 1)
t1^(12/length(vm)) - 1
# AR(3) model
m1 = arima(vm, order = c(3, 0, 0))
m1
mean(vm)
m1$coef[4]
# compute the intercept
(1 - sum(m1$coef[seq(1, 3)]))*mean(vm)
# compute standard error of residuals
sqrt(m1$sigma2)
Box.test(m1$residuals, lag=12, type="Ljung-Box")
# p-value using 9 degrees of freedom
pv = 1 - pchisq(16.352, 9)
pv
# to fix the AR(2) coef to zero
m2 = arima(vm, order = c(3, 0, 0), fixed = c(NA, 0, NA, NA))
m2
m2 = predict(m2, n.ahead = 12)
pred = m2$pred[1:12]
err = m2$se[1:12]
ulb = pred + 1.96*err
llb = pred - 1.96*err
x = seq(997, 1008)
df = data.frame(x, err, ulb, llb)
ggplot(df, aes(x)) +
geom_line(aes(y = pred), colors = "red") +
geom_line(aes(y = ulb), colors = "blue", linetype = "dotted") +
geom_line(aes(y = llb), colors = "blue", linetype = "dotted")
# Johnson & Johnson quarterly earnings per share
qjnj = scan(file="q-jnj.txt")
qjnj1 = ts(qjnj, frequency = 4, start = c(1960, 1))
lqjnj1 = log(qjnj1)
par(mfcol=c(2,1))
plot(qjnj1, type = "l", ylab = "Earning")
plot(lqjnj1, type = "l", ylab = "Log earning")
par(mfcol=c(1,1))
x = log(qjnj)
dx1 = diff(x)
dx4 = diff(x, lag = 4)
dx_1_4 = diff(dx1, lag = 4)
acf(x)
acf(dx1)
acf(dx4)
acf(dx_1_4)
m1 = arima(dx_1_4, order = c(0, 0, 1),
seasonal = list(order = c(0, 0, 1), periods = 4))
sqrt(m1$sigma2)
arima(dx_1_4, order = c(0, 0, 1), method = "ML",
seasonal = list(order = c(0, 0, 1), periods = 4))
arima(dx_1_4, order = c(0, 0, 1), method = "CSS",
seasonal = list(order = c(0, 0, 1), periods = 4))
# airline model
m2 = arima(qjnj, order = c(0, 1, 1), include.mean = F,
seasonal = list(order = c(0, 1, 1), frequency = 4))
m2
# monthly simple returns of CRSP Decile 1 Index
da = read.table("m-deciles08.txt", header = T)
d1 = da[,2]
d1s = ts(d1, start = c(1970, 1), frequency = 12)
plot(d1s)
acf(d1, lag = 40)
jan = rep(c(1,rep(0, 11)), 39)
m1 = lm(d1 ~ jan)
summary(m1)
d1_a = d1 - m1$coefficients[2] * jan
d1s_a = d1s - m1$coefficients[2] * jan
plot(d1s_a)
acf(d1_a, lag = 40)
m1 = arima(d1, order = c(1, 0, 0),
seasonal = list(order = c(1, 0, 1), periods = 12))
m1
m2 = arima(d1, order = c(1, 0, 0), include.mean = F,
seasonal = list(order = c(1, 0, 1), periods = 12))
m2
# regression with time series errors
r1 = read.table("w-gs1yr.txt", header = T)[,4]
r3 = read.table("w-gs3yr.txt", header = T)[,4]
m1 = lm(r3 ~ r1)
m1
plot(m1$residuals, type = "l")
acf(m1$residuals, lag = 36)
c1 = diff(r1)
c3 = diff(r3)
m2 = lm(c3 ~ -1 + c1)
summary(m2)
acf(m2$residuals, lag = 36)
m3 = arima(c3, order = c(0, 0, 1), xreg = c1, include.mean = F)
m3
mean(c3)
rsq = (sum(c3^2) - sum(m3$residuals^2))/sum(c3^2)
rsq
# unit-root test
da = read.table("q-gdp4708.txt", header = T)
gdp=log(da[,4])
gdp_s = ts(gdp, start = c(1947, 1), frequency = 4)
plot(gdp_s, type="l", ylab = "ln(GDP)")
acf(gdp)
gdp_diff = diff(gdp)
gdp_s_diff = diff(gdp_s)
plot(gdp_s_diff, type="l", ylab="diff(ln(GDP))")
abline(h=0)
pacf(gdp_diff, lag = 20)
m1 = ar(gdp_diff, method="mle")
m1$order
adfTest(gdp, lags = 10, type=c("c"))
da = read.table("d-sp55008.txt", header = T)
sp5=log(da[,7])
time = ISOdate(da[,1], da[,2], da[,3])
plot(time, sp5, type = "l")
plot(time, log(sp5), type = "l")
time[length(time)]
time[1]
m2 = ar(diff(sp5), method = 'mle')
m2$order
adfTest(sp5, lags = 2, type = "ct")
library(ggplot2)
library(fBasics)
library(fUnitRoots)
setwd("//Users//yangzou//Desktop//R_Study//tsay_3")
# IBM stock data
da = read.table("m-ibm3dx2608.txt", header = T)
head(da)
summary(da)
da$date = as.POSIXct(as.character(da$date), format="%Y%m%d")
basicStats(da$ibmrtn)
ggplot(da, aes(date, ibmrtn)) + geom_line() +
xlab("Time") + ylab("IBM monthly returns") +
scale_x_datetime(breaks = '10 years')
# create a time series object
ibm = ts(da$ibmrtn, frequency = 12, start = c(1926, 1))
plot(ibm, type='l')
acf(ibm, lag = 100)
# simple return
sibm = da$ibmrtn
# log return
libm = log(da$ibmrtn + 1)
par(mfcol=c(2,1))
acf(sibm, lag = 100)
acf(libm, lag = 100)
par(mfcol=c(1,1))
par(mfcol=c(2,1))
acf(sibm, lag = 100, ylim=c(-0.2, 0.2))
acf(libm, lag = 100, ylim=c(-0.2, 0.2))
par(mfcol=c(1,1))
Box.test(sibm, lag = 5, type = 'Ljung-Box')
box.obj = Box.test(sibm, lag = 5, type = 'Ljung-Box')
names(box.obj)
box.obj$statistic
Box.test(sibm, lag = 5, type = 'Box-Pierce')
Box.test(sibm, lag = 10, type = 'Ljung-Box')
# CRSP data
crsp = scan(file="m-ew6299.txt")
crsp1 = ts(crsp, frequency = 12, start = c(1962, 1))
plot(crsp1, type='l')
acf(crsp1, lag = 100)
# Quarterly growth of US real GNP
gnp = read.table("q-gnp4791.txt", header = F)
gnp1 = ts(gnp, frequency = 4, start = c(1947, 2))
plot(gnp1, ylab="Growth")
points(gnp1, pch='*')
abline(h = 0)
acf(gnp, lag=100)
# method = c("yule-walker", "burg", "ols", "mle", "yw")
m1 = ar(gnp[,1], method = "mle")
m1$order
m1$ar
m1$aic
m1$method
m1$n.used
m2 = arima(gnp, order=c(3, 0, 0))
m2
m2$coef
# intercept is the mean of the series
# the constant is obtained below:
(1 - sum(m2$coef[1:length(m2$coef)-1]))*(m2$coef[length(m2$coef)][[1]])
# residual standard error
sqrt(m2$sigma2)
# Characteristic equation
p1 = c(1, -m2$coef[1:length(m2$coef)-1])
p1
# find solutions
roots = polyroot(p1)
roots
# compute the absolute values of the solutions
Mod(roots)
# compute average length of business cycles
roots[1]
Re(roots[1])
Im(roots[1])
k=2*pi/acos(Re(roots[1])/Mod(roots[1]))
k/4
pacf = pacf(gnp[,1])
pacf$acf
# vm
vm = read.table("m-ibm3dx2608.txt", header = T)[,3]
# the average annual simple gross returns
t1 = prod(vm + 1)
t1^(12/length(vm)) - 1
# AR(3) model
m1 = arima(vm, order = c(3, 0, 0))
m1
mean(vm)
m1$coef[4]
# compute the intercept
(1 - sum(m1$coef[seq(1, 3)]))*mean(vm)
# compute standard error of residuals
sqrt(m1$sigma2)
Box.test(m1$residuals, lag=12, type="Ljung-Box")
# p-value using 9 degrees of freedom
pv = 1 - pchisq(16.352, 9)
pv
# to fix the AR(2) coef to zero
m2 = arima(vm, order = c(3, 0, 0), fixed = c(NA, 0, NA, NA))
m2
m2 = predict(m2, n.ahead = 12)
pred = m2$pred[1:12]
err = m2$se[1:12]
ulb = pred + 1.96*err
llb = pred - 1.96*err
x = seq(997, 1008)
df = data.frame(x, err, ulb, llb)
ggplot(df, aes(x)) +
geom_line(aes(y = pred), colors = "red") +
geom_line(aes(y = ulb), colors = "blue", linetype = "dotted") +
geom_line(aes(y = llb), colors = "blue", linetype = "dotted")
# Johnson & Johnson quarterly earnings per share
qjnj = scan(file="q-jnj.txt")
qjnj1 = ts(qjnj, frequency = 4, start = c(1960, 1))
lqjnj1 = log(qjnj1)
par(mfcol=c(2,1))
plot(qjnj1, type = "l", ylab = "Earning")
plot(lqjnj1, type = "l", ylab = "Log earning")
par(mfcol=c(1,1))
x = log(qjnj)
dx1 = diff(x)
dx4 = diff(x, lag = 4)
dx_1_4 = diff(dx1, lag = 4)
acf(x)
acf(dx1)
acf(dx4)
acf(dx_1_4)
m1 = arima(dx_1_4, order = c(0, 0, 1),
seasonal = list(order = c(0, 0, 1), periods = 4))
sqrt(m1$sigma2)
arima(dx_1_4, order = c(0, 0, 1), method = "ML",
seasonal = list(order = c(0, 0, 1), periods = 4))
arima(dx_1_4, order = c(0, 0, 1), method = "CSS",
seasonal = list(order = c(0, 0, 1), periods = 4))
# airline model
m2 = arima(qjnj, order = c(0, 1, 1), include.mean = F,
seasonal = list(order = c(0, 1, 1), frequency = 4))
m2
# monthly simple returns of CRSP Decile 1 Index
da = read.table("m-deciles08.txt", header = T)
d1 = da[,2]
d1s = ts(d1, start = c(1970, 1), frequency = 12)
plot(d1s)
acf(d1, lag = 40)
jan = rep(c(1,rep(0, 11)), 39)
m1 = lm(d1 ~ jan)
summary(m1)
d1_a = d1 - m1$coefficients[2] * jan
d1s_a = d1s - m1$coefficients[2] * jan
plot(d1s_a)
acf(d1_a, lag = 40)
m1 = arima(d1, order = c(1, 0, 0),
seasonal = list(order = c(1, 0, 1), periods = 12))
m1
m2 = arima(d1, order = c(1, 0, 0), include.mean = F,
seasonal = list(order = c(1, 0, 1), periods = 12))
m2
# regression with time series errors
r1 = read.table("w-gs1yr.txt", header = T)[,4]
r3 = read.table("w-gs3yr.txt", header = T)[,4]
m1 = lm(r3 ~ r1)
m1
plot(m1$residuals, type = "l")
acf(m1$residuals, lag = 36)
c1 = diff(r1)
c3 = diff(r3)
m2 = lm(c3 ~ -1 + c1)
summary(m2)
acf(m2$residuals, lag = 36)
m3 = arima(c3, order = c(0, 0, 1), xreg = c1, include.mean = F)
m3
mean(c3)
rsq = (sum(c3^2) - sum(m3$residuals^2))/sum(c3^2)
rsq
# unit-root test
da = read.table("q-gdp4708.txt", header = T)
gdp=log(da[,4])
gdp_s = ts(gdp, start = c(1947, 1), frequency = 4)
plot(gdp_s, type="l", ylab = "ln(GDP)")
acf(gdp)
gdp_diff = diff(gdp)
gdp_s_diff = diff(gdp_s)
plot(gdp_s_diff, type="l", ylab="diff(ln(GDP))")
abline(h=0)
pacf(gdp_diff, lag = 20)
m1 = ar(gdp_diff, method="mle")
m1$order
adfTest(gdp, lags = 10, type=c("c"))
da = read.table("d-sp55008.txt", header = T)
sp5=log(da[,7])
time = ISOdate(da[,1], da[,2], da[,3])
plot(time, sp5, type = "l")
plot(time, log(sp5), type = "l")
time[length(time)]
time[1]
m2 = ar(diff(sp5), method = 'mle')
m2$order
adfTest(sp5, lags = 2, type = "ct")
Sunday, August 23, 2015
R cheatsheet: data type and data structure
# mode represents how an object is stored in memory
# (numeric, character, list and function)
#
# class represents its abstract type.
# mode function: return the mode of R objects
num.obj <- seq(from=1, to=10, by=2)
mode(num.obj)
logical.obj <- c(TRUE, TRUE, FALSE, FALSE, FALSE)
mode(logical.obj)
character.obj <- c("a", "b", "c")
mode(character.obj)
is.numeric(num.obj)
is.character(num.obj)
is.logical(logical.obj)
is.numeric(logical.obj)
mode(mean)
is.function(mean)
# class function: return the class information of R objects
class(num.obj)
class(logical.obj)
class(character.obj)
typeof(num.obj)
mat.obj <- matrix(runif(9), ncol = 3, nrow = 3)
mode(mat.obj)
class(mat.obj)
print(mat.obj)
print(num.obj)
# factor: for categorical data
character.obj
is.factor(character.obj)
is.character(character.obj)
factor.obj <- as.factor(character.obj)
is.factor(factor.obj)
mode(factor.obj)
class(factor.obj)
factor1 <- factor(c(1, 2, 3, 4, 5, 6, 7, 8, 9))
factor1
levels(factor1)
labels(factor1)
factor2 <- factor(c(1, 2, 3, 4, 5, 6, 7, 8, 9), labels=letters[1:9])
factor2
levels(factor2)
labels(factor2)
# data frame
var1 <- c(101, 102, 103, 104, 105)
var2 <- c(25, 22, 29, 34, 33)
var3 <- c("Non-Diabetic", "Diabetic", "Non-Diabetic", "Non-Diabetic", "Diabetic")
var4 <- factor(c("male", "male", "female", "female", "male"))
diab.dat <- data.frame(var1, var2, var3, var4)
diab.dat
summary(diab.dat)
class(diab.dat)
mode(diab.dat)
typeof(diab.dat)
# matrices
mat.diab <- as.matrix(diab.dat)
class(mat.diab)
mode(mat.diab)
typeof(mat.diab)
num.mat <- matrix(rnorm(9), nrow = 3, ncol = 3)
num.mat
class(num.mat)
mode(num.mat)
t(num.mat)
t(num.mat) %*% num.mat
# array: can be of any number of dimensions
mat.array = array(dim = c(2, 2, 3))
mat.array[,,1] <- rnorm(4)
mat.array[,,2] <- rnorm(4)
mat.array[,,3] <- rnorm(4)
mat.array
obj.list <- list(elem1=var1, elem2=var2, elem3=var3,
elem4=var4, elem5=diab.dat, elem6=mat.array)
obj.list[1]
obj.list[[1]]
obj.list[[1]][2]
# Date variables
as.Date("1970-01-01")
as.numeric(as.Date("1970-01-01"))
as.numeric(as.Date("1970-01-02"))
as.Date("Jan-01-1970", format="%b-%d-%Y")
Sys.time()
substr(as.character(Sys.time()), 1, 10)
substr(as.character(Sys.time()), 12, 19)
# number of seconds since 1 January 1970
unclass(Sys.time())
# POSIXct: the signed number of seconds since the beginning of 1970 as a numeric vector
# more convenient for including in dataframes
# POSIXlt: a named list of vectors
# representing seconds, minutes, hours, days, months and years
date <- as.POSIXlt(Sys.time())
date
date$wday
date$yday
mode(date)
class(date)
typeof(date)
unclass(date)
date2 <- unlist(unclass(date))
class(date2)
mode(date2)
typeof(date2)
length(date)
length(date2)
date <- as.POSIXct(Sys.time())
date
unclass(date)
unlist(unclass(date))
class(date)
mode(date)
typeof(date)
length(date)
date.str <- "2015-12-02"
date4 <- as.POSIXct(date.str, format="%Y-%m-%d")
datetime.str <- "2015-12-02 16:30:00"
date5 <- as.POSIXct(datetime.str, format="%Y-%m-%d %H:%M:%S", tz="EST")
date5
# (numeric, character, list and function)
#
# class represents its abstract type.
# mode function: return the mode of R objects
num.obj <- seq(from=1, to=10, by=2)
mode(num.obj)
logical.obj <- c(TRUE, TRUE, FALSE, FALSE, FALSE)
mode(logical.obj)
character.obj <- c("a", "b", "c")
mode(character.obj)
is.numeric(num.obj)
is.character(num.obj)
is.logical(logical.obj)
is.numeric(logical.obj)
mode(mean)
is.function(mean)
# class function: return the class information of R objects
class(num.obj)
class(logical.obj)
class(character.obj)
typeof(num.obj)
mat.obj <- matrix(runif(9), ncol = 3, nrow = 3)
mode(mat.obj)
class(mat.obj)
print(mat.obj)
print(num.obj)
# factor: for categorical data
character.obj
is.factor(character.obj)
is.character(character.obj)
factor.obj <- as.factor(character.obj)
is.factor(factor.obj)
mode(factor.obj)
class(factor.obj)
factor1 <- factor(c(1, 2, 3, 4, 5, 6, 7, 8, 9))
factor1
levels(factor1)
labels(factor1)
factor2 <- factor(c(1, 2, 3, 4, 5, 6, 7, 8, 9), labels=letters[1:9])
factor2
levels(factor2)
labels(factor2)
# data frame
var1 <- c(101, 102, 103, 104, 105)
var2 <- c(25, 22, 29, 34, 33)
var3 <- c("Non-Diabetic", "Diabetic", "Non-Diabetic", "Non-Diabetic", "Diabetic")
var4 <- factor(c("male", "male", "female", "female", "male"))
diab.dat <- data.frame(var1, var2, var3, var4)
diab.dat
summary(diab.dat)
class(diab.dat)
mode(diab.dat)
typeof(diab.dat)
# matrices
mat.diab <- as.matrix(diab.dat)
class(mat.diab)
mode(mat.diab)
typeof(mat.diab)
num.mat <- matrix(rnorm(9), nrow = 3, ncol = 3)
num.mat
class(num.mat)
mode(num.mat)
t(num.mat)
t(num.mat) %*% num.mat
# array: can be of any number of dimensions
mat.array = array(dim = c(2, 2, 3))
mat.array[,,1] <- rnorm(4)
mat.array[,,2] <- rnorm(4)
mat.array[,,3] <- rnorm(4)
mat.array
obj.list <- list(elem1=var1, elem2=var2, elem3=var3,
elem4=var4, elem5=diab.dat, elem6=mat.array)
obj.list[1]
obj.list[[1]]
obj.list[[1]][2]
# Date variables
as.Date("1970-01-01")
as.numeric(as.Date("1970-01-01"))
as.numeric(as.Date("1970-01-02"))
as.Date("Jan-01-1970", format="%b-%d-%Y")
Sys.time()
substr(as.character(Sys.time()), 1, 10)
substr(as.character(Sys.time()), 12, 19)
# number of seconds since 1 January 1970
unclass(Sys.time())
# POSIXct: the signed number of seconds since the beginning of 1970 as a numeric vector
# more convenient for including in dataframes
# POSIXlt: a named list of vectors
# representing seconds, minutes, hours, days, months and years
date <- as.POSIXlt(Sys.time())
date
date$wday
date$yday
mode(date)
class(date)
typeof(date)
unclass(date)
date2 <- unlist(unclass(date))
class(date2)
mode(date2)
typeof(date2)
length(date)
length(date2)
date <- as.POSIXct(Sys.time())
date
unclass(date)
unlist(unclass(date))
class(date)
mode(date)
typeof(date)
length(date)
date.str <- "2015-12-02"
date4 <- as.POSIXct(date.str, format="%Y-%m-%d")
datetime.str <- "2015-12-02 16:30:00"
date5 <- as.POSIXct(datetime.str, format="%Y-%m-%d %H:%M:%S", tz="EST")
date5
Monday, December 2, 2013
SAS Survival Analysis IV: Cox Model with Time-dependet Variable
libname asa_data 'C:\ASA_SAS';
* Time-dependent variable;
ods output parameterestimates = temp1;
proc phreg data = asa_data.uis;
model time*censor(0) = treat off_trt tot;
if time <= los then off_trt = 0;
else off_trt = 1;
tot = treat*off_trt;
run;
* Compared with the following code;
data temp2;
set asa_data.uis;
if time <= los then off_trt = 0;
else off_trt = 1;
tot = treat*off_trt;
run;
proc phreg data = temp2;
model time*censor(0) = treat off_trt tot;
run;
* Time-dependent variable;
ods output parameterestimates = temp1;
proc phreg data = asa_data.uis;
model time*censor(0) = treat off_trt tot;
if time <= los then off_trt = 0;
else off_trt = 1;
tot = treat*off_trt;
run;
* Compared with the following code;
data temp2;
set asa_data.uis;
if time <= los then off_trt = 0;
else off_trt = 1;
tot = treat*off_trt;
run;
proc phreg data = temp2;
model time*censor(0) = treat off_trt tot;
run;
Wednesday, November 27, 2013
SAS Survival Analysis III: Cox model
libname asa_data 'C:\ASA_SAS';
ods trace on;
proc phreg data = asa_data.whas100;
model lenfol*fstat(0) = gender;
run;
ods trace off;
* Table 3.2;
ods output ParameterEstimates = temp1;
proc phreg data = asa_data.whas100;
model lenfol*fstat(0) = gender;
run;
libname asa_data 'C:\ASA_SAS';
ods trace on;
proc phreg data = asa_data.whas100;
model lenfol*fstat(0) = gender;
run;
ods trace off;
* Table 4.2;
ods output ParameterEstimates = temp1;
proc phreg data = asa_data.whas100;
model lenfol*fstat(0) = gender / ties = breslow;
run;
proc print data = temp1;run;
proc contents data = temp1;run;
data table4p2;
set temp1;
z = sqrt(ChiSq);
z2 = Estimate / StdErr;
lowerb = estimate - 1.96*stderr;
upperb = estimate + 1.96*stderr;
run;
proc print data = table4p2 label noobs;
var Parameter Estimate stderr z z2 Probchisq lowerb upperb;
format Estimate z z2 Probchisq lowerb upperb f8.3;
format stderr f8.4;
label Parameter = 'Variable'
Estimate = 'Coeff.'
stderr = 'Std.Err.'
Probchisq = 'p>|z|'
lowerb = 'Conf Lower'
upperb = 'Conf Upper';
run;
* Table 4.4;
data temp2;
set asa_data.whas100;
age_2 = 0;
age_3 = 0;
age_4 = 0;
if 60 <= age <= 69 then age_2 = 1;
else if 70 <= age <= 79 then age_3 = 1;
else if 80 <= age then age_4 = 1;
run;
* Table 4.5 & Table 4.7;
proc phreg data = temp2;
model lenfol*fstat(0) = age_2 age_3 age_4 / ties = breslow risklimits covb;
run;
data temp3;
set asa_data.whas100;
age_2 = 0;
age_3 = 0;
age_4 = 0;
if age < 60 then do;
age_2 = -1;
age_3 = -1;
age_4 = -1;
end;
if 60 <= age <= 69 then age_2 = 1;
if 60 <= age <= 69 then age_2 = 1;
else if 70 <= age <= 79 then age_3 = 1;
else if 80 <= age then age_4 = 1;
run;
* Table 4.8;
proc phreg data = temp3;
model lenfol*fstat(0) = age_2 age_3 age_4 / ties = breslow risklimits covb;
run;
* Table 4.13;
* Crude Model;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = gender/ risklimits;
run;
* Adjusted Model;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = gender age/ risklimits;
run;
* Interaction Model;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = gender age gender*age/ risklimits; *no hazard ratio in this case;
run;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = gender age ga/ risklimits;
ga = gender*age;
output out = temp4 xbeta = xbeta;
run;
proc print data = temp4; run;
proc gplot data = temp4;
plot xbeta*age = gender;
run;
* Figure 4.5;
data temp1;
set asa_data.gbcs;
time = rectime/30;
run;
proc phreg data = temp1;
model time*censrec(0) = hormone;
output out = temp2 survival = survival;
run;
proc sort data = temp2;
by hormone time;
run;
proc gplot data = temp2;
plot survival*time = hormone;
run;
quit;
libname asa_data 'C:\ASA_SAS';
* Figure 5.2a;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = age hr diasbp gender chf;
id id;
output out = fig52a resmart = mart;
run;
proc sort data = asa_data.whas500;
by id;
run;
proc sort data = fig52a;
by id;
run;
data fig52a;
merge fig52a asa_data.whas500;
by id;
run;
proc loess data = fig52a;
model mart = bmi;
ods output outputstatistics = temp1;
run;
proc sort data = temp1;
by bmi;
run;
proc contents data = temp1; run;
goptions reset = all;
symbol1 v = dot i = none;
symbol2 v = none i = join;
proc gplot data = temp1;
plot DepVar*bmi=1 pred*bmi=2 / overlay;
*plot pred*bmi=2;
run;
quit;
* Stepwise ;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = age chf hr diasbp bmi gender mitype miord sysbp cvd afb
/ selection = forward slentry = 0.25 slstay = 0.8;
run;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = age chf hr diasbp bmi gender mitype miord sysbp cvd afb
/ selection = stepwise slentry = 0.25 slstay = 0.8 details;
run;
* Best Subset;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = age chf hr diasbp bmi gender mitype miord sysbp cvd afb
/ selection = score best = 3 details;
run;
ods trace on;
proc phreg data = asa_data.whas100;
model lenfol*fstat(0) = gender;
run;
ods trace off;
* Table 3.2;
ods output ParameterEstimates = temp1;
proc phreg data = asa_data.whas100;
model lenfol*fstat(0) = gender;
run;
libname asa_data 'C:\ASA_SAS';
ods trace on;
proc phreg data = asa_data.whas100;
model lenfol*fstat(0) = gender;
run;
ods trace off;
* Table 4.2;
ods output ParameterEstimates = temp1;
proc phreg data = asa_data.whas100;
model lenfol*fstat(0) = gender / ties = breslow;
run;
proc print data = temp1;run;
proc contents data = temp1;run;
data table4p2;
set temp1;
z = sqrt(ChiSq);
z2 = Estimate / StdErr;
lowerb = estimate - 1.96*stderr;
upperb = estimate + 1.96*stderr;
run;
proc print data = table4p2 label noobs;
var Parameter Estimate stderr z z2 Probchisq lowerb upperb;
format Estimate z z2 Probchisq lowerb upperb f8.3;
format stderr f8.4;
label Parameter = 'Variable'
Estimate = 'Coeff.'
stderr = 'Std.Err.'
Probchisq = 'p>|z|'
lowerb = 'Conf Lower'
upperb = 'Conf Upper';
run;
* Table 4.4;
data temp2;
set asa_data.whas100;
age_2 = 0;
age_3 = 0;
age_4 = 0;
if 60 <= age <= 69 then age_2 = 1;
else if 70 <= age <= 79 then age_3 = 1;
else if 80 <= age then age_4 = 1;
run;
* Table 4.5 & Table 4.7;
proc phreg data = temp2;
model lenfol*fstat(0) = age_2 age_3 age_4 / ties = breslow risklimits covb;
run;
data temp3;
set asa_data.whas100;
age_2 = 0;
age_3 = 0;
age_4 = 0;
if age < 60 then do;
age_2 = -1;
age_3 = -1;
age_4 = -1;
end;
if 60 <= age <= 69 then age_2 = 1;
if 60 <= age <= 69 then age_2 = 1;
else if 70 <= age <= 79 then age_3 = 1;
else if 80 <= age then age_4 = 1;
run;
* Table 4.8;
proc phreg data = temp3;
model lenfol*fstat(0) = age_2 age_3 age_4 / ties = breslow risklimits covb;
run;
* Table 4.13;
* Crude Model;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = gender/ risklimits;
run;
* Adjusted Model;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = gender age/ risklimits;
run;
* Interaction Model;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = gender age gender*age/ risklimits; *no hazard ratio in this case;
run;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = gender age ga/ risklimits;
ga = gender*age;
output out = temp4 xbeta = xbeta;
run;
proc print data = temp4; run;
proc gplot data = temp4;
plot xbeta*age = gender;
run;
* Figure 4.5;
data temp1;
set asa_data.gbcs;
time = rectime/30;
run;
proc phreg data = temp1;
model time*censrec(0) = hormone;
output out = temp2 survival = survival;
run;
proc sort data = temp2;
by hormone time;
run;
proc gplot data = temp2;
plot survival*time = hormone;
run;
quit;
libname asa_data 'C:\ASA_SAS';
* Figure 5.2a;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = age hr diasbp gender chf;
id id;
output out = fig52a resmart = mart;
run;
proc sort data = asa_data.whas500;
by id;
run;
proc sort data = fig52a;
by id;
run;
data fig52a;
merge fig52a asa_data.whas500;
by id;
run;
proc loess data = fig52a;
model mart = bmi;
ods output outputstatistics = temp1;
run;
proc sort data = temp1;
by bmi;
run;
proc contents data = temp1; run;
goptions reset = all;
symbol1 v = dot i = none;
symbol2 v = none i = join;
proc gplot data = temp1;
plot DepVar*bmi=1 pred*bmi=2 / overlay;
*plot pred*bmi=2;
run;
quit;
* Stepwise ;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = age chf hr diasbp bmi gender mitype miord sysbp cvd afb
/ selection = forward slentry = 0.25 slstay = 0.8;
run;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = age chf hr diasbp bmi gender mitype miord sysbp cvd afb
/ selection = stepwise slentry = 0.25 slstay = 0.8 details;
run;
* Best Subset;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = age chf hr diasbp bmi gender mitype miord sysbp cvd afb
/ selection = score best = 3 details;
run;
Monday, November 18, 2013
SAS Survival Analysis II: descriptive analysis
libname asa_data 'C:\ASA_SAS';
data toydata;
input subject time censor;
datalines;
1 6 1
2 44 1
3 21 0
4 14 1
5 62 1
;
run;
* Table 2.2, Figure 2.1;
proc lifetest data = toydata plots = s cs = none;
time time*censor(0);
run;
data whas100;
set asa_data.whas100;
year = lenfol/365.25;
run;
* Figure 2.2, Table 2.3;
proc lifetest data = whas100 plots = (s ls h) cs = none;
time year*fstat(0);
label year = 'Survival Time (Years)';
run;
ods trace on;
proc lifetest data = whas100 plots = (s ls h) cs = none;
time year*fstat(0);
label year = 'Survival Time (Years)';
run;
ods trace off;
ods select productlimitestimates;
proc lifetest data = whas100 plots = (s ls h) cs = none;
time year*fstat(0);
label year = 'Survival Time (Years)';
run;
* Table 2.4, Figure 2.4;
proc lifetest data = whas100 plots = s method = lt;
time year*fstat(0);
run;
proc lifetest data = whas100 plots = s conftype = loglog confband = hw
outs = conf_data;
time year*fstat(0);
run;
proc print;run;
* Figure 2.7;
goptions reset = all;
legend1 label=none value=(h=1.5 font=swiss 'Est. Surv. Pr.' 'Pointwise 95%' 'Hall & Wellner Band')
position=(bottom center inside) mode=protect down=3 across=1;
axis1 label=(a=90 'Estimated Survival Probability') order=(0 to 1 by 0.1) minor=none;
axis2 label=('Survival Time (Years)') order=(0 to 8 by 1) minor=none;
symbol1 i=steplj color=black line=1;
symbol2 i=steplj color=black line=14;
symbol3 i=steplj color=black line=46;
symbol4 i=steplj color=black line=14;
symbol5 i=steplj color=black line=46;
proc gplot data = conf_data;
plot (survival sdf_ucl hw_ucl)*year / overlay vaxis=axis1 haxis=axis2 legend=legend1;
plot2 (sdf_lcl hw_lcl)*year /overlay vaxis=axis1 noaxis nolegend;
run;
quit;
* Figure 2.7;
proc sort data = whas100;
by year;
run;
proc lifetest data = whas100 outs = loglogdata conftype = loglog;
time year*fstat(0);
run;
proc contents; run;
proc print;run;
data loglogdata;
set loglogdata;
rename SDF_LCL = lcl_loglog SDF_UCL = ucl_loglog;
drop _CENSOR_;
run;
proc lifetest data = whas100 outs = logdata conftype = log;
time year*fstat(0);
run;
data logdata;
set logdata;
rename SDF_LCL = lcl_log SDF_UCL = ucl_log;
drop _CENSOR_;
run;
proc lifetest data = whas100 outs = logitdata conftype = logit;
time year*fstat(0);
run;
data logitdata;
set logitdata;
rename SDF_LCL = lcl_logit SDF_UCL = ucl_logit;
drop _CENSOR_;
run;
proc lifetest data = whas100 outs = asindata conftype = asinsqrt noprint;
time year*fstat(0);
run;
data asindata;
set asindata;
rename SDF_LCL = lcl_asin SDF_UCL = ucl_asin;
drop _CENSOR_;
run;
data all;
merge loglogdata logdata logitdata asindata;
by year;
run;
goptions reset = all;
legend1 label=none value=(h=1.5 font=swiss 'Est. Surv. Pr.' 'Log' 'Log-Log' 'Logit' 'Arcsine'
'lower logit' 'upper logit' 'lower arcsine' 'upper arcsine')
position=(bottom left inside) mode=protect across=2;
axis1 label=(a=90 'Estimated Survival Probability') order=(0 to 1 by .2) minor=none ;
axis2 label=('Survival Time (Years)') order=(0 to 8 by 1) minor=none;
symbol1 i=steplj line=1 color=black;
symbol2 i=steplj line=4 color=black;
symbol3 i=steplj line=14 color=black;
symbol4 i=steplj line=46 color=black;
symbol5 i=steplj line=41 color=black;
symbol6 i=steplj line=4 color=black;
symbol7 i=steplj line=14 color=black;
symbol8 i=steplj line=46 color=black;
symbol9 i=steplj line=41 color=black;
proc gplot data = all;
plot (survival lcl_log lcl_loglog lcl_logit lcl_asin)*year / overlay vaxis=axis1 haxis=axis2 legend=legend1;
plot2 (ucl_log ucl_loglog ucl_logit ucl_asin)*year /overlay vaxis=axis1 noaxis nolegend;
run;
quit;
* Table 2.5;
ods select quartiles;
proc lifetest data = whas100 conftype = log;
time year*fstat(0);
run;
* Table 2.7;
proc lifetest data = toydata;
time time*censor(0);
run;
data toy_temp;
set toydata;
if subject = 5 then censor = 0;
run;
proc lifetest data = toy_temp;
time time*censor(0);
run;
* Figure 2.9, Table 2.12;
proc lifetest data = whas100 plots = s cs = none;
time lenfol*fstat(0);
strata gender / test = (logrank wilcoxon peto tarone);
run;
data toydata;
input subject time censor;
datalines;
1 6 1
2 44 1
3 21 0
4 14 1
5 62 1
;
run;
* Table 2.2, Figure 2.1;
proc lifetest data = toydata plots = s cs = none;
time time*censor(0);
run;
data whas100;
set asa_data.whas100;
year = lenfol/365.25;
run;
* Figure 2.2, Table 2.3;
proc lifetest data = whas100 plots = (s ls h) cs = none;
time year*fstat(0);
label year = 'Survival Time (Years)';
run;
ods trace on;
proc lifetest data = whas100 plots = (s ls h) cs = none;
time year*fstat(0);
label year = 'Survival Time (Years)';
run;
ods trace off;
ods select productlimitestimates;
proc lifetest data = whas100 plots = (s ls h) cs = none;
time year*fstat(0);
label year = 'Survival Time (Years)';
run;
* Table 2.4, Figure 2.4;
proc lifetest data = whas100 plots = s method = lt;
time year*fstat(0);
run;
proc lifetest data = whas100 plots = s conftype = loglog confband = hw
outs = conf_data;
time year*fstat(0);
run;
proc print;run;
* Figure 2.7;
goptions reset = all;
legend1 label=none value=(h=1.5 font=swiss 'Est. Surv. Pr.' 'Pointwise 95%' 'Hall & Wellner Band')
position=(bottom center inside) mode=protect down=3 across=1;
axis1 label=(a=90 'Estimated Survival Probability') order=(0 to 1 by 0.1) minor=none;
axis2 label=('Survival Time (Years)') order=(0 to 8 by 1) minor=none;
symbol1 i=steplj color=black line=1;
symbol2 i=steplj color=black line=14;
symbol3 i=steplj color=black line=46;
symbol4 i=steplj color=black line=14;
symbol5 i=steplj color=black line=46;
proc gplot data = conf_data;
plot (survival sdf_ucl hw_ucl)*year / overlay vaxis=axis1 haxis=axis2 legend=legend1;
plot2 (sdf_lcl hw_lcl)*year /overlay vaxis=axis1 noaxis nolegend;
run;
quit;
* Figure 2.7;
proc sort data = whas100;
by year;
run;
proc lifetest data = whas100 outs = loglogdata conftype = loglog;
time year*fstat(0);
run;
proc contents; run;
proc print;run;
data loglogdata;
set loglogdata;
rename SDF_LCL = lcl_loglog SDF_UCL = ucl_loglog;
drop _CENSOR_;
run;
proc lifetest data = whas100 outs = logdata conftype = log;
time year*fstat(0);
run;
data logdata;
set logdata;
rename SDF_LCL = lcl_log SDF_UCL = ucl_log;
drop _CENSOR_;
run;
proc lifetest data = whas100 outs = logitdata conftype = logit;
time year*fstat(0);
run;
data logitdata;
set logitdata;
rename SDF_LCL = lcl_logit SDF_UCL = ucl_logit;
drop _CENSOR_;
run;
proc lifetest data = whas100 outs = asindata conftype = asinsqrt noprint;
time year*fstat(0);
run;
data asindata;
set asindata;
rename SDF_LCL = lcl_asin SDF_UCL = ucl_asin;
drop _CENSOR_;
run;
data all;
merge loglogdata logdata logitdata asindata;
by year;
run;
goptions reset = all;
legend1 label=none value=(h=1.5 font=swiss 'Est. Surv. Pr.' 'Log' 'Log-Log' 'Logit' 'Arcsine'
'lower logit' 'upper logit' 'lower arcsine' 'upper arcsine')
position=(bottom left inside) mode=protect across=2;
axis1 label=(a=90 'Estimated Survival Probability') order=(0 to 1 by .2) minor=none ;
axis2 label=('Survival Time (Years)') order=(0 to 8 by 1) minor=none;
symbol1 i=steplj line=1 color=black;
symbol2 i=steplj line=4 color=black;
symbol3 i=steplj line=14 color=black;
symbol4 i=steplj line=46 color=black;
symbol5 i=steplj line=41 color=black;
symbol6 i=steplj line=4 color=black;
symbol7 i=steplj line=14 color=black;
symbol8 i=steplj line=46 color=black;
symbol9 i=steplj line=41 color=black;
proc gplot data = all;
plot (survival lcl_log lcl_loglog lcl_logit lcl_asin)*year / overlay vaxis=axis1 haxis=axis2 legend=legend1;
plot2 (ucl_log ucl_loglog ucl_logit ucl_asin)*year /overlay vaxis=axis1 noaxis nolegend;
run;
quit;
* Table 2.5;
ods select quartiles;
proc lifetest data = whas100 conftype = log;
time year*fstat(0);
run;
* Table 2.7;
proc lifetest data = toydata;
time time*censor(0);
run;
data toy_temp;
set toydata;
if subject = 5 then censor = 0;
run;
proc lifetest data = toy_temp;
time time*censor(0);
run;
* Figure 2.9, Table 2.12;
proc lifetest data = whas100 plots = s cs = none;
time lenfol*fstat(0);
strata gender / test = (logrank wilcoxon peto tarone);
run;
SAS Survival Analysis I: data input
libname asa_data 'C:\ASA_SAS';
data asa_data.whas100;
infile 'C:\Applied_Survival_Analysis_Data\whas100.dat';
input id 1-5 +2 admitdate mmddyy10. +2 foldate mmddyy10. los lenfol fstat age gender bmi;
run;
/*
proc print data = asa_data.whas100;
title 'WHAS100';
format admitdate date9. foldate date11.;
run;
*/
data asa_data.actg320;
infile 'C:\Applied_Survival_Analysis_Data\actg320.dat';
input id time censor time_d tx txgrp strat2 sex raceth ivdrug hemophil
karnof cd4 priorzdv age;
run;
data asa_data.actg320ncc;
infile 'C:\Applied_Survival_Analysis_Data\actg320ncc.dat';
input set case id time tx age cd4;
run;
data asa_data.bpd;
infile 'C:\Applied_Survival_Analysis_Data\bpd.dat';
input id surfact ondays censor;
run;
data asa_data.comprisk;
infile 'C:\Applied_Survival_Analysis_Data\comprisk.dat';
input id age gender bmi time ev_typ;
run;
data asa_data.cvdrisk;
infile 'C:\Applied_Survival_Analysis_Data\cvdrisk.dat';
input id age bmi gender time ev_typ;
run;
data asa_data.FRTCS;
infile 'C:\Applied_Survival_Analysis_Data\FRTCS.dat';
input id age sex date0 date9. sbp0 dbp0 antihyp0 date1 date9. sbp1
dbp1 antihyp1 date2 date9. sbp2 dbp2 antihyp2 date_event date9. censor;
run;
/*
proc print data = asa_data.FRTCS;
format date0 date9. date1 date9. date_event date9.;
run;
*/
data asa_data.gbcs;
infile 'C:\Applied_Survival_Analysis_Data\gbcs.dat';
input id diagdate date11. recdate date11. deathdate date11. age menopause
hormone size grade nodes prog_recp estrg_recp rectime censrec
survtime censdead;
run;
/*
proc print data = asa_data.gbcs (obs = 5);
format diagdate date9. recdate date11. deathdate date11.;
run;
*/
data asa_data.GRACE1000;
infile 'C:\Applied_Survival_Analysis_Data\GRACE1000.dat';
input id days death revasc revascdays los age sysbp stchange;
run;
data asa_data.recur;
infile 'C:\Applied_Survival_Analysis_Data\recur.dat';
input id age treat time0 time1 censor event;
run;
data asa_data.uis;
infile 'C:\Applied_Survival_Analysis_Data\uis.dat';
input id age beck hercoc ivhx ndrugtx race treat site los time censor;
run;
data asa_data.whas500;
infile 'C:\Applied_Survival_Analysis_Data\whas500.dat';
input id age gender hr sysbp diasbp bmi cvd afb sho chf av3 miord mitype year
admitdate mmddyy12. disdate mmddyy12. fdate mmddyy12. los dstat lenfol fstat;
run;
/*
proc print data = asa_data.whas500;
format admitdate date9. disdate date9. fdate date9.;
run;
*/
data asa_data.whasncc;
infile 'C:\Applied_Survival_Analysis_Data\whasncc.dat';
input set case t lenfol fstat age sex bmi chf miord nr;
run;
libname asa_data 'C:\ASA_SAS';
proc format;
value status 1 = 'Dead'
0 = 'Alive';
value gender 0 = 'Male'
1 = 'Female';
run;
title '100 Subjects in the Worcester Heart Attack Study (WHAS100)';
proc print data = asa_data.whas100 label;
id id;
var admitdate foldate los lenfol fstat age gender bmi;
format admitdate mmddyy8. foldate mmddyy8. fstat status. gender gender.;
label id = 'ID'
admitdate = 'Admission Date'
foldate = 'Follow Up Date'
los = 'Length of Stay'
lenfol = 'Follow Up Time'
fstat = 'Vital Status'
age = 'Age at Admission'
gender = 'Gender'
bmi = 'BMI';
run;
data asa_data.whas100;
infile 'C:\Applied_Survival_Analysis_Data\whas100.dat';
input id 1-5 +2 admitdate mmddyy10. +2 foldate mmddyy10. los lenfol fstat age gender bmi;
run;
/*
proc print data = asa_data.whas100;
title 'WHAS100';
format admitdate date9. foldate date11.;
run;
*/
data asa_data.actg320;
infile 'C:\Applied_Survival_Analysis_Data\actg320.dat';
input id time censor time_d tx txgrp strat2 sex raceth ivdrug hemophil
karnof cd4 priorzdv age;
run;
data asa_data.actg320ncc;
infile 'C:\Applied_Survival_Analysis_Data\actg320ncc.dat';
input set case id time tx age cd4;
run;
data asa_data.bpd;
infile 'C:\Applied_Survival_Analysis_Data\bpd.dat';
input id surfact ondays censor;
run;
data asa_data.comprisk;
infile 'C:\Applied_Survival_Analysis_Data\comprisk.dat';
input id age gender bmi time ev_typ;
run;
data asa_data.cvdrisk;
infile 'C:\Applied_Survival_Analysis_Data\cvdrisk.dat';
input id age bmi gender time ev_typ;
run;
data asa_data.FRTCS;
infile 'C:\Applied_Survival_Analysis_Data\FRTCS.dat';
input id age sex date0 date9. sbp0 dbp0 antihyp0 date1 date9. sbp1
dbp1 antihyp1 date2 date9. sbp2 dbp2 antihyp2 date_event date9. censor;
run;
/*
proc print data = asa_data.FRTCS;
format date0 date9. date1 date9. date_event date9.;
run;
*/
data asa_data.gbcs;
infile 'C:\Applied_Survival_Analysis_Data\gbcs.dat';
input id diagdate date11. recdate date11. deathdate date11. age menopause
hormone size grade nodes prog_recp estrg_recp rectime censrec
survtime censdead;
run;
/*
proc print data = asa_data.gbcs (obs = 5);
format diagdate date9. recdate date11. deathdate date11.;
run;
*/
data asa_data.GRACE1000;
infile 'C:\Applied_Survival_Analysis_Data\GRACE1000.dat';
input id days death revasc revascdays los age sysbp stchange;
run;
data asa_data.recur;
infile 'C:\Applied_Survival_Analysis_Data\recur.dat';
input id age treat time0 time1 censor event;
run;
data asa_data.uis;
infile 'C:\Applied_Survival_Analysis_Data\uis.dat';
input id age beck hercoc ivhx ndrugtx race treat site los time censor;
run;
data asa_data.whas500;
infile 'C:\Applied_Survival_Analysis_Data\whas500.dat';
input id age gender hr sysbp diasbp bmi cvd afb sho chf av3 miord mitype year
admitdate mmddyy12. disdate mmddyy12. fdate mmddyy12. los dstat lenfol fstat;
run;
/*
proc print data = asa_data.whas500;
format admitdate date9. disdate date9. fdate date9.;
run;
*/
data asa_data.whasncc;
infile 'C:\Applied_Survival_Analysis_Data\whasncc.dat';
input set case t lenfol fstat age sex bmi chf miord nr;
run;
libname asa_data 'C:\ASA_SAS';
proc format;
value status 1 = 'Dead'
0 = 'Alive';
value gender 0 = 'Male'
1 = 'Female';
run;
title '100 Subjects in the Worcester Heart Attack Study (WHAS100)';
proc print data = asa_data.whas100 label;
id id;
var admitdate foldate los lenfol fstat age gender bmi;
format admitdate mmddyy8. foldate mmddyy8. fstat status. gender gender.;
label id = 'ID'
admitdate = 'Admission Date'
foldate = 'Follow Up Date'
los = 'Length of Stay'
lenfol = 'Follow Up Time'
fstat = 'Vital Status'
age = 'Age at Admission'
gender = 'Gender'
bmi = 'BMI';
run;
Subscribe to:
Posts (Atom)