Sunday, August 23, 2015

R cheatsheet: data type and data structure

# mode represents how an object is stored in memory
# (numeric, character, list and function)
#
# class represents its abstract type.


# mode function: return the mode of R objects
num.obj <- seq(from=1, to=10, by=2)
mode(num.obj)

logical.obj <- c(TRUE, TRUE, FALSE, FALSE, FALSE)
mode(logical.obj)

character.obj <- c("a", "b", "c")
mode(character.obj)

is.numeric(num.obj)

is.character(num.obj)
is.logical(logical.obj)
is.numeric(logical.obj)

mode(mean)
is.function(mean)

# class function: return the class information of R objects
class(num.obj)
class(logical.obj)
class(character.obj)

typeof(num.obj)

mat.obj <- matrix(runif(9), ncol = 3, nrow = 3)
mode(mat.obj)
class(mat.obj)
print(mat.obj)
print(num.obj)

# factor: for categorical data
character.obj
is.factor(character.obj)
is.character(character.obj)
factor.obj <- as.factor(character.obj)
is.factor(factor.obj)
mode(factor.obj)
class(factor.obj)

factor1 <- factor(c(1, 2, 3, 4, 5, 6, 7, 8, 9))
factor1
levels(factor1)
labels(factor1)
factor2 <- factor(c(1, 2, 3, 4, 5, 6, 7, 8, 9), labels=letters[1:9])
factor2
levels(factor2)
labels(factor2)

# data frame
var1 <- c(101, 102, 103, 104, 105)
var2 <- c(25, 22, 29, 34, 33)
var3 <- c("Non-Diabetic", "Diabetic", "Non-Diabetic", "Non-Diabetic", "Diabetic")
var4 <- factor(c("male", "male", "female", "female", "male"))
diab.dat <- data.frame(var1, var2, var3, var4)
diab.dat
summary(diab.dat)
class(diab.dat)
mode(diab.dat)
typeof(diab.dat)

# matrices
mat.diab <- as.matrix(diab.dat)
class(mat.diab)
mode(mat.diab)
typeof(mat.diab)

num.mat <- matrix(rnorm(9), nrow = 3, ncol = 3)
num.mat
class(num.mat)
mode(num.mat)
t(num.mat)
t(num.mat) %*% num.mat

# array: can be of any number of dimensions
mat.array = array(dim = c(2, 2, 3))
mat.array[,,1] <- rnorm(4)
mat.array[,,2] <- rnorm(4)
mat.array[,,3] <- rnorm(4)

mat.array

obj.list <- list(elem1=var1, elem2=var2, elem3=var3,
                 elem4=var4, elem5=diab.dat, elem6=mat.array)

obj.list[1]
obj.list[[1]]
obj.list[[1]][2]

# Date variables
as.Date("1970-01-01")
as.numeric(as.Date("1970-01-01"))

as.numeric(as.Date("1970-01-02"))
as.Date("Jan-01-1970", format="%b-%d-%Y")

Sys.time()
substr(as.character(Sys.time()), 1, 10)
substr(as.character(Sys.time()), 12, 19)

# number of seconds since 1 January 1970
unclass(Sys.time())

# POSIXct: the signed number of seconds since the beginning of 1970 as a numeric vector
# more convenient for including in dataframes

# POSIXlt: a named list of vectors
# representing seconds, minutes, hours, days, months and years

date <- as.POSIXlt(Sys.time())
date
date$wday
date$yday
mode(date)
class(date)
typeof(date)
unclass(date)
date2 <- unlist(unclass(date))
class(date2)
mode(date2)
typeof(date2)
length(date)
length(date2)

date <- as.POSIXct(Sys.time())
date
unclass(date)
unlist(unclass(date))
class(date)
mode(date)
typeof(date)
length(date)

date.str <- "2015-12-02"
date4 <- as.POSIXct(date.str, format="%Y-%m-%d")
datetime.str <- "2015-12-02 16:30:00"
date5 <- as.POSIXct(datetime.str, format="%Y-%m-%d %H:%M:%S", tz="EST")
date5

Monday, December 2, 2013

SAS Survival Analysis IV: Cox Model with Time-dependet Variable

libname asa_data 'C:\ASA_SAS';
* Time-dependent variable;
ods output parameterestimates = temp1;
proc phreg data = asa_data.uis;
model time*censor(0) = treat off_trt tot;
if time <= los then off_trt = 0;
else off_trt = 1;
tot = treat*off_trt;
run;
* Compared with the following code;
data temp2;
set asa_data.uis;
if time <= los then off_trt = 0;
else off_trt = 1;
tot = treat*off_trt;
run;
proc phreg data = temp2;
model time*censor(0) = treat off_trt tot;
run;

Wednesday, November 27, 2013

SAS Survival Analysis III: Cox model

libname asa_data 'C:\ASA_SAS';
ods trace on;
proc phreg data = asa_data.whas100;
model lenfol*fstat(0) = gender;
run;
ods trace off;
* Table 3.2;
ods output ParameterEstimates = temp1;
proc phreg data = asa_data.whas100;
model lenfol*fstat(0) = gender;
run;
libname asa_data 'C:\ASA_SAS';
ods trace on;
proc phreg data = asa_data.whas100;
model lenfol*fstat(0) = gender;
run;
ods trace off;
* Table 4.2;
ods output ParameterEstimates = temp1;
proc phreg data = asa_data.whas100;
model lenfol*fstat(0) = gender / ties = breslow;
run;
proc print data = temp1;run;
proc contents data = temp1;run;
data table4p2;
set temp1;
z = sqrt(ChiSq);
z2 = Estimate / StdErr;
lowerb = estimate - 1.96*stderr;
upperb = estimate + 1.96*stderr;
run;

proc print data = table4p2 label noobs;
var Parameter Estimate stderr z z2 Probchisq lowerb upperb;
format Estimate z z2 Probchisq lowerb upperb f8.3;
format stderr f8.4;
label Parameter = 'Variable'
   Estimate = 'Coeff.'
      stderr = 'Std.Err.'
   Probchisq = 'p>|z|'
   lowerb = 'Conf Lower'
   upperb = 'Conf Upper';
run;

* Table 4.4;
data temp2;
set asa_data.whas100;
age_2 = 0;
age_3 = 0;
age_4 = 0;
if 60 <= age <= 69 then age_2 = 1;
else if 70 <= age <= 79 then age_3 = 1;
else if 80 <= age then age_4 = 1;  
run;
* Table 4.5 & Table 4.7;
proc phreg data = temp2;
model lenfol*fstat(0) = age_2 age_3 age_4 / ties = breslow risklimits covb;
run;

data temp3;
set asa_data.whas100;
age_2 = 0;
age_3 = 0;
age_4 = 0;
if age < 60 then do;
age_2 = -1;
age_3 = -1;
age_4 = -1;
end;
if 60 <= age <= 69 then age_2 = 1;
if 60 <= age <= 69 then age_2 = 1;
else if 70 <= age <= 79 then age_3 = 1;
else if 80 <= age then age_4 = 1;  
run;
* Table 4.8;
proc phreg data = temp3;
model lenfol*fstat(0) = age_2 age_3 age_4 / ties = breslow risklimits covb;
run;

* Table 4.13;
* Crude Model;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = gender/ risklimits;
run;
* Adjusted Model;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = gender age/ risklimits;
run;
* Interaction Model;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = gender age gender*age/ risklimits; *no hazard ratio in this case;
run;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = gender age ga/ risklimits;
ga = gender*age;
output out = temp4 xbeta = xbeta;
run;
proc print data = temp4; run;
proc gplot data = temp4;
plot xbeta*age = gender;
run;
* Figure 4.5;
data temp1;
set asa_data.gbcs;
time = rectime/30;
run;
proc phreg data = temp1;
model time*censrec(0) = hormone;
output out = temp2 survival = survival;
run;
proc sort data = temp2;
by hormone time;
run;
proc gplot data = temp2;
plot survival*time = hormone;
run;
quit;

libname asa_data 'C:\ASA_SAS';
* Figure 5.2a;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = age hr diasbp gender chf;
id id;
output out = fig52a resmart = mart;
run;
proc sort data = asa_data.whas500;
by id;
run;
proc sort data = fig52a;
by id;
run;
data fig52a;
merge fig52a asa_data.whas500;
by id;
run;
proc loess data = fig52a;
model mart = bmi;
ods output outputstatistics = temp1;
run;
proc sort data = temp1;
by bmi;
run;
proc contents data = temp1; run;
goptions reset = all;
symbol1 v = dot i = none;
symbol2 v = none i = join;
proc gplot data = temp1;
plot DepVar*bmi=1 pred*bmi=2 / overlay;
*plot pred*bmi=2;
run;
quit;

* Stepwise ;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = age chf hr diasbp bmi gender mitype miord sysbp cvd afb
/ selection = forward slentry = 0.25 slstay = 0.8;
run;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = age chf hr diasbp bmi gender mitype miord sysbp cvd afb
/ selection = stepwise slentry = 0.25 slstay = 0.8 details;
run;
* Best Subset;
proc phreg data = asa_data.whas500;
model lenfol*fstat(0) = age chf hr diasbp bmi gender mitype miord sysbp cvd afb
/ selection = score best = 3 details;
run;

Monday, November 18, 2013

SAS Survival Analysis II: descriptive analysis

libname asa_data 'C:\ASA_SAS';
data toydata;
input subject time censor;
datalines;
1 6 1
2 44 1
3 21 0
4 14 1
5 62 1
;
run;
* Table 2.2, Figure 2.1;
proc lifetest data = toydata plots = s cs = none;
time time*censor(0);
run;
data whas100;
set asa_data.whas100;
year = lenfol/365.25;
run;
* Figure 2.2, Table 2.3;
proc lifetest data = whas100 plots = (s ls h) cs = none;
time year*fstat(0);
label year = 'Survival Time (Years)';
run;
ods trace on;
proc lifetest data = whas100 plots = (s ls h) cs = none;
time year*fstat(0);
label year = 'Survival Time (Years)';
run;
ods trace off;
ods select productlimitestimates;
proc lifetest data = whas100 plots = (s ls h) cs = none;
time year*fstat(0);
label year = 'Survival Time (Years)';
run;
* Table 2.4, Figure 2.4;
proc lifetest data = whas100 plots = s method = lt;
time year*fstat(0);
run;

proc lifetest data = whas100 plots = s conftype = loglog confband = hw
outs = conf_data;
time year*fstat(0);
run;
proc print;run;
* Figure 2.7;
goptions reset = all;
legend1 label=none value=(h=1.5 font=swiss 'Est. Surv. Pr.' 'Pointwise 95%' 'Hall & Wellner Band')
        position=(bottom center inside) mode=protect down=3 across=1;
axis1 label=(a=90 'Estimated Survival Probability') order=(0 to 1 by 0.1) minor=none;
axis2 label=('Survival Time (Years)') order=(0 to 8 by 1) minor=none;
symbol1 i=steplj color=black line=1;
symbol2 i=steplj color=black line=14;
symbol3 i=steplj color=black line=46;
symbol4 i=steplj color=black line=14;
symbol5 i=steplj color=black line=46;
proc gplot data = conf_data;
plot (survival sdf_ucl hw_ucl)*year / overlay vaxis=axis1 haxis=axis2 legend=legend1;
plot2 (sdf_lcl hw_lcl)*year /overlay vaxis=axis1 noaxis nolegend;
run;
quit;

* Figure 2.7;
proc sort data = whas100;
by year;
run;
proc lifetest data = whas100 outs = loglogdata conftype = loglog;
time year*fstat(0);
run;
proc contents; run;
proc print;run;
data loglogdata;
set loglogdata;
rename SDF_LCL = lcl_loglog SDF_UCL = ucl_loglog;
drop _CENSOR_;
run;
proc lifetest data = whas100 outs = logdata conftype = log;
time year*fstat(0);
run;
data logdata;
set logdata;
rename SDF_LCL = lcl_log SDF_UCL = ucl_log;
drop _CENSOR_;
run;
proc lifetest data = whas100 outs = logitdata conftype = logit;
time year*fstat(0);
run;
data logitdata;
set logitdata;
rename SDF_LCL = lcl_logit SDF_UCL = ucl_logit;
drop _CENSOR_;
run;
proc lifetest data = whas100 outs = asindata conftype = asinsqrt noprint;
time year*fstat(0);
run;
data asindata;
set asindata;
rename SDF_LCL = lcl_asin SDF_UCL = ucl_asin;
drop _CENSOR_;
run;
data all;
merge loglogdata logdata logitdata asindata;
by year;
run;
goptions reset = all;
legend1 label=none value=(h=1.5 font=swiss 'Est. Surv. Pr.' 'Log' 'Log-Log' 'Logit' 'Arcsine'
        'lower logit' 'upper logit' 'lower arcsine' 'upper arcsine')
        position=(bottom left inside) mode=protect across=2;
axis1 label=(a=90 'Estimated Survival Probability') order=(0 to 1 by .2) minor=none ;
axis2 label=('Survival Time (Years)') order=(0 to 8 by 1) minor=none;
symbol1 i=steplj line=1 color=black;
symbol2 i=steplj line=4 color=black;
symbol3 i=steplj line=14 color=black;
symbol4 i=steplj line=46 color=black;
symbol5 i=steplj line=41 color=black;
symbol6 i=steplj line=4 color=black;
symbol7 i=steplj line=14 color=black;
symbol8 i=steplj line=46 color=black;
symbol9 i=steplj line=41 color=black;
proc gplot data = all;
plot (survival lcl_log lcl_loglog lcl_logit lcl_asin)*year / overlay vaxis=axis1 haxis=axis2 legend=legend1;
plot2 (ucl_log ucl_loglog ucl_logit ucl_asin)*year /overlay vaxis=axis1 noaxis nolegend;
run;
quit;
* Table 2.5;
ods select quartiles;
proc lifetest data = whas100 conftype = log;
time year*fstat(0);
run;
* Table 2.7;
proc lifetest data = toydata;
time time*censor(0);
run;
data toy_temp;
set toydata;
if subject = 5 then censor = 0;
run;
proc lifetest data = toy_temp;
time time*censor(0);
run;
* Figure 2.9, Table 2.12;
proc lifetest data = whas100 plots = s cs = none;
time lenfol*fstat(0);
strata gender / test = (logrank wilcoxon peto tarone);
run;

SAS Survival Analysis I: data input

libname asa_data 'C:\ASA_SAS';
data asa_data.whas100;
 infile 'C:\Applied_Survival_Analysis_Data\whas100.dat';
 input id 1-5 +2 admitdate mmddyy10. +2 foldate mmddyy10. los lenfol fstat age gender bmi;
run;
/*
proc print data = asa_data.whas100;
 title 'WHAS100';
 format admitdate date9. foldate date11.;
run;
*/
data asa_data.actg320;
 infile 'C:\Applied_Survival_Analysis_Data\actg320.dat';
 input id time censor time_d tx txgrp strat2 sex raceth ivdrug hemophil
   karnof cd4 priorzdv age;
run;
data asa_data.actg320ncc;
 infile 'C:\Applied_Survival_Analysis_Data\actg320ncc.dat';
 input set case id time tx age cd4;
run;
data asa_data.bpd;
 infile 'C:\Applied_Survival_Analysis_Data\bpd.dat';
 input id surfact ondays censor;
run;
data asa_data.comprisk;
 infile 'C:\Applied_Survival_Analysis_Data\comprisk.dat';
 input id age gender bmi time ev_typ;
run;
data asa_data.cvdrisk;
 infile 'C:\Applied_Survival_Analysis_Data\cvdrisk.dat';
 input id age bmi gender time ev_typ;
run;
data asa_data.FRTCS;
 infile 'C:\Applied_Survival_Analysis_Data\FRTCS.dat';
 input id age sex date0 date9. sbp0 dbp0 antihyp0 date1 date9. sbp1
   dbp1 antihyp1 date2 date9. sbp2 dbp2 antihyp2 date_event date9. censor;
run;
/*
proc print data = asa_data.FRTCS;
 format date0 date9. date1 date9. date_event date9.;
run;
*/
data asa_data.gbcs;
 infile 'C:\Applied_Survival_Analysis_Data\gbcs.dat';
 input id diagdate date11. recdate date11. deathdate date11. age menopause
   hormone size grade nodes prog_recp estrg_recp rectime censrec
   survtime censdead;
run;
/*
proc print data = asa_data.gbcs (obs = 5);
 format diagdate date9. recdate date11. deathdate date11.;
run;
*/
data asa_data.GRACE1000;
 infile 'C:\Applied_Survival_Analysis_Data\GRACE1000.dat';
 input id days death revasc revascdays los age sysbp stchange;
run;
data asa_data.recur;
 infile 'C:\Applied_Survival_Analysis_Data\recur.dat';
 input id age treat time0 time1 censor event;
run;
data asa_data.uis;
 infile 'C:\Applied_Survival_Analysis_Data\uis.dat';
 input id age beck hercoc ivhx ndrugtx race treat site los time censor;
run;
data asa_data.whas500;
 infile 'C:\Applied_Survival_Analysis_Data\whas500.dat';
 input id age gender hr sysbp diasbp bmi cvd afb sho chf av3 miord mitype year
    admitdate mmddyy12. disdate mmddyy12. fdate mmddyy12. los dstat lenfol fstat;
run;
/*
proc print data = asa_data.whas500;
 format admitdate date9. disdate date9. fdate date9.;
run;
*/
data asa_data.whasncc;
 infile 'C:\Applied_Survival_Analysis_Data\whasncc.dat';
 input set case t lenfol fstat age sex bmi chf miord nr;
run;
libname asa_data 'C:\ASA_SAS';
proc format;
 value status 1 = 'Dead'
     0 = 'Alive';
 value gender 0 = 'Male'
     1 = 'Female';
run;
title '100 Subjects in the Worcester Heart Attack Study (WHAS100)';
proc print data = asa_data.whas100 label;
 id id;
 var admitdate foldate los lenfol fstat age gender bmi;
 format admitdate mmddyy8. foldate mmddyy8. fstat status. gender gender.;
 label id = 'ID'
    admitdate = 'Admission Date'
    foldate = 'Follow Up Date'
    los = 'Length of Stay'
    lenfol = 'Follow Up Time'
    fstat = 'Vital Status'
    age = 'Age at Admission'
    gender = 'Gender'
    bmi = 'BMI';
run;

Monday, September 30, 2013

Time Series II: AR, MA, ARMA, ARIMA

rm(list=ls())
# white noise
set.seed(1)
w <- rnorm(100)
plot(w, type = "l")
x <- seq(-3,3, length = 1000)
hist(rnorm(100), prob = T); points(x, dnorm(x), type = "l")
set.seed(2)
acf(rnorm(100))
# random walk
set.seed(2)
x <- w <- rnorm(1000)
for (t in 2:1000) x[t] <- x[t - 1] + w[t]
plot(x, type = "l")
acf(x)
acf(diff(x))
www <- "http://elena.aut.ac.nz/~pcowpert/ts/HP.txt"
HP.dat <- read.table(www, header = T) ; attach(HP.dat)
plot (as.ts(Price))
DP <- diff(Price) ; plot (as.ts(DP)) ; acf (DP)
mean(DP) + c(-2, 2) * sd(DP)/sqrt(length(DP))
# AR(1)
rho <- function(k, alpha) alpha^k
layout(1:2)
plot(0:10, rho(0:10, 0.7), type = "b")
plot(0:10, rho(0:10, -0.7), type = "b")
layout(1:1)
set.seed(1)
x <- w <- rnorm(100)
for (t in 2:100) x[t] <- 0.7 * x[t - 1] + w[t]
plot(x, type = "l")
acf(x)
pacf(x)
x.ar <- ar(x, method = "mle")
x.ar$order
x.ar$ar
x.ar$ar + c(-2, 2) * sqrt(x.ar$asy.var)
www = "http://elena.aut.ac.nz/~pcowpert/ts/pounds_nz.dat"
Z = read.table(www, header = T)
Z.ts = ts(Z, st = 1991, fr = 4)
Z.ar <- ar(Z.ts)
mean(Z.ts)
Z.ar$order
Z.ar$ar
Z.ar$ar + c(-2, 2) * sqrt(Z.ar$asy.var)
acf(Z.ar$res[-1])
www = "http://elena.aut.ac.nz/~pcowpert/ts/global.dat"
Global = scan(www)
Global.ts = ts(Global, st = c(1856, 1), end = c(2005, 12), fr = 12)
Global.ar <- ar(aggregate(Global.ts, FUN = mean), method = "mle")
mean(aggregate(Global.ts, FUN = mean))
Global.ar$order
Global.ar$ar
acf(Global.ar$res[-(1:Global.ar$order)], lag = 50)
# MA: correlogram and simulation
# correlogram
rho = function(k, beta){
  q = length(beta) - 1
  if (k > q) ACF = 0
  else{
    s1 = 0
    s2 = 0
    for (i in 1:(q - k + 1)){
      s1 = s1 + beta[i] * beta[i + k]
    }
    for (i in (1:(q + 1))){
      s2 = s2 + beta[i]^2
    }
    ACF = s1/s2
  }
  ACF 
}
 
beta <- c(1, 0.7, 0.5, 0.2) 
rho.k <- rep(1, 10)
for (k in 1:10) rho.k[k] <- rho(k, beta)
plot(0:10, c(1, rho.k), pch = 4, ylab = expression(rho[k])) 
abline(0, 0) 
 
# simulation of MA(3) 
set.seed(1) 
b = c(0.8, 0.6, 0.4) 
x = w = rnorm(1000)
for (t in c(4:1000)){
  for (j in c(1:3)){
    x[t] = x[t] + b[j] * w[t - j]
  }
}
plot(x, type = "l")
acf(x)
x.ma <- arima(x, order = c(0, 0, 3))
x.ma
x.ma <- arima(x, order = c(0, 0, 3), include.mean=FALSE)
x.ma
# fit MA model
www = "http://elena.aut.ac.nz/~pcowpert/ts/pounds_nz.dat"
x <- read.table(www, header = T)
x.ts <- ts(x, st = 1991, fr = 4)
x.ma <- arima(x.ts, order = c(0, 0, 1))
x.ma
# ARMA simulation
set.seed(1)
x = arima.sim(n = 10000, list(ar = -0.6, ma = 0.5))
coef(arima(x, order = c(1, 0, 1)))
AIC(arima(x, order = c(1, 0, 1)))
acf(resid(arima(x, order = c(1, 0, 1))))
www = "http://elena.aut.ac.nz/~pcowpert/ts/cbe.dat"
CBE = read.table(www, header = T)
Elec.ts = ts(CBE[,3], start = 1958, freq = 12)
Time = 1:length(Elec.ts)
Imth = cycle(Elec.ts)
Elec.lm = lm(log(Elec.ts) ~ Time + I(Time^2) + factor(Imth))
summary(Elec.lm)
acf(resid(Elec.lm))
# train ARMA model
best.order = c(0, 0, 0)
best.aic = Inf
for (i in c(0:2)){
  for (j in c(0:2)){
    fit.aic = AIC(arima(resid(Elec.lm), order = c(i, 0, j)))
    if (fit.aic < best.aic){
      best.order = c(i, 0, j)
      best.arma = arima(resid(Elec.lm), order = best.order)
      best.aic = fit.aic           
    }
  }
}
best.order
acf(resid(best.arma))
# prediction
new.time <- seq(length(Elec.ts), length = 36)
new.data <- data.frame(Time = new.time, Imth = rep(1:12, 3))
predict.lm <- predict(Elec.lm, new.data)
predict.arma <- predict(best.arma, n.ahead = 36)
elec.pred <- ts(exp(predict.lm + predict.arma$pred), start = 1991, freq = 12)
ts.plot(cbind(Elec.ts, elec.pred), lty = 1:2)
www = "http://elena.aut.ac.nz/~pcowpert/ts/wave.dat"
wave.dat <- read.table(www, header = T)
attach(wave.dat)
layout(1:3)
plot(as.ts(waveht), ylab = 'Wave height')
acf(waveht)
pacf (waveht)
wave.arma <- arima(waveht, order = c(4,0,4))
acf(wave.arma$res[-(1:4)])
pacf(wave.arma$res[-(1:4)])
hist(wave.arma$res[-(1:4)], xlab='height / mm', main='')
# train Non-seasonal ARIMA model
www = "http://elena.aut.ac.nz/~pcowpert/ts/cbe.dat"
CBE = read.table(www, header = T)
Elec.ts = ts(CBE[, 3], start = 1958, freq = 12)
layout(c(1, 1, 2, 3))
plot(Elec.ts)
plot(diff(Elec.ts))
plot(diff(log(Elec.ts)))
layout(c(1, 1))
# ARIMA(1, 1, 1) simulation and inference
set.seed(1)
x = w = rnorm(1000)
for (i in 3:1000){
  x[i] = 0.5 * x[i - 1] + x[i - 1] - 0.5 * x[i - 2] + w[i] + 0.3 * w[i - 1]
}
plot(x, type = "l")
arima(x = x, order  = c(1, 1, 1))
x = arima.sim(model = list(order = c(1, 1, 1), ar = 0.5, ma = 0.3), n = 1000)
arima(x = x, order  = c(1, 1, 1))
# IMA(1, 1)
Beer.ts = ts(CBE[, 2], start = 1958, freq = 12)
Beer.ima = arima(Beer.ts, order = c(0, 1, 1))
Beer.ima
acf(resid(Beer.ima))
# prediction
Beer.1991 = predict(Beer.ima, n.ahead = 12)
plot(Beer.1991$pred)
plot(Beer.1991$se)

Time Series I: Basics

rm(list=ls())
Fact = function(n) if (n == 1) 1 else n * Fact(n - 1)
Fact(5)
data(AirPassengers)
AP = AirPassengers
AP
class(AP)
start(AP)
end(AP)
frequency(AP)
summary(AP)
plot(AP, ylab = "Passengers (1000's)")
layout(1:2)
plot(aggregate(AP))
boxplot(AP ~ cycle(AP))
www = "http://elena.aut.ac.nz/~pcowpert/ts/Maine.dat"
Maine.month = read.table(www, header = TRUE)
attach(Maine.month)
class(Maine.month)
Maine.month.ts = ts(unemploy, start = c(1996, 1), freq = 12)
Maine.annual.ts = aggregate(Maine.month.ts)/12
layout(1:2)
plot(Maine.month.ts, ylab = "unemployed (%)")
plot(Maine.annual.ts, ylab = "unemployed (%)")
Maine.Feb = window(Maine.month.ts, start = c(1996,2), freq = TRUE)
plot(Maine.Feb)
Maine.Aug = window(Maine.month.ts, start = c(1996,8), freq = TRUE)
plot(Maine.Aug)
Feb.ratio = mean(Maine.Feb) / mean(Maine.month.ts)
Aug.ratio = mean(Maine.Aug) / mean(Maine.month.ts)
Feb.ratio
Aug.ratio
layout(1:1)
www = "http://elena.aut.ac.nz/~pcowpert/ts/USunemp.dat"
US.month = read.table(www, header = T)
attach(US.month)
US.month.ts = ts(USun, start=c(1996,1), end=c(2006,10), freq = 12)
plot(US.month.ts, ylab = "unemployed (%)")
# Multiple time series
www = "http://elena.aut.ac.nz/~pcowpert/ts/cbe.dat"
CBE = read.table(www, header = T)
CBE[1:4, ]
class(CBE)
Elec.ts = ts(CBE[, 3], start = 1958, freq = 12)
Beer.ts = ts(CBE[, 2], start = 1958, freq = 12)
Choc.ts = ts(CBE[, 1], start = 1958, freq = 12)
plot(cbind(Elec.ts, Beer.ts, Choc.ts))
length(AP)
length(Elec.ts)
AP.elec = ts.intersect(AP, Elec.ts)
class(AP.elec)
AP.elec
AP
Elec.ts
AP = AP.elec[,1]
Elec = AP.elec[,2]
layout(1:2)
plot(AP, main = "", ylab = "Air passengers / 1000's")
plot(Elec, main = "", ylab = "Electricity production / MkWh")
layout(1:1)
plot(as.vector(AP), as.vector(Elec),
     xlab = "Air passengers / 1000's",
     ylab = "Electricity production / MWh")
abline(reg = lm(Elec ~ AP))
cor(AP, Elec)
www = "http://elena.aut.ac.nz/~pcowpert/ts/pounds_nz.dat"
Z = read.table(www, header = T)
Z.ts = ts(Z, st = 1991, fr = 4)
plot(Z.ts, xlab = "time / years",
     ylab = "Quarterly exchange rate in $NZ / pound")
Z.92.96 = window(Z.ts, start = c(1992, 1), end = c(1996, 1))
Z.96.98 = window(Z.ts, start = c(1996, 1), end = c(1998, 1))
layout (1:2)
plot(Z.92.96, ylab = "Exchange rate in $NZ/pound",
     xlab = "Time (years)" )
plot(Z.96.98, ylab = "Exchange rate in $NZ/pound",
     xlab = "Time (years)" )

www = "http://elena.aut.ac.nz/~pcowpert/ts/global.dat"
Global = scan(www)
Global.ts = ts(Global, st = c(1856, 1), end = c(2005, 12), fr = 12)
Global.annual = aggregate(Global.ts, FUN = mean)
plot(Global.ts)
plot(Global.annual)
layout(1:1)
New.series = window(Global.ts, start=c(1970, 1), end=c(2005, 12))
New.time = time(New.series)
plot(New.series)
abline(reg=lm(New.series ~ New.time))

# decomposition in R
plot(decompose(Elec.ts))
Elec.decom = decompose(Elec.ts, type = "mult")
plot(Elec.decom)
Trend = Elec.decom$trend
Seasonal = Elec.decom$seasonal
ts.plot(cbind(Trend, Trend * Seasonal), lty = 1:2)

www = "http://elena.aut.ac.nz/~pcowpert/ts/Herald.dat"
Herald.dat = read.table(www, header = T)
attach (Herald.dat)
x = CO; y = Benzoa; n = length(x)
sum((x - mean(x))*(y - mean(y))) / (n - 1)
mean((x - mean(x)) * (y - mean(y)))
cov(x, y)
cov(x,y) / (sd(x)*sd(y))
cor(x,y)
www = "http://elena.aut.ac.nz/~pcowpert/ts/wave.dat"
wave.dat = read.table (www, header=T)
attach(wave.dat)
plot(ts(waveht))
plot(ts(waveht[1:60]))
# acf
acf(waveht)$acf[2]
acf(waveht)$acf[1]
# autocovariance
acf(waveht, type = c("covariance"))$acf[2]
# correlogram
acf(waveht)
acf(AirPassengers)
data(AirPassengers)
AP = AirPassengers
AP.decom = decompose(AP, "multiplicative")
plot(ts(AP.decom$random[7:138]))
acf(AP.decom$random[7:138])
sd(AP[7:138])
sd(AP[7:138] - AP.decom$trend[7:138])
sd(AP.decom$random[7:138])
www = "http://elena.aut.ac.nz/~pcowpert/ts/Fontdsdt.dat"
Fontdsdt.dat = read.table(www, header=T)
attach(Fontdsdt.dat)
plot(ts(adflow), ylab = 'adflow')
acf(adflow, xlab = 'lag (months)', main="")
Build.dat = read.table("http://elena.aut.ac.nz/~pcowpert/ts/ApprovActiv.dat", header=T)
attach(Build.dat)
App.ts = ts(Approvals, start = c(1996,1), freq=4)
Act.ts = ts(Activity, start = c(1996,1), freq=4)
ts.plot(App.ts, Act.ts, lty = c(1,3))

# correlogram and cross-correlogram
acf(ts.union(App.ts, Act.ts))
print(ts.union(App.ts, Act.ts))
print(acf(ts.union(App.ts, Act.ts)))

# Bass model
T79 = 1:10
Tdelt = (1:100) / 10
Sales = c(840,1470,2110,4000, 7590, 10950, 10530, 9470, 7790, 5890)
Cusales = cumsum(Sales)
Bass.nls = nls(Sales ~ M * ( ((P+Q)^2 / P) * exp(-(P+Q) * T79) ) /
                  (1+(Q/P)*exp(-(P+Q)*T79))^2, start = list(M=60630, P=0.03, Q=0.38))
summary(Bass.nls)
Bcoef = coef(Bass.nls)
m = Bcoef[1]
p = Bcoef[2]
q = Bcoef[3]
ngete = exp(-(p+q) * Tdelt)
Bpdf = m * ( (p+q)^2 / p ) * ngete / (1 + (q/p) * ngete)^2
plot(Tdelt, Bpdf, xlab = "Year from 1979",
     ylab = "Sales per year", type='l')
points(T79, Sales)
Bcdf = m * (1 - ngete)/(1 + (q/p)*ngete)
plot(Tdelt, Bcdf, xlab = "Year from 1979",
     ylab = "Cumulative sales", type='l')
points(T79, Cusales)

?HoltWinters
require(graphics)
## Seasonal Holt-Winters
(m <- HoltWinters(co2))
plot(m)
plot(fitted(m))
(m <- HoltWinters(AirPassengers, seasonal = "mult"))
plot(m)
## Non-Seasonal Holt-Winters
x <- uspop + rnorm(uspop, sd = 5)
m <- HoltWinters(x, gamma = FALSE)
plot(m)
## Exponential Smoothing
m2 <- HoltWinters(x, gamma = FALSE, beta = FALSE)
lines(fitted(m2)[,1], col = 3)