Wednesday, September 23, 2015

Trend and Seasonality

rm(list=ls())

setwd("//Users//yangzou//Desktop//R_Study//BrockwellDavis")

strikes = scan(file = "STRIKES.DAT")
strikes = ts(data = strikes, start = 1951, frequency = 1)
plot(strikes)
points(strikes, pch = 16)

# exponential smoothing by arima
exsmooth = arima(strikes, order = c(0, 1, 1))
exsmooth

strikes.ex = strikes - exsmooth$residuals
plot(strikes.ex, ylab = "STRICK")
points(strikes, pch = 16)

exsmooth = arima(strikes, order = c(0, 1, 1), fixed = -0.6,
                 include.mean = FALSE)
exsmooth

strikes.ex = strikes - exsmooth$residuals
plot(strikes.ex, ylab = "STRICK")
points(strikes, pch = 16)

# Holt-Winters
deaths = scan(file = "DEATHS.DAT")
deaths = ts(data = deaths, start = c(1973, 1), frequency = 12)
plot(deaths)
points(deaths, pch = 16)

deaths.hw = HoltWinters(deaths, seasonal = "additive")
plot(deaths.hw)
deaths.hw$fitted

plot(deaths.hw$fitted[,3], ylab = "trend")
plot(deaths.hw$fitted[,4], ylab = "seosonality")
points(deaths.hw$fitted[,4], pch = 16)

deaths.hw = HoltWinters(deaths, seasonal = "additive",
                        alpha = 0.2, beta = 0.2, gamma = 0.2)
plot(deaths.hw)
deaths.hw$fitted

plot(deaths.hw$fitted[,3], ylab = "trend")
plot(deaths.hw$fitted[,4], ylab = "seosonality")
points(deaths.hw$fitted[,4], pch = 16)

Sunday, September 20, 2015

linear time series models

rm(list=ls())
library(ggplot2)
library(fBasics)
library(fUnitRoots)

setwd("//Users//yangzou//Desktop//R_Study//tsay_3")

# IBM stock data
da = read.table("m-ibm3dx2608.txt", header = T)
head(da)
summary(da)
da$date = as.POSIXct(as.character(da$date), format="%Y%m%d")

basicStats(da$ibmrtn)

ggplot(da, aes(date, ibmrtn)) + geom_line() +
  xlab("Time") + ylab("IBM monthly returns") +
  scale_x_datetime(breaks = '10 years')

# create a time series object
ibm = ts(da$ibmrtn, frequency = 12, start = c(1926, 1))
plot(ibm, type='l')
acf(ibm, lag = 100)

# simple return
sibm = da$ibmrtn
# log return
libm = log(da$ibmrtn + 1)

par(mfcol=c(2,1))
acf(sibm, lag = 100)
acf(libm, lag = 100)
par(mfcol=c(1,1))

par(mfcol=c(2,1))
acf(sibm, lag = 100, ylim=c(-0.2, 0.2))
acf(libm, lag = 100, ylim=c(-0.2, 0.2))
par(mfcol=c(1,1))

Box.test(sibm, lag = 5, type = 'Ljung-Box')
box.obj = Box.test(sibm, lag = 5, type = 'Ljung-Box')
names(box.obj)
box.obj$statistic
Box.test(sibm, lag = 5, type = 'Box-Pierce')
Box.test(sibm, lag = 10, type = 'Ljung-Box')

# CRSP data
crsp = scan(file="m-ew6299.txt")
crsp1 = ts(crsp, frequency = 12, start = c(1962, 1))
plot(crsp1, type='l')
acf(crsp1, lag = 100)

# Quarterly growth of US real GNP
gnp = read.table("q-gnp4791.txt", header = F)
gnp1 = ts(gnp, frequency = 4, start = c(1947, 2))
plot(gnp1, ylab="Growth")
points(gnp1, pch='*')
abline(h = 0)
acf(gnp, lag=100)

# method = c("yule-walker", "burg", "ols", "mle", "yw")
m1 = ar(gnp[,1], method = "mle")
m1$order
m1$ar
m1$aic
m1$method
m1$n.used

m2 = arima(gnp, order=c(3, 0, 0))
m2
m2$coef
# intercept is the mean of the series
# the constant is obtained below:
(1 - sum(m2$coef[1:length(m2$coef)-1]))*(m2$coef[length(m2$coef)][[1]])
# residual standard error
sqrt(m2$sigma2)

# Characteristic equation
p1 = c(1, -m2$coef[1:length(m2$coef)-1])
p1
# find solutions
roots = polyroot(p1)
roots
# compute the absolute values of the solutions
Mod(roots)
# compute average length of business cycles
roots[1]
Re(roots[1])
Im(roots[1])
k=2*pi/acos(Re(roots[1])/Mod(roots[1]))
k/4

pacf = pacf(gnp[,1])
pacf$acf

# vm
vm = read.table("m-ibm3dx2608.txt", header = T)[,3]
# the average annual simple gross returns
t1 = prod(vm + 1)
t1^(12/length(vm)) - 1
# AR(3) model
m1 = arima(vm, order = c(3, 0, 0))
m1
mean(vm)
m1$coef[4]
# compute the intercept
(1 - sum(m1$coef[seq(1, 3)]))*mean(vm)
# compute standard error of residuals
sqrt(m1$sigma2)
Box.test(m1$residuals, lag=12, type="Ljung-Box")
# p-value using 9 degrees of freedom
pv = 1 - pchisq(16.352, 9)
pv
# to fix the AR(2) coef to zero
m2 = arima(vm, order = c(3, 0, 0), fixed = c(NA, 0, NA, NA))
m2
m2 = predict(m2, n.ahead = 12)
pred = m2$pred[1:12]
err = m2$se[1:12]
ulb = pred + 1.96*err
llb = pred - 1.96*err
x = seq(997, 1008)
df = data.frame(x, err, ulb, llb)
ggplot(df, aes(x)) +
  geom_line(aes(y = pred), colors = "red") +
  geom_line(aes(y = ulb), colors = "blue", linetype = "dotted") +
  geom_line(aes(y = llb), colors = "blue", linetype = "dotted")


# Johnson & Johnson quarterly earnings per share
qjnj = scan(file="q-jnj.txt")
qjnj1 = ts(qjnj, frequency = 4, start = c(1960, 1))
lqjnj1 = log(qjnj1)
par(mfcol=c(2,1))
plot(qjnj1, type = "l", ylab = "Earning")
plot(lqjnj1, type = "l", ylab = "Log earning")
par(mfcol=c(1,1))

x = log(qjnj)
dx1 = diff(x)
dx4 = diff(x, lag = 4)
dx_1_4 = diff(dx1, lag = 4)
acf(x)
acf(dx1)
acf(dx4)
acf(dx_1_4)
m1 = arima(dx_1_4, order = c(0, 0, 1),
           seasonal = list(order = c(0, 0, 1), periods = 4))
sqrt(m1$sigma2)
arima(dx_1_4, order = c(0, 0, 1), method = "ML",
      seasonal = list(order = c(0, 0, 1), periods = 4))

arima(dx_1_4, order = c(0, 0, 1), method = "CSS",
      seasonal = list(order = c(0, 0, 1), periods = 4))

# airline model
m2 = arima(qjnj, order = c(0, 1, 1), include.mean = F,
           seasonal = list(order = c(0, 1, 1), frequency = 4))
m2

# monthly simple returns of CRSP Decile 1 Index
da = read.table("m-deciles08.txt", header = T)
d1 = da[,2]
d1s = ts(d1, start = c(1970, 1), frequency = 12)
plot(d1s)
acf(d1, lag = 40)
jan = rep(c(1,rep(0, 11)), 39)
m1 = lm(d1 ~ jan)
summary(m1)
d1_a = d1 - m1$coefficients[2] * jan
d1s_a = d1s - m1$coefficients[2] * jan
plot(d1s_a)
acf(d1_a, lag = 40)
m1 = arima(d1, order = c(1, 0, 0),
           seasonal = list(order = c(1, 0, 1), periods = 12))
m1
m2 = arima(d1, order = c(1, 0, 0), include.mean = F,
           seasonal = list(order = c(1, 0, 1), periods = 12))
m2


# regression with time series errors
r1 = read.table("w-gs1yr.txt", header = T)[,4]
r3 = read.table("w-gs3yr.txt", header = T)[,4]
m1 = lm(r3 ~ r1)
m1
plot(m1$residuals, type = "l")
acf(m1$residuals, lag = 36)

c1 = diff(r1)
c3 = diff(r3)
m2 = lm(c3 ~ -1 + c1)
summary(m2)
acf(m2$residuals, lag = 36)

m3 = arima(c3, order = c(0, 0, 1), xreg = c1, include.mean = F)
m3
mean(c3)
rsq = (sum(c3^2) - sum(m3$residuals^2))/sum(c3^2)
rsq

# unit-root test
da = read.table("q-gdp4708.txt", header = T)
gdp=log(da[,4])
gdp_s = ts(gdp, start = c(1947, 1), frequency = 4)
plot(gdp_s, type="l", ylab = "ln(GDP)")
acf(gdp)
gdp_diff = diff(gdp)
gdp_s_diff = diff(gdp_s)
plot(gdp_s_diff, type="l", ylab="diff(ln(GDP))")
abline(h=0)
pacf(gdp_diff, lag = 20)
m1 = ar(gdp_diff, method="mle")
m1$order
adfTest(gdp, lags = 10, type=c("c"))


da = read.table("d-sp55008.txt", header = T)
sp5=log(da[,7])
time = ISOdate(da[,1], da[,2], da[,3])
plot(time, sp5, type = "l")
plot(time, log(sp5), type = "l")
time[length(time)]
time[1]
m2 = ar(diff(sp5), method = 'mle')
m2$order
adfTest(sp5, lags = 2, type = "ct")