Autoregressive Moving Average (ARMA): Sunspots data

python

%matplotlib inline

python

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats
from statsmodels.tsa.arima.model import ARIMA

python

from statsmodels.graphics.api import qqplot

Sunspots Data

python

print(sm.datasets.sunspots.NOTE)

python

dta = sm.datasets.sunspots.load_pandas().data

python

dta.index = pd.Index(sm.tsa.datetools.dates_from_range("1700", "2008"))
dta.index.freq = dta.index.inferred_freq
del dta["YEAR"]

python

dta.plot(figsize=(12, 8))

python

fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(dta.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(dta, lags=40, ax=ax2)

python

arma_mod20 = ARIMA(dta, order=(2, 0, 0)).fit()
print(arma_mod20.params)

python

arma_mod30 = ARIMA(dta, order=(3, 0, 0)).fit()

python

print(arma_mod20.aic, arma_mod20.bic, arma_mod20.hqic)

python

print(arma_mod30.params)

python

print(arma_mod30.aic, arma_mod30.bic, arma_mod30.hqic)

Does our model obey the theory?

python

sm.stats.durbin_watson(arma_mod30.resid.values)

python

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
ax = arma_mod30.resid.plot(ax=ax)

python

resid = arma_mod30.resid

python

stats.normaltest(resid)

python

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
fig = qqplot(resid, line="q", ax=ax, fit=True)

python

fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)

python

r, q, p = sm.tsa.acf(resid.values.squeeze(), fft=True, qstat=True)
data = np.c_[np.arange(1, 25), r[1:], q, p]

python

table = pd.DataFrame(data, columns=["lag", "AC", "Q", "Prob(>Q)"])
print(table.set_index("lag"))

This indicates a lack of fit.
In-sample dynamic prediction. How good does our model do?

python

predict_sunspots = arma_mod30.predict("1990", "2012", dynamic=True)
print(predict_sunspots)

python

def mean_forecast_err(y, yhat):
    return y.sub(yhat).mean()

python

mean_forecast_err(dta.SUNACTIVITY, predict_sunspots)

Exercise: Can you obtain a better fit for the Sunspots model? (Hint: sm.tsa.AR has a method select_order)

Simulated ARMA(4,1): Model Identification is Difficult

python

from statsmodels.tsa.arima_process import ArmaProcess

python

np.random.seed(1234)
# include zero-th lag
arparams = np.array([1, 0.75, -0.65, -0.55, 0.9])
maparams = np.array([1, 0.65])

Let's make sure this model is estimable.

python

arma_t = ArmaProcess(arparams, maparams)

python

arma_t.isinvertible

python

arma_t.isstationary

What does this mean?

python

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
ax.plot(arma_t.generate_sample(nsample=50))

python

arparams = np.array([1, 0.35, -0.15, 0.55, 0.1])
maparams = np.array([1, 0.65])
arma_t = ArmaProcess(arparams, maparams)
arma_t.isstationary

python

arma_rvs = arma_t.generate_sample(nsample=500, burnin=250, scale=2.5)

python

fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(arma_rvs, lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(arma_rvs, lags=40, ax=ax2)

For mixed ARMA processes the Autocorrelation function is a mixture of exponentials and damped sine waves after (q-p) lags.
The partial autocorrelation function is a mixture of exponentials and dampened sine waves after (p-q) lags.

python

lags = int(10 * np.log10(arma_rvs.shape[0]))
arma11 = ARIMA(arma_rvs, order=(1, 0, 1)).fit()
resid = arma11.resid
r, q, p = sm.tsa.acf(resid, nlags=lags, fft=True, qstat=True)
data = np.c_[range(1, lags + 1), r[1:], q, p]
table = pd.DataFrame(data, columns=["lag", "AC", "Q", "Prob(>Q)"])
print(table.set_index("lag"))

python

arma41 = ARIMA(arma_rvs, order=(4, 0, 1)).fit()
resid = arma41.resid
r, q, p = sm.tsa.acf(resid, nlags=lags, fft=True, qstat=True)
data = np.c_[range(1, lags + 1), r[1:], q, p]
table = pd.DataFrame(data, columns=["lag", "AC", "Q", "Prob(>Q)"])
print(table.set_index("lag"))

Exercise: How good of in-sample prediction can you do for another series, say, CPI

python

macrodta = sm.datasets.macrodata.load_pandas().data
macrodta.index = pd.Index(sm.tsa.datetools.dates_from_range("1959Q1", "2009Q3"))
cpi = macrodta["cpi"]

Hint:

python

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
ax = cpi.plot(ax=ax)
ax.legend()

P-value of the unit-root test, resoundingly rejects the null of a unit-root.

python

print(sm.tsa.adfuller(cpi)[1])