Autoregressive Moving Average (ARMA): Sunspots data

This notebook replicates the existing ARMA notebook using the statsmodels.tsa.statespace.SARIMAX class rather than the statsmodels.tsa.ARMA class.

python

%matplotlib inline

python

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats

python

from statsmodels.graphics.api import qqplot

Sunspots Data

python

print(sm.datasets.sunspots.NOTE)

python

dta = sm.datasets.sunspots.load_pandas().data

python

dta.index = pd.Index(pd.date_range("1700", end="2009", freq="YE-DEC"))
del dta["YEAR"]

python

dta.plot(figsize=(12, 4));

python

fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(dta.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(dta, lags=40, ax=ax2)

python

arma_mod20 = sm.tsa.statespace.SARIMAX(dta, order=(2, 0, 0), trend="c").fit(disp=False)
print(arma_mod20.params)

python

arma_mod30 = sm.tsa.statespace.SARIMAX(dta, order=(3, 0, 0), trend="c").fit(disp=False)

python

print(arma_mod20.aic, arma_mod20.bic, arma_mod20.hqic)

python

print(arma_mod30.params)

python

print(arma_mod30.aic, arma_mod30.bic, arma_mod30.hqic)

Does our model obey the theory?

python

sm.stats.durbin_watson(arma_mod30.resid)

python

fig = plt.figure(figsize=(12, 4))
ax = fig.add_subplot(111)
ax = plt.plot(arma_mod30.resid)

python

resid = arma_mod30.resid

python

stats.normaltest(resid)

python

fig = plt.figure(figsize=(12, 4))
ax = fig.add_subplot(111)
fig = qqplot(resid, line="q", ax=ax, fit=True)

python

fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(resid, lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)

python

r, q, p = sm.tsa.acf(resid, fft=True, qstat=True)
data = np.c_[r[1:], q, p]
index = pd.Index(range(1, q.shape[0] + 1), name="lag")
table = pd.DataFrame(data, columns=["AC", "Q", "Prob(>Q)"], index=index)
print(table)

This indicates a lack of fit.
In-sample dynamic prediction. How good does our model do?

python

predict_sunspots = arma_mod30.predict(start="1990", end="2012", dynamic=True)

python

fig, ax = plt.subplots(figsize=(12, 8))
dta.loc["1950":].plot(ax=ax)
predict_sunspots.plot(ax=ax, style="r");

python

def mean_forecast_err(y, yhat):
    return y.sub(yhat).mean()

python

mean_forecast_err(dta.SUNACTIVITY, predict_sunspots)