Time Series Basics: Trend, Seasonality and Stationarity
Time series data requires special handling due to temporal dependencies. This lesson covers fundamental concepts for time series analysis.
Time Series Components
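The time series decomposition formula splits a series into components: additive, $y_t = T_t + S_t + R_t$, or multiplicative, $y_t = T_t \times S_t \times R_t$, where $T_t$ is the trend, $S_t$ the seasonal component and $R_t$ the residual.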
Loading and Preparing Time Series
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
# Load the raw records; the 'date' column is parsed as datetimes and becomes the index.
df = pd.read_csv('sales_data.csv', parse_dates=['date'])
df = df.set_index('date')

# The series analysed in the rest of the lesson is the 'sales' column.
ts = df['sales']

# Put the series on a regular daily grid; dates absent from the file show up as NaN.
ts = ts.asfreq('D')
# Forward-fill those gaps. Series.ffill() replaces fillna(method='ffill'),
# which is deprecated since pandas 2.1.
ts = ts.ffill()

# Quick sanity check of the resulting index.
print(f"Frequency: {ts.index.freq}")
print(f"Date range: {ts.index.min()} to {ts.index.max()}")
print(f"Length: {len(ts)}")
Visual Inspection
def plot_time_series(ts, title="Time Series", window=12):
    """Show a 2x2 diagnostic figure for a time series.

    Panels: the raw series, rolling mean/std drawn over the series, a
    histogram of the values, and box plots of the values grouped by
    calendar month.

    Args:
        ts: pandas Series. The month panel reads ``ts.index.month``, so the
            index must be a DatetimeIndex.
        title: Prefix used in the title of the raw-series panel.
        window: Number of observations in the rolling mean/std window.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Raw series
    axes[0, 0].plot(ts, linewidth=0.8)
    axes[0, 0].set_title(f'{title} - Raw')
    axes[0, 0].set_ylabel('Value')

    # Rolling statistics
    rolling_mean = ts.rolling(window=window).mean()
    rolling_std = ts.rolling(window=window).std()
    axes[0, 1].plot(ts, linewidth=0.8, label='Original')
    axes[0, 1].plot(rolling_mean, label='Rolling Mean')
    axes[0, 1].plot(rolling_std, label='Rolling Std')
    axes[0, 1].legend()
    axes[0, 1].set_title('Rolling Statistics')

    # Distribution
    observed = ts.dropna()
    axes[1, 0].hist(observed, bins=50, edgecolor='black')
    axes[1, 0].set_title('Distribution')

    # Monthly boxplot. A pandas Series has no .boxplot() method, so the values
    # are grouped by calendar month and handed to matplotlib directly.
    if len(observed):
        month_numbers = []
        month_values = []
        for month, values in observed.groupby(observed.index.month):
            month_numbers.append(month)
            month_values.append(values.to_numpy())
        # positions= places each box at its month number, which also becomes
        # the tick label.
        axes[1, 1].boxplot(month_values, positions=month_numbers)
    axes[1, 1].set_title('Monthly Distribution')
    axes[1, 1].set_xlabel('Month')

    plt.tight_layout()
    plt.show()
# Draw the diagnostic panels for the prepared series (default title).
plot_time_series(ts)
Time Series Decomposition
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.seasonal import STL
def _show_components(fitted):
    """Plot a fitted decomposition, tidy the layout, display it, and return the figure."""
    figure = fitted.plot()
    plt.tight_layout()
    plt.show()
    return figure


# Classical decomposition, additive model, with a seasonal period of 12 observations.
decomposition_add = seasonal_decompose(ts, model='additive', period=12)
fig = _show_components(decomposition_add)

# Same procedure with the multiplicative model.
decomposition_mul = seasonal_decompose(ts, model='multiplicative', period=12)
fig = _show_components(decomposition_mul)

# STL with robust fitting, so outliers have less influence on the components.
stl = STL(ts, period=12, robust=True)
result = stl.fit()
fig = _show_components(result)
Stationarity Tests
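The Augmented Dickey-Fuller test fits the regression $\Delta y_t = \alpha + \beta t + \gamma y_{t-1} + \sum_{i=1}^{p} \delta_i \Delta y_{t-i} + \varepsilon_t$; the test statistic is $\mathrm{ADF} = \hat{\gamma} / \mathrm{SE}(\hat{\gamma})$.
Null hypothesis: $H_0: \gamma = 0$ (unit root exists, non-stationary).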
from statsmodels.tsa.stattools import adfuller, kpss, acf, pacf
def adf_test(series):
    """Print an Augmented Dickey-Fuller report for *series*.

    Missing values are dropped before the test runs. The report lists the
    test statistic, the p-value, the critical values, and a flag that is
    True when the p-value is below 0.05 (the unit-root null is rejected).

    Returns:
        None; everything is written to stdout.
    """
    outcome = adfuller(series.dropna())
    statistic, p_value, critical_values = outcome[0], outcome[1], outcome[4]

    print('ADF Test Results:')
    print(f'ADF Statistic: {statistic:.4f}')
    print(f'p-value: {p_value:.4f}')
    print('Critical Values:')
    for level, threshold in critical_values.items():
        print(f' {level}: {threshold:.4f}')

    rejects_unit_root = p_value < 0.05
    print(f'Stationary: {rejects_unit_root}')
def kpss_test(series, regression='ct'):
    """Print a KPSS stationarity report for *series*.

    KPSS has the opposite hypotheses to the ADF test: its null hypothesis is
    that the series IS stationary, so a p-value above 0.05 is reported as
    "Stationary: True".

    Args:
        series: pandas Series; missing values are dropped before testing.
        regression: Deterministic component of the null hypothesis, passed
            to ``statsmodels.tsa.stattools.kpss``. ``'ct'`` (the default,
            kept for backward compatibility) tests stationarity around a
            linear trend; ``'c'`` tests stationarity around a constant
            level, which matches the default used by ``adfuller``.

    Returns:
        None; everything is written to stdout.

    Note:
        statsmodels interpolates the KPSS p-value from a table, so it is
        bounded (reported between 0.01 and 0.1) and a warning is emitted
        when the statistic falls outside the table.
    """
    result = kpss(series.dropna(), regression=regression, nlags='auto')
    statistic, p_value = result[0], result[1]

    print('KPSS Test Results:')
    print(f'KPSS Statistic: {statistic:.4f}')
    print(f'p-value: {p_value:.4f}')
    # Failing to reject the null means the series is treated as stationary.
    print(f'Stationary: {p_value > 0.05}')
# Apply both stationarity tests, ADF first and then KPSS, to the untransformed series.
print("Original Series:")
for run_test in (adf_test, kpss_test):
    run_test(ts)
Making Series Stationary
# 1. Differencing: first order, second order, and a lag-12 seasonal difference.
ts_diff1 = ts.diff().dropna()
ts_diff2 = ts.diff().diff().dropna()
ts_seasonal_diff = ts.diff(12).dropna()

# 2. Log transformation (for multiplicative seasonality), then a first difference.
ts_log = np.log(ts)
ts_log_diff = ts_log.diff().dropna()

# 3. Box-Cox transformation; boxcox also returns the lambda it selected.
from scipy.stats import boxcox
ts_boxcox, lambda_opt = boxcox(ts.dropna())

# Draw three of the transformed series side by side, one panel each.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
panels = (
    (ts_diff1, 'First Difference'),
    (ts_seasonal_diff, 'Seasonal Difference'),
    (ts_log_diff, 'Log Transform + Difference'),
)
for axis, (transformed, heading) in zip(axes, panels):
    axis.plot(transformed)
    axis.set_title(heading)
plt.tight_layout()
plt.show()

# Re-run the ADF test on the first-differenced series.
print("\nAfter Differencing:")
adf_test(ts_diff1)
ACF and PACF
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# 2x2 grid: top row is the original series, bottom row the first difference;
# left column ACF, right column PACF, each drawn up to lag 40.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
ts_clean = ts.dropna()
layout = (
    (plot_acf, ts_clean, axes[0, 0], 'ACF - Original'),
    (plot_pacf, ts_clean, axes[0, 1], 'PACF - Original'),
    (plot_acf, ts_diff1, axes[1, 0], 'ACF - Differenced'),
    (plot_pacf, ts_diff1, axes[1, 1], 'PACF - Differenced'),
)
for draw, data, axis, heading in layout:
    draw(data, lags=40, ax=axis)
    # Set the title after drawing so it replaces the one the plot helper sets.
    axis.set_title(heading)
plt.tight_layout()
plt.show()
Autocorrelation Analysis
def analyze_autocorrelation(ts, max_lags=40):
    """Compute the ACF and PACF of *ts* and print which lags are significant.

    A lag counts as significant when the absolute correlation exceeds
    ``1.96 / sqrt(n)``, with ``n`` the number of non-missing observations.
    Lag 0 is excluded from every check: its correlation is 1 by definition,
    so it carries no information about the series.

    Args:
        ts: pandas Series; missing values are dropped before the analysis.
        max_lags: Highest lag passed to ``acf`` and ``pacf``.

    Returns:
        Tuple ``(acf_values, pacf_values)`` as returned by statsmodels,
        both including the lag-0 entry.
    """
    observed = ts.dropna()
    acf_values = acf(observed, nlags=max_lags)
    pacf_values = pacf(observed, nlags=max_lags)

    # Find significant lags
    n = len(observed)
    significance_level = 1.96 / np.sqrt(n)

    # Test lags 1..max_lags only, then shift the indices back to lag numbers.
    significant_acf = np.flatnonzero(np.abs(acf_values[1:]) > significance_level) + 1
    significant_pacf = np.flatnonzero(np.abs(pacf_values[1:]) > significance_level) + 1

    print(f"Significant ACF lags: {significant_acf}")
    print(f"Significant PACF lags: {significant_pacf}")

    # Interpret patterns (rough heuristics). With lag 0 excluded, "more than
    # one significant lag" is the same threshold the original count used.
    if len(significant_acf) > 1:
        print("Pattern: Slow decay in ACF suggests non-stationarity")
    # Report a PACF cut-off only when some non-zero lag is actually significant.
    if len(significant_pacf) > 0:
        print(f"Pattern: PACF cuts off after lag {significant_pacf[-1]}")

    return acf_values, pacf_values
# Analyse the first-differenced series and keep the returned ACF/PACF arrays.
acf_vals, pacf_vals = analyze_autocorrelation(ts_diff1)
Key Takeaways
- Always visualize time series before modeling
- Test for stationarity using ADF and KPSS tests
- Use differencing or transformations to achieve stationarity
- ACF/PACF plots help identify model orders
- Consider seasonal patterns in decomposition