Advanced Time Series Forecasting
Classical ARIMA models handle linear patterns. Modern deep learning models capture complex nonlinear dynamics, multiple seasonalities, and external regressors. This lesson covers the state of the art.
Time Series Decomposition
Why Advanced Forecasting Matters
Retail demand, energy consumption, financial markets — forecasting drives decisions worth billions. Even small accuracy improvements translate to massive value. Modern methods outperform classical approaches on complex, multi-seasonal data.
import warnings

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import mean_absolute_error, mean_squared_error

warnings.filterwarnings('ignore')
Generate Complex Time Series
# Seed the global RNG so the synthetic series is reproducible.
np.random.seed(42)

n = 2 * 365  # two years of daily data
dates = pd.date_range(start='2023-01-01', periods=n, freq='D')
t = np.arange(n)

# Shared pieces: the angular term of both sinusoids and the holiday mask
# (days 1, 180 and 355 of each year).
two_pi_t = 2 * np.pi * t
is_holiday = np.isin(dates.dayofyear, [1, 180, 355])

# Complex signal: trend + weekly + yearly + holidays + noise
trend = 0.05 * t + 100
weekly = 10 * np.sin(two_pi_t / 7)
yearly = 20 * np.sin(two_pi_t / 365)
holidays = 15 * is_holiday.astype(float)
noise = np.random.normal(0, 3, n)
y = trend + weekly + yearly + holidays + noise

# Assemble the frame column by column; the temperature noise is drawn after
# the signal noise, so the random stream is consumed in a fixed order.
df = pd.DataFrame({'ds': dates, 'y': y})
df['holiday'] = is_holiday.astype(int)
df['temperature'] = 15 + 10 * np.sin(two_pi_t / 365) + np.random.normal(0, 2, n)

print(f"Time series: {len(df)} days, range: {df['y'].min():.1f} to {df['y'].max():.1f}")
Walk-Forward Validation
The gold standard for time series — never use future data to predict the past.
def walk_forward_validation(model_class, df, n_splits=5, **model_params):
    """Evaluate a forecaster with expanding-window walk-forward validation.

    The frame is divided into ``n_splits + 1`` chunks of ``test_size`` rows.
    For each fold a fresh model is fitted on every row before the fold's test
    window and asked to predict exactly that window, so a model is never
    trained on rows that come after the ones it is scored on. The folds cover
    the last ``n_splits * test_size`` rows of ``df``.

    Args:
        model_class: Callable returning an object that exposes
            ``fit(train_df)`` and ``predict(horizon)``.
        df: Frame with a ``'y'`` column. Rows are assumed to already be in
            chronological order (not checked here).
        n_splits: Number of train/test folds.
        **model_params: Keyword arguments forwarded to ``model_class``.

    Returns:
        dict with ``'MAE'``, ``'RMSE'`` and ``'MAPE'`` (in percent) computed
        over the pooled predictions of all folds. MAPE skips observations
        whose actual value is exactly zero and is ``nan`` when every actual
        is zero.

    Raises:
        ValueError: If ``n_splits`` is smaller than 1 or ``df`` is too short
            to give each fold at least one test row.
    """
    if n_splits < 1:
        raise ValueError("n_splits must be at least 1")

    n = len(df)
    test_size = n // (n_splits + 1)
    if test_size == 0:
        raise ValueError(
            f"Need at least {n_splits + 1} rows for {n_splits} splits, got {n}"
        )

    predictions = []
    actuals = []
    for i in range(n_splits):
        # Fold boundaries are counted back from the end so the last fold
        # finishes exactly on the final row.
        train_end = n - (n_splits - i) * test_size
        test_end = train_end + test_size
        train = df.iloc[:train_end]
        test = df.iloc[train_end:test_end]

        model = model_class(**model_params)
        model.fit(train)
        pred = model.predict(len(test))

        predictions.extend(pred)
        actuals.extend(test['y'].values)

    predictions = np.asarray(predictions, dtype=float)
    actuals = np.asarray(actuals, dtype=float)

    mae = mean_absolute_error(actuals, predictions)
    rmse = np.sqrt(mean_squared_error(actuals, predictions))

    # A percentage error is undefined where the actual value is zero; dividing
    # anyway would turn the whole metric into inf/nan.
    nonzero = actuals != 0
    if nonzero.any():
        relative_error = (actuals[nonzero] - predictions[nonzero]) / actuals[nonzero]
        mape = np.mean(np.abs(relative_error)) * 100
    else:
        mape = float('nan')

    return {'MAE': mae, 'RMSE': rmse, 'MAPE': mape}
class SimpleExpSmoothing:
    """Simple exponential smoothing baseline with a flat forecast.

    The level is updated as ``level = alpha * y_t + (1 - alpha) * level``,
    starting from the first observation, and every future step is forecast as
    the final level. ``alpha=1`` reproduces the naive "repeat the last value"
    forecast.

    Args:
        alpha: Smoothing factor in ``[0, 1]``; larger values weight recent
            observations more heavily.

    Raises:
        ValueError: If ``alpha`` lies outside ``[0, 1]``.
    """

    def __init__(self, alpha=0.3):
        if not 0 <= alpha <= 1:
            raise ValueError("alpha must be between 0 and 1")
        self.alpha = alpha
        self.train = None   # training values, set by fit()
        self.level_ = None  # smoothed level after the last training point

    def fit(self, train_df):
        """Run the smoothing recursion over ``train_df['y']``.

        Args:
            train_df: Mapping or frame whose ``'y'`` entry holds the series.

        Returns:
            The fitted instance.

        Raises:
            ValueError: If the series is empty.
        """
        values = np.asarray(train_df['y'], dtype=float)
        if values.size == 0:
            raise ValueError("cannot fit on an empty series")

        level = values[0]
        for value in values[1:]:
            level = self.alpha * value + (1 - self.alpha) * level

        self.train = values
        self.level_ = level
        return self

    def predict(self, horizon):
        """Return a length-``horizon`` array filled with the final level.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.level_ is None:
            raise RuntimeError("call fit() before predict()")
        return np.full(horizon, self.level_)
# Score the smoothing baseline with walk-forward validation and report the
# pooled error metrics.
baseline_params = {'alpha': 0.3}
results = walk_forward_validation(SimpleExpSmoothing, df, **baseline_params)
print(f"Simple baseline: MAE={results['MAE']:.2f}, RMSE={results['RMSE']:.2f}")
Prophet
Facebook's Prophet handles trends, seasonalities, and holidays automatically.
from prophet import Prophet
# Prepare data: keep the date and target columns and hold out the last
# 60 days for evaluation.
prophet_df = df[['ds', 'y']].copy()
train = prophet_df.iloc[:-60]
test = prophet_df.iloc[-60:]

# Holidays. The synthetic series places its spikes on day-of-year 1, 180 and
# 355 of each year; 2024 is a leap year, so the last two land one calendar
# day earlier than in 2023 (Jun 28 / Dec 20 rather than Jun 29 / Dec 21).
holidays_df = pd.DataFrame({
    'holiday': 'holiday',
    'ds': pd.to_datetime(['2023-01-01', '2023-06-29', '2023-12-21',
                          '2024-01-01', '2024-06-28', '2024-12-20']),
    'lower_window': -1,
    'upper_window': 1
})

# Fit Prophet. The holidays are passed to the same constructor call as the
# seasonality and prior settings, so a single configured model is built and
# none of the settings are thrown away by a second instantiation.
model = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    changepoint_prior_scale=0.05,
    seasonality_prior_scale=10,
    holidays=holidays_df
)
model.fit(train)

# Predict: extend the training dates by 60 days and keep only those rows.
future = model.make_future_dataframe(periods=60)
forecast = model.predict(future)
pred = forecast.iloc[-60:]['yhat'].values
mae = mean_absolute_error(test['y'], pred)
print(f"Prophet MAE: {mae:.2f}")
print(f"Components: trend, yearly, weekly, holidays")
N-BEATS
Neural Basis Expansion Analysis for Time Series — pure deep learning without recurrence.
class NBeatsBlock(nn.Module):
    """Fully connected stack followed by a linear projection to ``theta_size``.

    Args:
        input_size: Width of the input vector.
        theta_size: Width of the block output.
        hidden_size: Width of every hidden layer.
        n_layers: Number of Linear + ReLU pairs in the stack.
    """

    def __init__(self, input_size, theta_size, hidden_size, n_layers=4):
        super().__init__()
        # Layer widths in order: the input width, then hidden_size per layer.
        widths = [input_size] + [hidden_size] * n_layers
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        self.layers = nn.Sequential(*stack)
        self.theta = nn.Linear(hidden_size, theta_size)

    def forward(self, x):
        """Run ``x`` through the hidden stack and project it to theta."""
        return self.theta(self.layers(x))
class NBeats(nn.Module):
    """Two NBeatsBlock instances whose outputs are added together.

    Both blocks receive the same input window and each emits a vector of
    ``forecast_size`` values; the model output is their element-wise sum.

    Args:
        input_size: Length of the input window.
        forecast_size: Number of values to forecast.
        hidden_size: Hidden width used inside each block.
    """

    def __init__(self, input_size, forecast_size, hidden_size=256):
        super().__init__()
        self.forecast_size = forecast_size
        # Two blocks with identical configuration.
        self.block1 = NBeatsBlock(input_size, forecast_size, hidden_size)
        self.block2 = NBeatsBlock(input_size, forecast_size, hidden_size)

    def forward(self, x):
        """Return the summed block outputs for ``x`` of shape (batch, input_size)."""
        return self.block1(x) + self.block2(x)
# Training setup: a model with a 30-value input window and 7-value output,
# optimised with Adam.
model = NBeats(30, 7, hidden_size=128)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Create sequences
def create_sequences(data, input_size, forecast_size):
    """Slice a series into (input window, forecast window) training pairs.

    Window ``i`` uses ``data[i : i + input_size]`` as input and the
    ``forecast_size`` values immediately after it as the target. Every
    position where both pieces fit is used, including the one whose target
    ends on the final observation.

    Args:
        data: Array-like series, indexed along its first axis.
        input_size: Number of steps in each input window.
        forecast_size: Number of steps in each target window.

    Returns:
        Tuple ``(X, y)`` of arrays with shapes ``(n_windows, input_size, ...)``
        and ``(n_windows, forecast_size, ...)``. When the series is too short
        for a single window, ``n_windows`` is 0 but the trailing dimensions
        are kept.
    """
    data = np.asarray(data)
    window = input_size + forecast_size
    # +1 because a series of exactly `window` points still holds one window.
    n_windows = max(len(data) - window + 1, 0)

    if n_windows == 0:
        trailing = data.shape[1:]
        return (np.empty((0, input_size) + trailing, dtype=data.dtype),
                np.empty((0, forecast_size) + trailing, dtype=data.dtype))

    X = np.array([data[i:i + input_size] for i in range(n_windows)])
    y = np.array([data[i + input_size:i + window] for i in range(n_windows)])
    return X, y
# Window the series into (30-step input, 7-step target) pairs and convert
# them to float32 tensors.
input_size, forecast_size = 30, 7
X, y_seq = create_sequences(df['y'].values, input_size, forecast_size)
X_tensor = torch.FloatTensor(X)
y_tensor = torch.FloatTensor(y_seq)

# Full-batch training: one optimiser step per epoch over every window.
criterion = nn.MSELoss()
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    pred = model(X_tensor)
    loss = criterion(pred, y_tensor)
    loss.backward()
    optimizer.step()

    # Report progress every 50th epoch.
    if (epoch + 1) % 50 == 0:
        print(f"Epoch {epoch+1}: MSE={loss.item():.4f}")
Temporal Fusion Transformer (TFT)
TFT handles static covariates, known future inputs, and unknown future inputs.
class TemporalFusionBlock(nn.Module):
    """Self-attention followed by a feed-forward network.

    Each sub-layer's output goes through dropout, is added back to its input
    and is then layer-normalised.

    Args:
        d_model: Feature width of the input and output.
        n_heads: Number of attention heads.
        d_ff: Hidden width of the feed-forward network.
        dropout: Dropout probability used inside the attention module and on
            both residual branches.
    """

    def __init__(self, d_model, n_heads=4, d_ff=128, dropout=0.1):
        super().__init__()
        self.attention = nn.MultiheadAttention(
            embed_dim=d_model,
            num_heads=n_heads,
            dropout=dropout,
            batch_first=True,
        )
        feed_forward = [
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Linear(d_ff, d_model),
        ]
        self.ff = nn.Sequential(*feed_forward)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Apply the attention and feed-forward sub-layers to ``x``."""
        # Self-attention: x serves as query, key and value; the attention
        # weights returned alongside the output are not used.
        attended = self.attention(x, x, x)[0]
        normed = self.norm1(x + self.dropout(attended))
        transformed = self.ff(normed)
        return self.norm2(normed + self.dropout(transformed))
class SimpleTFT(nn.Module):
    """Linear input projection, one TemporalFusionBlock, and a linear output head.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Width of the projected representation.
        forecast_size: Number of values produced per sequence.
        n_heads: Attention heads used by the temporal block.
    """

    def __init__(self, input_size, hidden_size, forecast_size, n_heads=4):
        super().__init__()
        self.input_proj = nn.Linear(input_size, hidden_size)
        self.temporal_block = TemporalFusionBlock(hidden_size, n_heads)
        self.output = nn.Linear(hidden_size, forecast_size)

    def forward(self, x):
        """Map a (batch, seq_len, input_size) tensor to (batch, forecast_size)."""
        encoded = self.temporal_block(self.input_proj(x))
        # Only the representation of the final timestep feeds the output head.
        last_step = encoded[:, -1, :]
        return self.output(last_step)
# Smoke test: push one random batch through the model and print the output shape.
tft = SimpleTFT(5, 64, 7)
X_tft = torch.randn(32, 30, 5)  # (batch, seq_len, features)
out = tft(X_tft)
print(f"TFT output: {out.shape}")
DeepAR: Autoregressive RNN
class DeepAR(nn.Module):
    """LSTM encoder that outputs the mean and log-variance of a Gaussian.

    Args:
        input_size: Number of features per timestep.
        hidden_size: LSTM hidden width.
        n_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, n_layers=2):
        super().__init__()
        self.rnn = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=n_layers,
            batch_first=True,
        )
        # Two outputs per sequence: mean and log-variance.
        self.fc = nn.Linear(hidden_size, 2)

    def forward(self, x):
        """Return ``(mean, log_var)``, one value each per sequence in the batch."""
        hidden_states = self.rnn(x)[0]
        # The distribution parameters are read from the last timestep only.
        last_state = hidden_states[:, -1, :]
        params = self.fc(last_state)
        return params[:, 0], params[:, 1]

    def loss(self, mean, log_var, target):
        """Gaussian negative log-likelihood, averaged, without the constant term."""
        squared_error = (target - mean) ** 2
        per_sample = log_var + squared_error / torch.exp(log_var)
        return 0.5 * torch.mean(per_sample)
# Smoke test: one random batch of 32 sequences, 30 steps, 3 features each.
deepar = DeepAR(3, 64)
x_deepar = torch.randn(32, 30, 3)
mean, log_var = deepar(x_deepar)
print(f"DeepAR output: mean={mean.shape}, log_var={log_var.shape}")
Ensemble Forecasting
def ensemble_forecast(models, X):
    """Average the forecasts of several models with equal weights.

    Each model is switched to evaluation mode (and left in it) and called on
    ``X`` with gradient tracking disabled. When a model returns a tuple, only
    its first element is used as the forecast.

    Args:
        models: Iterable of callables that expose ``eval()`` and return a
            tensor, or a tuple whose first item is a tensor.
        X: Input passed unchanged to every model.

    Returns:
        numpy.ndarray holding the element-wise mean of the model outputs.

    Raises:
        ValueError: If ``models`` is empty.
    """
    models = list(models)
    if not models:
        raise ValueError("ensemble_forecast needs at least one model")

    predictions = []
    with torch.no_grad():
        for model in models:
            model.eval()
            pred = model(X)
            if isinstance(pred, tuple):
                pred = pred[0]
            # Tensor.numpy() only works on CPU tensors, so copy the output to
            # host memory first (a no-op when it already lives there).
            predictions.append(pred.detach().cpu().numpy())
    return np.mean(predictions, axis=0)
# Ensemble of three independently initialised N-BEATS models, queried with
# one random input window.
models = []
for _ in range(3):
    models.append(NBeats(input_size=30, forecast_size=7))
X_test = torch.randn(1, 30)
ensemble_pred = ensemble_forecast(models, X_test)
print(f"Ensemble prediction shape: {ensemble_pred.shape}")
ARIMA Background
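Before deep learning, ARIMA was the standard for time series forecasting. The ARIMA(p,d,q) model combines autoregression, differencing, and moving averages:
$$\phi(B)\,(1 - B)^d\, y_t = \theta(B)\,\varepsilon_t$$
where $\phi(B) = 1 - \phi_1 B - \dots - \phi_p B^p$ is the AR polynomial, $\theta(B) = 1 + \theta_1 B + \dots + \theta_q B^q$ is the MA polynomial, $B$ is the backshift operator ($B y_t = y_{t-1}$), and $d$ is the differencing order.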
Feature Engineering for Forecasting
# Time-based features
df['dayofweek'] = df['ds'].dt.dayofweek
df['month'] = df['ds'].dt.month
df['dayofyear'] = df['ds'].dt.dayofyear
df['is_weekend'] = df['dayofweek'].isin([5, 6]).astype(int)

# Lag features: the target as observed `lag` days earlier.
for lag in [1, 7, 14, 30]:
    df[f'lag_{lag}'] = df['y'].shift(lag)

# Rolling statistics over the `window` days *before* the current row.
# Shifting by one day first keeps today's target out of its own features;
# an unshifted rolling window would include the value being predicted.
past_y = df['y'].shift(1)
for window in [7, 14, 30]:
    past_window = past_y.rolling(window)
    df[f'rolling_mean_{window}'] = past_window.mean()
    df[f'rolling_std_{window}'] = past_window.std()

# Fourier features for seasonality: sine/cosine pairs at the first three
# harmonics of a 365-day cycle.
for k in range(1, 4):
    df[f'sin_yearly_{k}'] = np.sin(2 * np.pi * k * df['dayofyear'] / 365)
    df[f'cos_yearly_{k}'] = np.cos(2 * np.pi * k * df['dayofyear'] / 365)

# The first rows have no complete lag/rolling history; drop them.
df = df.dropna()
print(f"Engineered features: {df.shape[1]} columns")
Best Practices
- Walk-forward validation — never leak future data
- Multiple horizons — evaluate at different forecast lengths
- Ensemble methods — combine diverse models for robustness
- Feature engineering — lags, rolling stats, and calendar features help
- Probabilistic forecasting — predict intervals, not just point estimates
- Domain knowledge — incorporate holidays, promotions, and external events
Summary
Modern forecasting goes beyond ARIMA. Prophet handles trends and seasonality, N-BEATS learns patterns directly, and TFT integrates covariates with attention. Master walk-forward validation and ensemble methods to build reliable forecasting systems.