Advanced Time Series Forecasting

Classical ARIMA models handle linear patterns. Modern deep learning models capture complex nonlinear dynamics, multiple seasonalities, and external regressors. This lesson covers the state of the art.

Time Series Decomposition

Why Advanced Forecasting Matters

Retail demand, energy consumption, financial markets – forecasting drives decisions worth billions. Even small accuracy improvements translate to massive value. Modern methods outperform classical approaches on complex, multi-seasonal data.

import warnings

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import mean_absolute_error, mean_squared_error
warnings.filterwarnings('ignore')

Generate Complex Time Series

np.random.seed(42)
n = 365 * 2  # two years of daily observations
dates = pd.date_range('2023-01-01', periods=n, freq='D')
t = np.arange(n)

# Boolean mask of the days of the year that receive a holiday uplift;
# reused for both the signal component and the indicator column.
is_holiday = np.isin(dates.dayofyear, [1, 180, 355])

# Additive components: linear trend, weekly and yearly sinusoids,
# a fixed holiday bump, and Gaussian noise.
trend = 100 + 0.05 * t
weekly = 10 * np.sin(2 * np.pi * t / 7)
yearly = 20 * np.sin(2 * np.pi * t / 365)
holidays = 15 * is_holiday.astype(float)
# The target noise is drawn before the temperature noise so the seeded
# random stream is consumed in a fixed order.
noise = np.random.normal(0, 3, n)

y = trend + weekly + yearly + holidays + noise

df = pd.DataFrame({
    'ds': dates,
    'y': y,
    'holiday': is_holiday.astype(int),
    'temperature': 15 + 10 * np.sin(2 * np.pi * t / 365) + np.random.normal(0, 2, n)
})
print(f"Time series: {len(df)} days, range: {df['y'].min():.1f} to {df['y'].max():.1f}")

Walk-Forward Validation

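Walk-forward validation is the gold standard for evaluating time series models: each fold trains only on observations that come before its test window, so no future data leaks into training.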

def walk_forward_validation(model_class, df, n_splits=5, **model_params):
    """Score a forecaster with expanding-window walk-forward validation.

    The last ``n_splits * test_size`` rows of ``df`` are cut into
    ``n_splits`` consecutive test windows, where
    ``test_size = len(df) // (n_splits + 1)``. For each window a fresh
    model is fitted on every row before the window and asked to forecast
    the window's length. Forecasts from all windows are pooled before the
    metrics are computed.

    Args:
        model_class: Callable returning an object with ``fit(train_df)``
            and ``predict(horizon)`` methods.
        df: DataFrame in chronological order with a ``'y'`` column.
        n_splits: Number of train/test folds (at least 1).
        **model_params: Keyword arguments forwarded to ``model_class``.

    Returns:
        Dict with keys ``'MAE'``, ``'RMSE'`` and ``'MAPE'`` (percent).
        MAPE is computed over non-zero actuals only and is ``nan`` when
        every actual is zero.

    Raises:
        ValueError: If ``n_splits`` is below 1, if ``df`` is too short to
            give each fold a non-empty test window, or if a model returns
            a forecast whose length differs from the test window.
    """
    if n_splits < 1:
        raise ValueError("n_splits must be at least 1")

    n = len(df)
    test_size = n // (n_splits + 1)
    if test_size < 1:
        raise ValueError(
            f"Need at least {n_splits + 1} rows for {n_splits} splits, got {n}"
        )

    predictions = []
    actuals = []

    for i in range(n_splits):
        # Test windows are anchored to the end of the series, so any
        # remainder rows end up in the first training window.
        train_end = n - (n_splits - i) * test_size
        test_end = train_end + test_size

        train = df.iloc[:train_end]
        test = df.iloc[train_end:test_end]

        model = model_class(**model_params)
        model.fit(train)
        pred = np.ravel(model.predict(len(test)))
        if len(pred) != len(test):
            raise ValueError(
                f"Model returned {len(pred)} predictions for a horizon of {len(test)}"
            )

        predictions.extend(pred)
        actuals.extend(test['y'].values)

    predictions = np.array(predictions, dtype=float)
    actuals = np.array(actuals, dtype=float)

    mae = mean_absolute_error(actuals, predictions)
    rmse = np.sqrt(mean_squared_error(actuals, predictions))

    # Percentage error is undefined where the actual value is zero, so
    # those points are left out instead of producing inf or nan.
    nonzero = actuals != 0
    if nonzero.any():
        pct_error = np.abs((actuals[nonzero] - predictions[nonzero]) / actuals[nonzero])
        mape = np.mean(pct_error) * 100
    else:
        mape = float('nan')

    return {'MAE': mae, 'RMSE': rmse, 'MAPE': mape}

class SimpleExpSmoothing:
    """Simple exponential smoothing with a flat multi-step forecast.

    The smoothed level starts at the first observation and is updated as
    ``level = alpha * y_t + (1 - alpha) * level`` for each later
    observation. Every forecast step equals the final level. With
    ``alpha=1`` this reduces to repeating the last observed value.
    """

    def __init__(self, alpha=0.3):
        """Store the smoothing factor.

        Args:
            alpha: Weight of the newest observation, in ``[0, 1]``.

        Raises:
            ValueError: If ``alpha`` lies outside ``[0, 1]``.
        """
        if not 0 <= alpha <= 1:
            raise ValueError("alpha must be between 0 and 1")
        self.alpha = alpha

    def fit(self, train_df):
        """Compute the smoothed level from the ``'y'`` column of ``train_df``.

        Raises:
            ValueError: If the series is empty.
        """
        self.train = np.asarray(train_df['y'], dtype=float)
        if self.train.size == 0:
            raise ValueError("Cannot fit on an empty series")

        level = self.train[0]
        for value in self.train[1:]:
            level = self.alpha * value + (1 - self.alpha) * level
        self.level = level

    def predict(self, horizon):
        """Return an array of ``horizon`` values, all equal to the fitted level."""
        pred = np.full(horizon, self.level)
        return pred

# Score the baseline forecaster with the default number of folds.
baseline_params = {'alpha': 0.3}
results = walk_forward_validation(SimpleExpSmoothing, df, **baseline_params)
print(f"Simple baseline: MAE={results['MAE']:.2f}, RMSE={results['RMSE']:.2f}")

Prophet

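Prophet, developed at Facebook (now Meta), fits an additive model with trend, seasonality, and holiday components. Seasonalities are detected or configured on the model; holiday dates must be supplied by the user.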

from prophet import Prophet

# Prepare data: the final `horizon` days are held out for evaluation
horizon = 60
prophet_df = df[['ds', 'y']].copy()
train = prophet_df.iloc[:-horizon]
test = prophet_df.iloc[-horizon:]

# Holiday calendar, read from the indicator column so the dates always
# match the days that actually carry the holiday effect. (Hard-coding
# them is error-prone: in leap year 2024, days 180 and 355 of the year
# fall on Jun 28 and Dec 20, not Jun 29 and Dec 21.)
holidays_df = pd.DataFrame({
    'holiday': 'holiday',
    'ds': df.loc[df['holiday'] == 1, 'ds'].to_numpy(),
    'lower_window': -1,
    'upper_window': 1
})

# Fit Prophet. The holidays are passed to the same constructor as the
# seasonality and prior settings; building a second Prophet object just
# for the holidays would throw those settings away.
model = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    changepoint_prior_scale=0.05,
    seasonality_prior_scale=10,
    holidays=holidays_df
)
model.fit(train)

# Predict: the future frame holds the training dates followed by
# `horizon` new ones, so the forecast for the holdout is the tail.
future = model.make_future_dataframe(periods=horizon)
forecast = model.predict(future)
pred = forecast.iloc[-horizon:]['yhat'].values

mae = mean_absolute_error(test['y'], pred)
print(f"Prophet MAE: {mae:.2f}")
print(f"Components: trend, yearly, weekly, holidays")

N-BEATS

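N-BEATS (Neural Basis Expansion Analysis for Time Series) is a pure deep learning forecaster built from fully connected blocks, with no recurrence. The implementation below is simplified: its two blocks read the same input window and their forecasts are summed, whereas the original architecture also has each block emit a backcast that is subtracted from the input passed to the next block.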

class NBeatsBlock(nn.Module):
    """Fully connected stack followed by a linear projection.

    ``n_layers`` Linear+ReLU pairs map ``input_size`` features to
    ``hidden_size`` features, and a final linear layer maps the result
    to ``theta_size`` outputs.
    """

    def __init__(self, input_size, theta_size, hidden_size, n_layers=4):
        super().__init__()
        # Feature width entering each hidden layer: the raw input for
        # the first layer, hidden_size for every later one.
        widths = [input_size] + [hidden_size] * n_layers
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        self.layers = nn.Sequential(*stack)
        self.theta = nn.Linear(hidden_size, theta_size)

    def forward(self, x):
        """Run ``x`` through the hidden stack and project it to theta."""
        return self.theta(self.layers(x))

class NBeats(nn.Module):
    """Two NBeatsBlock heads applied to the same input and summed."""

    def __init__(self, input_size, forecast_size, hidden_size=256):
        super().__init__()
        self.forecast_size = forecast_size

        # Both blocks share one configuration: each reads the full input
        # window and emits forecast_size values.
        block_args = (input_size, forecast_size, hidden_size)
        self.block1 = NBeatsBlock(*block_args)
        self.block2 = NBeatsBlock(*block_args)

    def forward(self, x):
        """Return the sum of both blocks' outputs for ``x`` of shape (batch, input_size)."""
        return self.block1(x) + self.block2(x)

# Training setup: a 30-step input window, a 7-step forecast, Adam optimizer
model = NBeats(30, 7, hidden_size=128)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Create sequences
def create_sequences(data, input_size, forecast_size):
    """Slice a 1-D series into overlapping (input, target) windows.

    Window ``i`` uses ``data[i:i + input_size]`` as input and the next
    ``forecast_size`` values as target. Every window that fits in the
    series is produced, including the one that ends on the last element.

    Args:
        data: 1-D array-like of observations in time order.
        input_size: Number of past values in each input window.
        forecast_size: Number of future values in each target window.

    Returns:
        Tuple ``(X, y)`` of arrays with shapes ``(n_windows, input_size)``
        and ``(n_windows, forecast_size)``. When the series is shorter
        than one full window, ``n_windows`` is 0.
    """
    data = np.asarray(data)
    span = input_size + forecast_size
    # A series of length L holds L - span + 1 complete windows.
    n_windows = len(data) - span + 1
    if n_windows <= 0:
        return (np.empty((0, input_size), dtype=data.dtype),
                np.empty((0, forecast_size), dtype=data.dtype))

    X = np.stack([data[i:i + input_size] for i in range(n_windows)])
    y = np.stack([data[i + input_size:i + span] for i in range(n_windows)])
    return X, y

input_size = 30
forecast_size = 7
X, y_seq = create_sequences(df['y'].to_numpy(), input_size, forecast_size)

# Convert the windows to float32 tensors for the network.
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y_seq, dtype=torch.float32)

# The loss module and the training-mode switch do not change between
# epochs, so both are set up once before the loop.
criterion = nn.MSELoss()
model.train()

# Full-batch training: every epoch is one optimizer step on all windows.
for epoch in range(200):
    pred = model(X_tensor)
    loss = criterion(pred, y_tensor)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 50 == 0:
        print(f"Epoch {epoch+1}: MSE={loss.item():.4f}")

Temporal Fusion Transformer (TFT)

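TFT handles three kinds of input: static covariates, known future inputs (such as calendar features), and observed inputs whose future values are unknown. The simplified model below keeps only the attention and feed-forward core.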

class TemporalFusionBlock(nn.Module):
    """Self-attention and feed-forward sub-layers with residual connections.

    Each sub-layer's output passes through dropout, is added back to the
    sub-layer's input, and is then layer-normalised.
    """

    def __init__(self, d_model, n_heads=4, d_ff=128, dropout=0.1):
        super().__init__()
        self.attention = nn.MultiheadAttention(
            embed_dim=d_model,
            num_heads=n_heads,
            dropout=dropout,
            batch_first=True,
        )
        self.ff = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Linear(d_ff, d_model),
        )
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply both sub-layers to ``x``; the output has the same shape as ``x``."""
        # Sub-layer 1: self-attention (query, key and value are all x).
        # The returned attention weights are not used.
        attended = self.attention(x, x, x)[0]
        after_attention = self.norm1(x + self.dropout(attended))

        # Sub-layer 2: feed-forward network.
        transformed = self.ff(after_attention)
        return self.norm2(after_attention + self.dropout(transformed))

class SimpleTFT(nn.Module):
    """Project the inputs, apply one TemporalFusionBlock, forecast from the last step."""

    def __init__(self, input_size, hidden_size, forecast_size, n_heads=4):
        super().__init__()
        self.input_proj = nn.Linear(input_size, hidden_size)
        self.temporal_block = TemporalFusionBlock(d_model=hidden_size, n_heads=n_heads)
        self.output = nn.Linear(hidden_size, forecast_size)

    def forward(self, x):
        """Map a 3-D input to ``forecast_size`` outputs per batch element."""
        encoded = self.temporal_block(self.input_proj(x))
        # Only the final position along dimension 1 feeds the output layer.
        last_step = encoded[:, -1, :]
        return self.output(last_step)

# Shape check on random input: batch=32, seq_len=30, features=5
tft = SimpleTFT(5, 64, 7)
X_tft = torch.randn((32, 30, 5))
out = tft(X_tft)
print(f"TFT output: {out.shape}")

DeepAR: Autoregressive RNN

class DeepAR(nn.Module):
    """LSTM encoder with a two-value head read as Gaussian mean and log-variance."""

    def __init__(self, input_size, hidden_size, n_layers=2):
        super().__init__()
        self.rnn = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=n_layers,
            batch_first=True,
        )
        # Two outputs per sample: mean and log-variance of the Gaussian.
        self.fc = nn.Linear(hidden_size, 2)

    def forward(self, x):
        """Return ``(mean, log_var)`` computed from the last LSTM output step."""
        hidden_states = self.rnn(x)[0]
        last_hidden = hidden_states[:, -1, :]
        params = self.fc(last_hidden)
        return params[:, 0], params[:, 1]

    def loss(self, mean, log_var, target):
        """Return half the mean of ``log_var + (target - mean)**2 / exp(log_var)``."""
        squared_error = (target - mean) ** 2
        per_sample = log_var + squared_error / torch.exp(log_var)
        return 0.5 * torch.mean(per_sample)

# Shape check on random input: batch=32, seq_len=30, features=3
deepar = DeepAR(3, hidden_size=64)
x_deepar = torch.randn((32, 30, 3))
mean, log_var = deepar(x_deepar)
print(f"DeepAR output: mean={mean.shape}, log_var={log_var.shape}")

Ensemble Forecasting

def ensemble_forecast(models, X):
    """Average the point forecasts of several models.

    Each model is switched to evaluation mode and called on ``X`` with
    gradient tracking disabled. When a model returns a tuple, only its
    first element is used as the point forecast.

    Args:
        models: Iterable of ``torch.nn.Module`` objects that accept ``X``.
        X: Input tensor passed unchanged to every model.

    Returns:
        NumPy array holding the element-wise mean of the forecasts.

    Raises:
        ValueError: If ``models`` is empty.

    Note:
        The models are left in evaluation mode after the call.
    """
    models = list(models)
    if not models:
        raise ValueError("ensemble_forecast requires at least one model")

    predictions = []
    for model in models:
        model.eval()
        with torch.no_grad():
            pred = model(X)
        if isinstance(pred, tuple):
            pred = pred[0]
        # Move to host memory first so tensors on other devices convert too.
        predictions.append(pred.detach().cpu().numpy())
    return np.mean(predictions, axis=0)

# Ensemble of three NBeats networks, each built with its own initial weights
models = []
for _ in range(3):
    models.append(NBeats(input_size=30, forecast_size=7))
X_test = torch.randn((1, 30))
ensemble_pred = ensemble_forecast(models, X_test)
print(f"Ensemble prediction shape: {ensemble_pred.shape}")

ARIMA Background

Before deep learning, ARIMA was the standard for time series forecasting. The ARIMA(p,d,q) model combines autoregression, differencing, and moving averages:

$$\phi(B)(1-B)^d X_t = \theta(B)\epsilon_t$$

where $\phi(B) = 1 - \phi_1 B - \cdots - \phi_p B^p$ is the AR polynomial, $\theta(B) = 1 + \theta_1 B + \cdots + \theta_q B^q$ is the MA polynomial, $B$ is the backshift operator, and $d$ is the differencing order.

Feature Engineering for Forecasting

# Time-based features
df['dayofweek'] = df['ds'].dt.dayofweek
df['month'] = df['ds'].dt.month
df['dayofyear'] = df['ds'].dt.dayofyear
df['is_weekend'] = df['dayofweek'].isin([5, 6]).astype(int)

# Lag features: the target as it was `lag` days earlier
for lag in [1, 7, 14, 30]:
    df[f'lag_{lag}'] = df['y'].shift(lag)

# Rolling statistics over the previous `window` days. The series is
# shifted by one day first so that a row's features never include that
# row's own target value, which would leak the answer into the inputs.
past_y = df['y'].shift(1)
for window in [7, 14, 30]:
    df[f'rolling_mean_{window}'] = past_y.rolling(window).mean()
    df[f'rolling_std_{window}'] = past_y.rolling(window).std()

# Fourier features for seasonality: sine/cosine pairs at the first three
# harmonics of a 365-day cycle
for k in range(1, 4):
    df[f'sin_yearly_{k}'] = np.sin(2 * np.pi * k * df['dayofyear'] / 365)
    df[f'cos_yearly_{k}'] = np.cos(2 * np.pi * k * df['dayofyear'] / 365)

# The lag and rolling columns are NaN for the earliest rows; drop them.
df = df.dropna()
print(f"Engineered features: {df.shape[1]} columns")

Best Practices

Walk-forward validation – never leak future data
Multiple horizons – evaluate at different forecast lengths
Ensemble methods – combine diverse models for robustness
Feature engineering – lags, rolling stats, and calendar features help
Probabilistic forecasting – predict intervals, not just point estimates
Domain knowledge – incorporate holidays, promotions, and external events

Summary

Modern forecasting goes beyond ARIMA. Prophet handles trends and seasonality, N-BEATS learns patterns directly, and TFT integrates covariates with attention. Master walk-forward validation and ensemble methods to build reliable forecasting systems.