🎉 75% of content is free forever — Unlock Premium from $10/mo →
CW
Search courses…
💼 Servicesℹ️ About✉️ ContactView Pricing Plansfrom $10

Time Series Basics: Trend, Seasonality and Stationarity

Module 10: Specialized ML🟢 Free Lesson

Advertisement

Time Series Basics: Trend, Seasonality and Stationarity

Time series data requires special handling due to temporal dependencies. This lesson covers fundamental concepts for time series analysis.

Time Series Decomposition: y(t) = T(t) + S(t) + R(t). Panels: Original, Trend, Seasonal, Residual. Additive: y = T + S + R | Multiplicative: y = T × S × R

Time Series Components

The time series decomposition formula splits a series $y_t$ into trend ($T_t$), seasonal ($S_t$) and residual ($R_t$) components:

$$y_t = T_t + S_t + R_t \quad \text{(Additive)}$$
$$y_t = T_t \cdot S_t \cdot R_t \quad \text{(Multiplicative)}$$

Loading and Preparing Time Series

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Load the sales data, parsing the 'date' column as datetimes, and index by date
df = pd.read_csv('sales_data.csv', parse_dates=['date'])
df = df.set_index('date')

# Work with the 'sales' column as a Series
ts = df['sales']

# Put the series on a regular daily grid; dates missing from the index become NaN
ts = ts.asfreq('D')  # Daily frequency
# Forward-fill the gaps. Series.ffill() replaces fillna(method='ffill'),
# whose `method` argument is deprecated in recent pandas releases.
ts = ts.ffill()

# Report the resulting frequency, covered date range and number of observations
print(f"Frequency: {ts.index.freq}")
print(f"Date range: {ts.index.min()} to {ts.index.max()}")
print(f"Length: {len(ts)}")

Visual Inspection

def plot_time_series(ts, title="Time Series"):
    """Show a 2x2 visual summary of a time series.

    Panels:
        * top-left: the raw series;
        * top-right: the series with its 12-observation rolling mean and
          rolling standard deviation;
        * bottom-left: histogram of the non-missing values;
        * bottom-right: one box plot per calendar month.

    Args:
        ts: pandas Series of observations. The monthly panel reads
            ``ts.index.month``, so the index must be datetime-like.
        title: Prefix used in the title of the raw-series panel.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    observed = ts.dropna()

    # Raw series
    axes[0, 0].plot(ts, linewidth=0.8)
    axes[0, 0].set_title(f'{title} - Raw')
    axes[0, 0].set_ylabel('Value')

    # Rolling statistics
    rolling_mean = ts.rolling(window=12).mean()
    rolling_std = ts.rolling(window=12).std()
    axes[0, 1].plot(ts, linewidth=0.8, label='Original')
    axes[0, 1].plot(rolling_mean, label='Rolling Mean')
    axes[0, 1].plot(rolling_std, label='Rolling Std')
    axes[0, 1].legend()
    axes[0, 1].set_title('Rolling Statistics')

    # Distribution
    axes[1, 0].hist(observed, bins=50, edgecolor='black')
    axes[1, 0].set_title('Distribution')

    # Monthly boxplot. A Series has no .boxplot() method (only DataFrame
    # does), so group the values by calendar month and draw them with
    # matplotlib directly, one box per month present in the data.
    monthly = [
        (int(month), values.to_numpy())
        for month, values in observed.groupby(observed.index.month)
    ]
    if monthly:
        axes[1, 1].boxplot(
            [values for _, values in monthly],
            positions=[month for month, _ in monthly],
        )
    axes[1, 1].set_title('Monthly Distribution')
    axes[1, 1].set_xlabel('Month')

    plt.tight_layout()
    plt.show()

# Draw the four-panel overview for the prepared series, using the default title
plot_time_series(ts)

Time Series Decomposition

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.seasonal import STL

def _show_decomposition(decomposed):
    """Plot a decomposition result, tidy the layout, display it, and return the figure."""
    figure = decomposed.plot()
    plt.tight_layout()
    plt.show()
    return figure


# NOTE(review): every decomposition below uses period=12 — confirm this
# matches the seasonal cycle length of the data being analysed.

# Classical additive model: y = T + S + R
decomposition_add = seasonal_decompose(ts, model='additive', period=12)
fig = _show_decomposition(decomposition_add)

# Classical multiplicative model: y = T * S * R
decomposition_mul = seasonal_decompose(ts, model='multiplicative', period=12)
fig = _show_decomposition(decomposition_mul)

# STL with robust fitting enabled (robust to outliers)
stl = STL(ts, period=12, robust=True)
result = stl.fit()
fig = _show_decomposition(result)

Stationarity Tests

The Augmented Dickey-Fuller (ADF) test is based on the following regression:

$$\Delta y_t = \alpha + \beta t + \gamma y_{t-1} + \sum_{i=1}^{p} \delta_i \Delta y_{t-i} + \varepsilon_t$$

Null hypothesis: $\gamma = 0$ (unit root exists, non-stationary).

from statsmodels.tsa.stattools import adfuller, kpss, acf, pacf

def adf_test(series):
    """Run the Augmented Dickey-Fuller test and print a short report.

    Missing values are dropped before testing. The report lists the test
    statistic, the p-value, the critical values, and a ``Stationary`` flag
    that is True when the p-value is below 0.05.
    """
    outcome = adfuller(series.dropna())
    statistic, p_value, critical_values = outcome[0], outcome[1], outcome[4]

    print('ADF Test Results:')
    print(f'ADF Statistic: {statistic:.4f}')
    print(f'p-value: {p_value:.4f}')
    print('Critical Values:')
    for level in critical_values:
        print(f'  {level}: {critical_values[level]:.4f}')
    print(f'Stationary: {p_value < 0.05}')

def kpss_test(series):
    """Run the KPSS test (constant + trend regression) and print a short report.

    KPSS reverses the ADF hypotheses: here stationarity is the null, so the
    ``Stationary`` flag is True when the p-value is above 0.05. Missing
    values are dropped before testing.
    """
    outcome = kpss(series.dropna(), regression='ct', nlags='auto')
    statistic, p_value = outcome[0], outcome[1]

    report_lines = (
        'KPSS Test Results:',
        f'KPSS Statistic: {statistic:.4f}',
        f'p-value: {p_value:.4f}',
        f'Stationary: {p_value > 0.05}',
    )
    for line in report_lines:
        print(line)

# Run both stationarity tests (ADF first, then KPSS) on the untransformed series
print("Original Series:")
for stationarity_check in (adf_test, kpss_test):
    stationarity_check(ts)

Making Series Stationary

from scipy.stats import boxcox

# --- 1. Differencing -------------------------------------------------------
ts_diff1 = ts.diff().dropna()              # first difference
ts_diff2 = ts.diff().diff().dropna()       # second difference
ts_seasonal_diff = ts.diff(12).dropna()    # seasonal difference at lag 12

# --- 2. Log transformation (for multiplicative seasonality) ----------------
ts_log = np.log(ts)
ts_log_diff = ts_log.diff().dropna()

# --- 3. Box-Cox transformation ---------------------------------------------
# Called without a lambda, so the fitted lambda comes back with the data.
ts_boxcox, lambda_opt = boxcox(ts.dropna())

# Compare the transformed series side by side
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
transformed_panels = (
    (ts_diff1, 'First Difference'),
    (ts_seasonal_diff, 'Seasonal Difference'),
    (ts_log_diff, 'Log Transform + Difference'),
)
for axis, (transformed, panel_title) in zip(axes, transformed_panels):
    axis.plot(transformed)
    axis.set_title(panel_title)
plt.tight_layout()
plt.show()

# Re-run the ADF test on the first-differenced series
print("\nAfter Differencing:")
adf_test(ts_diff1)

ACF and PACF

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# One grid row per series: ACF in the left column, PACF in the right column
correlogram_rows = (
    (ts.dropna(), 'Original'),
    (ts_diff1, 'Differenced'),
)
for grid_row, (plotted_series, series_label) in enumerate(correlogram_rows):
    plot_acf(plotted_series, lags=40, ax=axes[grid_row, 0])
    axes[grid_row, 0].set_title(f'ACF - {series_label}')

    plot_pacf(plotted_series, lags=40, ax=axes[grid_row, 1])
    axes[grid_row, 1].set_title(f'PACF - {series_label}')

plt.tight_layout()
plt.show()

Autocorrelation Analysis

def analyze_autocorrelation(ts, max_lags=40):
    """Compute ACF/PACF values and print which lags are significant.

    A lag counts as significant when the absolute (partial) autocorrelation
    exceeds ``1.96 / sqrt(n)``, where ``n`` is the number of non-missing
    observations. Lag 0 is never reported: its correlation is 1 by
    definition, so counting it would make every series look like it has a
    significant lag.

    Args:
        ts: pandas Series; missing values are dropped before analysis.
        max_lags: Highest lag to evaluate.

    Returns:
        Tuple ``(acf_values, pacf_values)`` exactly as returned by ``acf``
        and ``pacf`` (lag 0 included).
    """
    observed = ts.dropna()
    acf_values = acf(observed, nlags=max_lags)
    pacf_values = pacf(observed, nlags=max_lags)

    # Significance threshold for an individual lag
    significance_level = 1.96 / np.sqrt(len(observed))

    def significant_lags(values):
        # Filter on the lag index itself rather than slicing off the first
        # hit, so the result is right even if lag 0 is not above the threshold.
        lags = np.flatnonzero(np.abs(values) > significance_level)
        return lags[lags > 0]

    significant_acf = significant_lags(acf_values)
    significant_pacf = significant_lags(pacf_values)

    print(f"Significant ACF lags: {significant_acf}")
    print(f"Significant PACF lags: {significant_pacf}")

    # Interpret patterns (rough heuristics, not formal tests)
    if len(significant_acf) > 1:
        print("Pattern: Slow decay in ACF suggests non-stationarity")
    # Only report a cut-off when some non-zero lag is actually significant
    if len(significant_pacf) > 0:
        print(f"Pattern: PACF cuts off after lag {significant_pacf[-1]}")

    return acf_values, pacf_values

# Analyze the first-differenced series and keep the returned ACF/PACF arrays
acf_vals, pacf_vals = analyze_autocorrelation(ts_diff1)

Key Takeaways

  1. Always visualize time series before modeling
  2. Test for stationarity using ADF and KPSS tests
  3. Use differencing or transformations to achieve stationarity
  4. ACF/PACF plots help identify model orders
  5. Consider seasonal patterns in decomposition

Premium Content

Time Series Basics: Trend, Seasonality and Stationarity

Unlock this lesson and 900+ advanced tutorials with a Premium plan.

🎯End-to-end Projects
💼Interview Prep
📜Certificates
🤝Community Access

Already a member? Log in

Need Expert Data Science Help?

Get personalized tutoring, project support, or professional consulting.

Advertisement