Monte Carlo Simulation

When analytical solutions are impossible, simulation gives answers. Learn Monte Carlo methods for risk analysis, hypothesis testing, and scenario planning.

Core Monte Carlo Principle

\hat{I} = \frac{1}{N}\sum_{i=1}^{N} f(x_i), \quad x_i \sim p(x)

Basic Monte Carlo Integration

import numpy as np
from typing import Callable

def monte_carlo_integration(func: Callable, bounds: tuple, n_samples: int = 100000):
    """Estimate integral using Monte Carlo sampling"""
    a, b = bounds
    
    # Uniform samples
    x_samples = np.random.uniform(a, b, n_samples)
    
    # Evaluate function
    y_samples = func(x_samples)
    
    # Estimate: (b-a) * mean(f(x))
    integral = (b - a) * np.mean(y_samples)
    std_error = (b - a) * np.std(y_samples) / np.sqrt(n_samples)
    
    return integral, std_error

# Example: estimate pi
def estimate_pi(n_samples=1000000):
    x = np.random.uniform(-1, 1, n_samples)
    y = np.random.uniform(-1, 1, n_samples)
    
    inside_circle = np.sum(x**2 + y**2 <= 1)
    pi_estimate = 4 * inside_circle / n_samples
    
    return pi_estimate

# Example: integrate e^(-x^2)
result, error = monte_carlo_integration(
    lambda x: np.exp(-x**2),
    bounds=(0, 2),
    n_samples=1000000
)
print(f"âˆ«â‚€² e^(-x²) dx ≈ˆ {result:.6f} ± {error:.6f}")

Bootstrap Resampling

import numpy as np
from scipy import stats

class BootstrapAnalyzer:
    def __init__(self, data, n_bootstrap=10000):
        self.data = np.array(data)
        self.n_bootstrap = n_bootstrap
    
    def confidence_interval(self, statistic_fn=np.mean, alpha=0.05):
        """Compute bootstrap confidence interval"""
        bootstrap_stats = np.zeros(self.n_bootstrap)
        
        for i in range(self.n_bootstrap):
            sample = np.random.choice(self.data, size=len(self.data), replace=True)
            bootstrap_stats[i] = statistic_fn(sample)
        
        lower = np.percentile(bootstrap_stats, 100 * alpha / 2)
        upper = np.percentile(bootstrap_stats, 100 * (1 - alpha / 2))
        
        return {
            'estimate': statistic_fn(self.data),
            'ci_lower': lower,
            'ci_upper': upper,
            'se': np.std(bootstrap_stats),
            'distribution': bootstrap_stats
        }
    
    def bootstrap_hypothesis_test(self, other_data, statistic_fn=np.mean, n_permutations=10000):
        """Two-sample bootstrap hypothesis test"""
        other_data = np.array(other_data)
        
        observed_diff = statistic_fn(self.data) - statistic_fn(other_data)
        
        combined = np.concatenate([self.data, other_data])
        n1 = len(self.data)
        
        null_diffs = np.zeros(n_permutations)
        for i in range(n_permutations):
            np.random.shuffle(combined)
            null_diffs[i] = statistic_fn(combined[:n1]) - statistic_fn(combined[n1:])
        
        p_value = np.mean(np.abs(null_diffs) >= np.abs(observed_diff))
        
        return {
            'observed_difference': observed_diff,
            'p_value': p_value,
            'null_distribution': null_diffs
        }

# Usage
data_a = np.random.normal(100, 15, 100)
data_b = np.random.normal(105, 15, 120)

analyzer = BootstrapAnalyzer(data_a)
ci = analyzer.confidence_interval(statistic_fn=np.mean)
print(f"Mean: {ci['estimate']:.2f}, 95% CI: [{ci['ci_lower']:.2f}, {ci['ci_upper']:.2f}]")

test_result = analyzer.bootstrap_hypothesis_test(data_b)
print(f"p-value: {test_result['p_value']:.4f}")

Permutation Tests

import numpy as np
from itertools import permutations

class PermutationTest:
    def __init__(self, group1, group2):
        self.group1 = np.array(group1)
        self.group2 = np.array(group2)
    
    def test_statistic(self, g1, g2):
        """Difference in means"""
        return np.mean(g1) - np.mean(g2)
    
    def exact_test(self):
        """Exact permutation test (combinatorial)"""
        observed = self.test_statistic(self.group1, self.group2)
        
        combined = np.concatenate([self.group1, self.group2])
        n1 = len(self.group1)
        
        # Generate all permutations
        count_extreme = 0
        total = 0
        
        for perm in permutations(range(len(combined))):
            perm_g1 = combined[list(perm[:n1])]
            perm_g2 = combined[list(perm[n1:])]
            
            perm_stat = self.test_statistic(perm_g1, perm_g2)
            if abs(perm_stat) >= abs(observed):
                count_extreme += 1
            total += 1
        
        return {
            'observed': observed,
            'p_value': count_extreme / total
        }
    
    def approximate_test(self, n_permutations=10000):
        """Approximate permutation test (Monte Carlo)"""
        observed = self.test_statistic(self.group1, self.group2)
        
        combined = np.concatenate([self.group1, self.group2])
        n1 = len(self.group1)
        
        null_distribution = np.zeros(n_permutations)
        for i in range(n_permutations):
            perm = np.random.permutation(len(combined))
            null_distribution[i] = self.test_statistic(
                combined[perm[:n1]], combined[perm[n1:]]
            )
        
        p_value = np.mean(np.abs(null_distribution) >= np.abs(observed))
        
        return {
            'observed': observed,
            'p_value': p_value,
            'null_distribution': null_distribution
        }

# Usage
treatment = np.random.normal(10, 2, 50)
control = np.random.normal(9.5, 2, 50)

test = PermutationTest(treatment, control)
result = test.approximate_test(n_permutations=100000)
print(f"Observed difference: {result['observed']:.4f}")
print(f"p-value: {result['p_value']:.4f}")

Risk Modeling and Scenario Analysis

import numpy as np
from dataclasses import dataclass
from typing import List, Dict

@dataclass
class RiskFactor:
    name: str
    distribution: str
    params: dict
    
    def sample(self, n: int = 1) -> np.ndarray:
        if self.distribution == "normal":
            return np.random.normal(self.params['mean'], self.params['std'], n)
        elif self.distribution == "uniform":
            return np.random.uniform(self.params['low'], self.params['high'], n)
        elif self.distribution == "triangular":
            return np.random.triangular(
                self.params['left'], self.params['mode'], self.params['right'], n
            )
        elif self.distribution == "lognormal":
            return np.random.lognormal(self.params['mean'], self.params['sigma'], n)

class MonteCarloSimulator:
    def __init__(self, n_simulations: int = 10000):
        self.n_simulations = n_simulations
        self.risk_factors: List[RiskFactor] = []
    
    def add_risk_factor(self, factor: RiskFactor):
        self.risk_factors.append(factor)
    
    def simulate(self, model_fn):
        """Run Monte Carlo simulation"""
        # Sample all risk factors
        samples = {}
        for factor in self.risk_factors:
            samples[factor.name] = factor.sample(self.n_simulations)
        
        # Apply model
        results = model_fn(samples)
        
        return results
    
    def analyze_results(self, results):
        """Compute risk metrics"""
        return {
            'mean': np.mean(results),
            'std': np.std(results),
            'percentiles': {
                '5th': np.percentile(results, 5),
                '25th': np.percentile(results, 25),
                '50th': np.percentile(results, 50),
                '75th': np.percentile(results, 75),
                '95th': np.percentile(results, 95)
            },
            'var_95': np.percentile(results, 5),  # Value at Risk
            'cvar_95': np.mean(results[results <= np.percentile(results, 5)])  # CVaR
        }

# Project cost estimation
simulator = MonteCarloSimulator(n_simulations=50000)

simulator.add_risk_factor(RiskFactor("labor_cost", "triangular", 
    {"left": 80000, "mode": 100000, "right": 150000}))
simulator.add_risk_factor(RiskFactor("material_cost", "normal", 
    {"mean": 50000, "std": 10000}))
simulator.add_risk_factor(RiskFactor("duration_months", "uniform", 
    {"low": 6, "high": 12}))

def project_cost(samples):
    labor = samples['labor_cost']
    materials = samples['material_cost']
    overhead = samples['duration_months'] * 5000
    return labor + materials + overhead

results = simulator.simulate(project_cost)
analysis = simulator.analyze_results(results)

print(f"Expected cost: ${analysis['mean']:,.0f}")
print(f"95% VaR: ${analysis['var_95']:,.0f}")
print(f"95% CVaR: ${analysis['cvar_95']:,.0f}")
print(f"5th-95th percentile: ${analysis['percentiles']['5th']:,.0f} - ${analysis['percentiles']['95th']:,.0f}")

Financial Option Pricing

import numpy as np

def black_scholes_monte_carlo(S0, K, T, r, sigma, n_sims=100000, option_type='call'):
    """Price European option using Monte Carlo"""
    np.random.seed(42)
    
    # Simulate stock prices at maturity
    Z = np.random.standard_normal(n_sims)
    ST = S0 * np.exp((r - 0.5 * sigma**2) * T + sigma * np.sqrt(T) * Z)
    
    # Compute payoffs
    if option_type == 'call':
        payoffs = np.maximum(ST - K, 0)
    else:
        payoffs = np.maximum(K - ST, 0)
    
    # Discount to present
    price = np.exp(-r * T) * np.mean(payoffs)
    std_error = np.exp(-r * T) * np.std(payoffs) / np.sqrt(n_sims)
    
    return price, std_error

# Example
price, se = black_scholes_monte_carlo(
    S0=100, K=105, T=1.0, r=0.05, sigma=0.2, n_sims=1000000
)
print(f"European Call Price: ${price:.4f} ± ${se:.4f}")

# Asian option (path-dependent)
def asian_option_monte_carlo(S0, K, T, r, sigma, n_sims=100000, n_steps=252):
    dt = T / n_steps
    
    Z = np.random.standard_normal((n_sims, n_steps))
    paths = np.zeros((n_sims, n_steps + 1))
    paths[:, 0] = S0
    
    for t in range(n_steps):
        paths[:, t+1] = paths[:, t] * np.exp(
            (r - 0.5 * sigma**2) * dt + sigma * np.sqrt(dt) * Z[:, t]
        )
    
    # Average price over path
    avg_price = np.mean(paths[:, 1:], axis=1)
    
    # Payoff
    payoffs = np.maximum(avg_price - K, 0)
    
    return np.exp(-r * T) * np.mean(payoffs)

asian_price = asian_option_monte_carlo(S0=100, K=100, T=1.0, r=0.05, sigma=0.2)
print(f"Asian Call Price: ${asian_price:.4f}")

Best Practices

Use variance reduction (antithetic variates, control variates) for efficiency
Validate with analytical solutions when available
Report confidence intervals, not just point estimates
Use sufficient samples – check convergence by varying n
Seed for reproducibility in production environments