Fairness and Bias Audit

Machine learning models can perpetuate and amplify societal biases. A hiring model might discriminate against women. A lending model might deny loans to minorities. Fairness auditing detects and mitigates these biases, ensuring equitable outcomes.

Fairness Metrics Comparison

Why Fairness Matters

Biased models harm individuals and expose organizations to legal liability. The EU AI Act, NYC Local Law 144, and other regulations now require bias audits. Fairness isn't optional – it's a legal and ethical requirement.

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import (
    confusion_matrix, classification_report,
    roc_auc_score
)
import warnings
warnings.filterwarnings('ignore')

Generate Biased Dataset

# Build a synthetic loan-approval dataset in which both the features and the
# labels depend on the sensitive attributes. The seed is fixed so every run
# produces the same data; the order of the random draws below therefore matters.
np.random.seed(42)
n = 5000

# Sensitive attributes
gender = np.random.binomial(1, 0.5, n)  # 0=female, 1=male
race = np.random.choice([0, 1, 2], n, p=[0.6, 0.25, 0.15])  # 0=white, 1=black, 2=hispanic

# Features (some correlated with sensitive attributes due to historical bias)
# np.where evaluates both branches, so two full arrays of n log-normal draws
# are generated and then selected row by row according to gender.
income = np.where(gender == 1, 
                  np.random.lognormal(10.5, 0.6, n),
                  np.random.lognormal(10.0, 0.6, n))
# Race-dependent multiplier: 1.1 for race 0, 0.85 for race 1, 0.9 otherwise
income *= np.where(race == 0, 1.1, np.where(race == 1, 0.85, 0.9))

# Credit score: normal draws with a higher mean for gender == 1, then a
# race-dependent additive shift (+20 / -30 / -15). Values are not clipped.
credit_score = np.where(gender == 1,
                        np.random.normal(680, 70, n),
                        np.random.normal(650, 70, n))
credit_score += np.where(race == 0, 20, np.where(race == 1, -30, -15))

# These two features are drawn independently of gender and race
education = np.random.choice([0, 1, 2], n, p=[0.3, 0.5, 0.2])
employment_years = np.random.exponential(5, n)

# Target (with bias in historical data)
# The approval log-odds combine thresholded income/credit indicators with
# education and tenure, plus direct penalties on the sensitive attributes.
# NOTE(review): gender == 1 is male (see above), so the `- 0.2 * gender` term
# lowers the label odds for men, while income and credit_score favour men.
# Confirm that this sign is intended.
log_odds = (
    -3
    + 0.5 * (income > 50000).astype(int)
    + 0.8 * (credit_score > 700).astype(int)
    + 0.3 * education
    + 0.1 * employment_years
    - 0.2 * gender  # Gender bias in historical labels
    - 0.3 * (race == 1).astype(int)  # Racial bias
)
# Logistic transform to a probability, then one Bernoulli draw per row
prob = 1 / (1 + np.exp(-log_odds))
approved = np.random.binomial(1, prob)

# Assemble everything into a single frame; 'approved' is the binary target
df = pd.DataFrame({
    'gender': gender,
    'race': race,
    'income': income,
    'credit_score': credit_score,
    'education': education,
    'employment_years': employment_years,
    'approved': approved
})

# Quick look at the raw approval rates per group
print(f"Dataset: {n} samples")
print(f"Approval rate by gender: {df.groupby('gender')['approved'].mean().to_dict()}")
print(f"Approval rate by race: {df.groupby('race')['approved'].mean().to_dict()}")

Disparate Impact Analysis

def disparate_impact_ratio(df, outcome_col, protected_col, privileged_val, unprivileged_val):
    """Calculate the disparate impact ratio between two groups.

    The ratio is the unprivileged group's mean outcome divided by the
    privileged group's mean outcome. The "80% rule" is treated as passed
    when that ratio is at least 0.8.

    Args:
        df: DataFrame holding one row per individual.
        outcome_col: Name of the outcome column (its mean is the group rate).
        protected_col: Name of the protected-attribute column.
        privileged_val: Value of ``protected_col`` marking the privileged group.
        unprivileged_val: Value of ``protected_col`` marking the unprivileged group.

    Returns:
        dict with keys ``privileged_rate``, ``unprivileged_rate``, ``ratio``
        and ``passes_80_percent`` (a plain ``bool``). If either group has no
        rows, its rate and the ratio are NaN and ``passes_80_percent`` is
        False. If the privileged rate is exactly 0 the ratio is ``inf``.
    """
    priv_rate = df.loc[df[protected_col] == privileged_val, outcome_col].mean()
    unpriv_rate = df.loc[df[protected_col] == unprivileged_val, outcome_col].mean()

    if pd.isna(priv_rate) or pd.isna(unpriv_rate):
        # An empty group has an undefined rate. Without this guard a missing
        # privileged group would fall through to `inf` and be reported as a pass.
        ratio = float('nan')
    elif priv_rate > 0:
        ratio = unpriv_rate / priv_rate
    else:
        ratio = float('inf')

    return {
        'privileged_rate': priv_rate,
        'unprivileged_rate': unpriv_rate,
        'ratio': ratio,
        # NaN compares False against 0.8, so undefined ratios never pass
        'passes_80_percent': bool(ratio >= 0.8)
    }

# Gender disparate impact (gender == 1, male, is treated as the privileged group)
di_gender = disparate_impact_ratio(df, 'approved', 'gender', 1, 0)
print("Disparate Impact (Gender):")
print(f"  Male approval: {di_gender['privileged_rate']:.3f}")
print(f"  Female approval: {di_gender['unprivileged_rate']:.3f}")
# The threshold hint previously printed a mis-encoded "greater than or equal" sign
print(f"  Ratio: {di_gender['ratio']:.3f} (≥0.8 = passes)")
print(f"  80% rule: {'PASS' if di_gender['passes_80_percent'] else 'FAIL'}")

# Race disparate impact: compare each non-white group against race == 0 (white)
for race_val, race_name in [(1, 'Black'), (2, 'Hispanic')]:
    di_race = disparate_impact_ratio(df, 'approved', 'race', 0, race_val)
    print(f"\nDisparate Impact (White vs {race_name}):")
    print(f"  Ratio: {di_race['ratio']:.3f}")
    print(f"  80% rule: {'PASS' if di_race['passes_80_percent'] else 'FAIL'}")

Demographic Parity

def demographic_parity(y_pred, sensitive_attr):
    """Measure how much the mean prediction differs between groups.

    Args:
        y_pred: Array of predictions, indexable with a boolean mask.
        sensitive_attr: Array of group labels, aligned with ``y_pred``.

    Returns:
        ``(rates, max_diff)`` where ``rates`` maps each distinct group label
        to the mean of ``y_pred`` within that group, and ``max_diff`` is the
        gap between the largest and smallest of those means (the statistical
        parity difference).
    """
    rates = {
        group: y_pred[sensitive_attr == group].mean()
        for group in np.unique(sensitive_attr)
    }

    # Spread between the most- and least-favoured group
    per_group = rates.values()
    max_diff = max(per_group) - min(per_group)

    return rates, max_diff

# Train model on the non-sensitive columns only
features = ['income', 'credit_score', 'education', 'employment_years']
X, y = df[features], df['approved']

# The sensitive columns go through the same split so that their rows stay
# aligned with X_test / y_test for the audits below.
(
    X_train, X_test,
    y_train, y_test,
    gender_train, gender_test,
    race_train, race_test,
) = train_test_split(
    X, y, df['gender'], df['race'],
    test_size=0.2, random_state=42,
)

model = GradientBoostingClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Demographic parity by gender
rates_gender, diff_gender = demographic_parity(y_pred, gender_test.values)
print("Demographic Parity (Gender):")
print(f"  Rates: {rates_gender}")
print(f"  Difference: {diff_gender:.4f}")

# Demographic parity by race
rates_race, diff_race = demographic_parity(y_pred, race_test.values)
print("\nDemographic Parity (Race):")
print(f"  Rates: {rates_race}")
print(f"  Difference: {diff_race:.4f}")

Equalized Odds

def equalized_odds(y_true, y_pred, sensitive_attr):
    """Compare true-positive and false-positive rates across groups.

    Labels and predictions are treated as binary with 1 as the positive
    class. Counts are taken directly from boolean masks, so a group in which
    only one class occurs (e.g. nobody was approved and nobody was predicted
    approved) is handled instead of failing on a non-2x2 confusion matrix.

    Args:
        y_true: Array-like of true binary labels.
        y_pred: Array-like of predicted binary labels.
        sensitive_attr: Array-like of group labels, aligned with the above.

    Returns:
        ``(metrics, tpr_diff, fpr_diff)`` where ``metrics`` maps each group
        label to ``{'TPR': ..., 'FPR': ...}`` and the two differences are the
        max-minus-min spread of TPR and FPR over the groups. A rate whose
        denominator is zero is reported as 0.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    sensitive_attr = np.asarray(sensitive_attr)

    metrics = {}
    for g in np.unique(sensitive_attr):
        mask = sensitive_attr == g
        actual_pos = y_true[mask] == 1
        predicted_pos = y_pred[mask] == 1

        tp = np.sum(actual_pos & predicted_pos)
        fn = np.sum(actual_pos & ~predicted_pos)
        fp = np.sum(~actual_pos & predicted_pos)
        tn = np.sum(~actual_pos & ~predicted_pos)

        # Guard the denominators: a group may have no positives or no negatives
        tpr = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0

        metrics[g] = {'TPR': tpr, 'FPR': fpr}

    # Equalized odds difference: spread of each rate across groups
    tpr_values = [m['TPR'] for m in metrics.values()]
    fpr_values = [m['FPR'] for m in metrics.values()]
    tpr_diff = max(tpr_values) - min(tpr_values)
    fpr_diff = max(fpr_values) - min(fpr_values)

    return metrics, tpr_diff, fpr_diff

# Audit the baseline model's error rates by gender
eq_metrics, tpr_diff, fpr_diff = equalized_odds(
    y_test.values, y_pred, gender_test.values
)
print("Equalized Odds (Gender):")
for group_value, group_rates in eq_metrics.items():
    # gender == 1 is male; every other value is reported as female
    if group_value == 1:
        group_label = "Male"
    else:
        group_label = "Female"
    print(f"  {group_label}: TPR={group_rates['TPR']:.3f}, FPR={group_rates['FPR']:.3f}")
print(f"  TPR difference: {tpr_diff:.4f}")
print(f"  FPR difference: {fpr_diff:.4f}")

Calibration Across Groups

def calibration_by_group(y_true, y_prob, sensitive_attr, n_bins=10, group_names=None):
    """Print and return a per-group reliability table.

    Predicted probabilities are split into ``n_bins`` equal-width bins over
    [0, 1]. For every non-empty bin the mean predicted probability
    ("expected") is paired with the observed positive rate ("observed").
    The last bin is closed on the right so a probability of exactly 1.0 is
    counted.

    Args:
        y_true: Array-like of true binary labels.
        y_prob: Array-like of predicted probabilities for the positive class.
        sensitive_attr: Array-like of group labels, aligned with the above.
        n_bins: Number of equal-width probability bins.
        group_names: Optional mapping from group label to display name. When
            omitted, group 1 is printed as "Male" and any other group as
            "Female", matching the gender encoding used in this file.

    Returns:
        dict mapping each group label to its list of
        ``(expected, observed)`` pairs, one per non-empty bin in ascending
        bin order. Only the first five pairs of each group are printed.
    """
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)
    sensitive_attr = np.asarray(sensitive_attr)

    bins = np.linspace(0, 1, n_bins + 1)
    results = {}

    for g in np.unique(sensitive_attr):
        mask = sensitive_attr == g
        y_t = y_true[mask]
        y_p = y_prob[mask]

        calibrations = []
        for i in range(n_bins):
            lower, upper = bins[i], bins[i + 1]
            if i == n_bins - 1:
                # Close the final bin so p == 1.0 is not silently dropped
                bin_mask = (y_p >= lower) & (y_p <= upper)
            else:
                bin_mask = (y_p >= lower) & (y_p < upper)

            if bin_mask.sum() > 0:
                # Compare against what the model actually predicted in this
                # bin rather than the bin midpoint, which can differ a lot
                # when scores cluster near one edge.
                expected = y_p[bin_mask].mean()
                observed = y_t[bin_mask].mean()
                calibrations.append((expected, observed))

        results[g] = calibrations

        if group_names is None:
            label = "Male" if g == 1 else "Female"
        else:
            label = group_names.get(g, f"Group {g}")
        print(f"\n{label} calibration:")
        for expected, observed in calibrations[:5]:
            print(f"  Expected: {expected:.2f}, Observed: {observed:.2f}")

    return results

# Keep only the second predict_proba column as the score audited below
y_prob = model.predict_proba(X_test)[:, 1]
calibration_by_group(
    y_true=y_test.values,
    y_prob=y_prob,
    sensitive_attr=gender_test.values,
)

Bias Mitigation Strategies

Pre-processing: Reweighting

def compute_reweights(df, protected_col, outcome_col):
    """Compute per-row sample weights that decouple group from outcome.

    Each (group, outcome) cell gets the weight
    ``P(group) * P(outcome) / P(group, outcome)``: the share the cell would
    have if group and outcome were independent, divided by its actual share.

    Args:
        df: DataFrame containing both columns.
        protected_col: Name of the protected-attribute column.
        outcome_col: Name of the outcome column.

    Returns:
        NumPy array with one weight per row of ``df``, in row order.
    """
    weights = np.ones(len(df))

    for group in df[protected_col].unique():
        in_group = df[protected_col] == group

        for outcome in df[outcome_col].unique():
            has_outcome = df[outcome_col] == outcome
            cell = in_group & has_outcome

            # Share of rows actually falling into this cell
            observed = cell.mean()
            # Share expected if group and outcome were independent
            expected = in_group.mean() * has_outcome.mean()

            # Empty cells have no rows to weight, so they are skipped
            if observed > 0:
                weights[cell] = expected / observed

    return weights

# Apply reweighting
# X_train is a shuffled subset of df, so slicing the first len(X_train) weights
# of a full-frame weight vector would attach weights to the wrong rows.
# Selecting the training rows by index first keeps the weights aligned 1:1
# with X_train and also keeps test rows out of the weight computation.
train_df = df.loc[X_train.index]
weights = compute_reweights(train_df, 'gender', 'approved')
model_reweighted = GradientBoostingClassifier(n_estimators=100, random_state=42)
model_reweighted.fit(X_train, y_train, sample_weight=weights)

# Compare demographic parity of the baseline and the reweighted model
y_pred_reweighted = model_reweighted.predict(X_test)
rates_before, diff_before = demographic_parity(y_pred, gender_test.values)
rates_after, diff_after = demographic_parity(y_pred_reweighted, gender_test.values)

print(f"Before reweighting: demographic parity diff = {diff_before:.4f}")
print(f"After reweighting:  demographic parity diff = {diff_after:.4f}")

In-processing: Adversarial Debiasing

import torch
import torch.nn as nn

class AdversarialDebiasing(nn.Module):
    """Predictor/adversary pair for adversarial removal of sensitive information.

    The predictor maps features to a probability. The adversary sees only that
    probability and tries to recover the sensitive attribute from it. Training
    alternates between improving the adversary and updating the predictor so
    that it fits the task while making the adversary's job harder.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of the predictor's hidden layer; the adversary's
            hidden layer uses half of it.
    """

    def __init__(self, input_dim, hidden_dim=64):
        super().__init__()

        # Predictor (task network): features -> probability of the positive class
        self.predictor = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )

        # Adversary: predictor output -> probability of the sensitive attribute
        self.adversary = nn.Sequential(
            nn.Linear(1, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, 1),
            nn.Sigmoid()
        )

    def forward(self, x, lambda_adv=1.0):
        """Return ``(pred, adv_pred)`` for a batch of features.

        ``lambda_adv`` is accepted for signature compatibility but is not
        used in the forward pass.
        """
        # Task prediction
        pred = self.predictor(x)

        # Adversary prediction (should fail to predict sensitive attribute)
        adv_pred = self.adversary(pred)

        return pred, adv_pred

    def train_step(self, x, y_true, sensitive, optimizer_pred, optimizer_adv, lambda_adv=1.0):
        """Run one adversary update followed by one predictor update.

        Args:
            x: Feature batch.
            y_true: Binary task labels, one per row of ``x``. May be 1-D or
                column-shaped and of any numeric dtype; it is reshaped and
                cast to match the predictor output.
            sensitive: Binary sensitive attribute, same conventions as
                ``y_true``.
            optimizer_pred: Optimizer used to step after the predictor loss.
            optimizer_adv: Optimizer used to step after the adversary loss.
            lambda_adv: Weight of the adversarial term in the predictor loss.

        Returns:
            ``(task_loss, adv_loss)`` as Python floats: the predictor's task
            loss from the second phase and the adversary's loss from the
            first phase.
        """
        bce = nn.BCELoss()

        # Train adversary on detached predictions so the predictor is untouched
        pred = self.predictor(x).detach()

        # BCELoss needs targets with the same shape and dtype as its input;
        # labels commonly arrive as 1-D integer tensors.
        y_true = y_true.reshape(pred.shape).to(pred.dtype)
        sensitive = sensitive.reshape(pred.shape).to(pred.dtype)

        adv_pred = self.adversary(pred)
        adv_loss = bce(adv_pred, sensitive)

        optimizer_adv.zero_grad()
        adv_loss.backward()
        optimizer_adv.step()

        # Train predictor: minimize the task loss while maximizing the
        # adversary's loss (hence the minus sign below)
        pred = self.predictor(x)
        task_loss = bce(pred, y_true)

        adv_pred = self.adversary(pred)
        adv_loss_pred = bce(adv_pred, sensitive)

        total_loss = task_loss - lambda_adv * adv_loss_pred

        optimizer_pred.zero_grad()
        total_loss.backward()
        optimizer_pred.step()

        return task_loss.item(), adv_loss.item()

# Example usage: only builds the network and prints its two sub-modules;
# no training step is run here.
model_adv = AdversarialDebiasing(input_dim=len(features))
print("Adversarial debiasing model initialized")
for part_name, part in (
    ("Predictor", model_adv.predictor),
    ("Adversary", model_adv.adversary),
):
    print(f"{part_name}: {part}")

Post-processing: Threshold Adjustment

def equalized_odds_threshold(y_true, y_prob, sensitive_attr, n_thresholds=100):
    """Find the decision threshold with the smallest TPR gap across groups.

    A single shared threshold is searched on an evenly spaced grid over
    [0, 1]. Only the true-positive rate is equalized (FPR is not part of the
    objective). Thresholds at which every sample receives the same prediction
    are skipped: predicting everything positive gives TPR = 1 in every group,
    so such a threshold would always "win" with a gap of 0 while being
    useless as a classifier.

    Labels are treated as binary with 1 as the positive class. A group with
    no positive labels contributes a TPR of 0.

    Args:
        y_true: Array-like of true binary labels.
        y_prob: Array-like of predicted positive-class probabilities.
        sensitive_attr: Array-like of group labels, aligned with the above.
        n_thresholds: Number of grid points between 0 and 1 inclusive.

    Returns:
        ``(best_threshold, best_diff)``: the first grid threshold achieving
        the minimal TPR gap and that gap. If no usable threshold exists the
        result is ``(0.5, inf)``.
    """
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)
    sensitive_attr = np.asarray(sensitive_attr)

    # Hoisted out of the threshold loop: neither depends on the threshold
    actual_pos = y_true == 1
    group_masks = [sensitive_attr == g for g in np.unique(sensitive_attr)]

    best_threshold = 0.5
    best_diff = float('inf')

    for threshold in np.linspace(0, 1, n_thresholds):
        predicted_pos = y_prob >= threshold

        # Skip degenerate thresholds (all-positive or all-negative predictions)
        if predicted_pos.all() or not predicted_pos.any():
            continue

        tpr_values = []
        for mask in group_masks:
            group_pos = actual_pos & mask
            n_pos = group_pos.sum()
            if n_pos > 0:
                tpr_values.append((group_pos & predicted_pos).sum() / n_pos)
            else:
                tpr_values.append(0.0)

        # Equalize TPR
        tpr_diff = max(tpr_values) - min(tpr_values)

        # Strict comparison keeps the lowest threshold among ties
        if tpr_diff < best_diff:
            best_diff = tpr_diff
            best_threshold = threshold

    return best_threshold, best_diff

# Search for the shared threshold with the smallest TPR gap between genders
optimal_threshold, diff = equalized_odds_threshold(
    y_true=y_test.values,
    y_prob=y_prob,
    sensitive_attr=gender_test.values,
)
print(f"Optimal threshold: {optimal_threshold:.3f}")
print(f"TPR difference at threshold: {diff:.4f}")

# Apply threshold: turn scores into 0/1 decisions and re-check demographic parity
meets_threshold = y_prob >= optimal_threshold
y_pred_fair = meets_threshold.astype(int)
rates_fair, diff_fair = demographic_parity(y_pred_fair, gender_test.values)
print(f"Demographic parity after threshold: {diff_fair:.4f}")

Fairness Metrics Summary

def comprehensive_fairness_report(y_true, y_pred, y_prob, sensitive_attr, group_names=None):
    """Print a fairness audit covering overall, per-group and gap metrics.

    The report shows overall accuracy and AUC, then for each group its size,
    mean prediction ("approval rate") and accuracy, and finally the
    demographic parity difference and the equalized-odds TPR/FPR differences.

    Args:
        y_true: Array of true binary labels.
        y_pred: Array of predicted binary labels.
        y_prob: Array of predicted scores, used only for the AUC.
        sensitive_attr: Array of group labels, aligned with the above.
        group_names: Optional mapping from group label to display name;
            groups missing from it are shown as "Group <label>".

    Returns:
        None. Everything is written to standard output.
    """
    distinct_groups = np.unique(sensitive_attr)
    if group_names is None:
        group_names = {g: f"Group {g}" for g in distinct_groups}

    rule = "=" * 60
    print(rule)
    print("FAIRNESS AUDIT REPORT")
    print(rule)

    # Overall metrics
    overall_accuracy = (y_pred == y_true).mean()
    print(f"\nOverall Accuracy: {overall_accuracy:.4f}")
    overall_auc = roc_auc_score(y_true, y_prob)
    print(f"Overall AUC: {overall_auc:.4f}")

    # Per-group metrics
    print("\nPer-Group Metrics:")
    for g in distinct_groups:
        members = sensitive_attr == g
        display_name = group_names.get(g, f"Group {g}")
        group_pred = y_pred[members]
        print(f"\n  {display_name}:")
        print(f"    Size: {members.sum()}")
        print(f"    Approval rate: {group_pred.mean():.3f}")
        print(f"    Accuracy: {(group_pred == y_true[members]).mean():.3f}")

    # Demographic parity: spread of mean prediction across groups
    _, parity_gap = demographic_parity(y_pred, sensitive_attr)
    print(f"\nDemographic Parity Difference: {parity_gap:.4f}")

    # Equalized odds: spread of TPR and FPR across groups
    _, tpr_gap, fpr_gap = equalized_odds(y_true, y_pred, sensitive_attr)
    print(f"Equalized Odds TPR Difference: {tpr_gap:.4f}")
    print(f"Equalized Odds FPR Difference: {fpr_gap:.4f}")

    print(rule)

# Full audit of the baseline model's test-set predictions, grouped by gender
comprehensive_fairness_report(
    y_true=y_test.values,
    y_pred=y_pred,
    y_prob=y_prob,
    sensitive_attr=gender_test.values,
    group_names={0: 'Female', 1: 'Male'},
)

Best Practices

Audit before deployment – test for bias in held-out data
Use multiple fairness metrics – no single metric captures all fairness
Document tradeoffs – improving one metric may worsen another
Monitor in production – bias can emerge from data drift
Involve domain experts – statistical fairness ≠ contextual fairness
Legal compliance – check local regulations (EU AI Act, NYC LL144)

Summary

Fairness auditing is essential for responsible ML. Demographic parity, equalized odds, and calibration checks detect bias. Mitigation strategies include reweighting, adversarial debiasing, and threshold adjustment. Master these techniques to build models that are both accurate and equitable.