Fairness and Bias Audit
Machine learning models can perpetuate and amplify societal biases. A hiring model might discriminate against women. A lending model might deny loans to minorities. Fairness auditing detects and mitigates these biases, ensuring equitable outcomes.
Fairness Metrics Comparison
Why Fairness Matters
Biased models harm individuals and expose organizations to legal liability. The EU AI Act, NYC Local Law 144, and other regulations now require bias audits. Fairness isn't optional – it's a legal and ethical requirement.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import (
confusion_matrix, classification_report,
roc_auc_score
)
import warnings
warnings.filterwarnings('ignore')
Generate Biased Dataset
# Build a synthetic loan-approval dataset whose features and labels both
# depend on the sensitive attributes, so the audit below has bias to detect.
# Seed NumPy's global RNG so every run draws the same dataset. The order of
# the random draws below therefore matters for reproducibility.
np.random.seed(42)
n = 5000
# Sensitive attributes
gender = np.random.binomial(1, 0.5, n) # 0=female, 1=male
race = np.random.choice([0, 1, 2], n, p=[0.6, 0.25, 0.15]) # 0=white, 1=black, 2=hispanic
# Features (some correlated with sensitive attributes due to historical bias)
# Income: log-normal, with a larger location parameter for gender == 1.
# Both candidate draws are generated for all n rows; np.where picks per row.
income = np.where(gender == 1,
np.random.lognormal(10.5, 0.6, n),
np.random.lognormal(10.0, 0.6, n))
# Multiplicative race adjustment: x1.1 for race 0, x0.85 for race 1, x0.9 otherwise.
income *= np.where(race == 0, 1.1, np.where(race == 1, 0.85, 0.9))
# Credit score: normal, mean 680 for gender == 1 and 650 otherwise (std 70).
credit_score = np.where(gender == 1,
np.random.normal(680, 70, n),
np.random.normal(650, 70, n))
# Additive race adjustment: +20 for race 0, -30 for race 1, -15 otherwise.
credit_score += np.where(race == 0, 20, np.where(race == 1, -30, -15))
# These two features are drawn independently of the sensitive attributes.
education = np.random.choice([0, 1, 2], n, p=[0.3, 0.5, 0.2])
employment_years = np.random.exponential(5, n)
# Target (with bias in historical data)
# The label is sampled from a logistic model: thresholded income and credit
# score, education and tenure raise the log-odds; gender and race == 1 lower it.
# NOTE(review): gender == 1 is coded as male above, so the "- 0.2 * gender"
# term lowers approval odds for men, while the income and credit-score shifts
# favour men. Confirm this is the intended direction of the label bias.
log_odds = (
-3
+ 0.5 * (income > 50000).astype(int)
+ 0.8 * (credit_score > 700).astype(int)
+ 0.3 * education
+ 0.1 * employment_years
- 0.2 * gender # Gender bias in historical labels
- 0.3 * (race == 1).astype(int) # Racial bias
)
# Logistic (sigmoid) transform of the log-odds into an approval probability.
prob = 1 / (1 + np.exp(-log_odds))
# One Bernoulli draw per applicant.
approved = np.random.binomial(1, prob)
# Assemble everything, including the sensitive columns, into one frame.
df = pd.DataFrame({
'gender': gender,
'race': race,
'income': income,
'credit_score': credit_score,
'education': education,
'employment_years': employment_years,
'approved': approved
})
# Raw approval rates per group in the generated labels (before any model).
print(f"Dataset: {n} samples")
print(f"Approval rate by gender: {df.groupby('gender')['approved'].mean().to_dict()}")
print(f"Approval rate by race: {df.groupby('race')['approved'].mean().to_dict()}")
Disparate Impact Analysis
def disparate_impact_ratio(df, outcome_col, protected_col, privileged_val, unprivileged_val):
    """Compute the disparate impact ratio between two groups.

    The ratio is the favourable-outcome rate of the unprivileged group divided
    by that of the privileged group. Under the "80% rule" a ratio of at least
    0.8 is treated as passing.

    Args:
        df: DataFrame holding the outcome and protected-attribute columns.
        outcome_col: Name of the binary (0/1) outcome column.
        protected_col: Name of the protected-attribute column.
        privileged_val: Value of ``protected_col`` marking the privileged group.
        unprivileged_val: Value of ``protected_col`` marking the unprivileged group.

    Returns:
        dict with keys ``privileged_rate``, ``unprivileged_rate``, ``ratio``
        and ``passes_80_percent`` (a plain ``bool``).

        If either group has no rows its rate is NaN; the ratio is then NaN and
        the check is reported as not passed. If the privileged rate is exactly
        zero the ratio is ``inf``.
    """
    priv_rate = df.loc[df[protected_col] == privileged_val, outcome_col].mean()
    unpriv_rate = df.loc[df[protected_col] == unprivileged_val, outcome_col].mean()

    if pd.isna(priv_rate) or pd.isna(unpriv_rate):
        # An empty group has no rate. Without this guard a NaN privileged rate
        # would fail the "> 0" test, yield inf, and be reported as a pass.
        ratio = float('nan')
    elif priv_rate > 0:
        ratio = unpriv_rate / priv_rate
    else:
        ratio = float('inf')

    return {
        'privileged_rate': priv_rate,
        'unprivileged_rate': unpriv_rate,
        'ratio': ratio,
        # NaN >= 0.8 is False, so an undefined ratio never counts as passing.
        'passes_80_percent': bool(ratio >= 0.8),
    }
# Gender disparate impact (gender == 1 is the privileged group, 0 the unprivileged)
di_gender = disparate_impact_ratio(df, 'approved', 'gender', 1, 0)
print("Disparate Impact (Gender):")
print(f" Male approval: {di_gender['privileged_rate']:.3f}")
print(f" Female approval: {di_gender['unprivileged_rate']:.3f}")
# The threshold hint previously contained mis-encoded characters; it is the
# "greater than or equal" sign of the 80% rule.
print(f" Ratio: {di_gender['ratio']:.3f} (≥0.8 = passes)")
print(f" 80% rule: {'PASS' if di_gender['passes_80_percent'] else 'FAIL'}")
# Race disparate impact: race == 0 is the reference group for each comparison
for race_val, race_name in [(1, 'Black'), (2, 'Hispanic')]:
    di_race = disparate_impact_ratio(df, 'approved', 'race', 0, race_val)
    print(f"\nDisparate Impact (White vs {race_name}):")
    print(f" Ratio: {di_race['ratio']:.3f}")
    print(f" 80% rule: {'PASS' if di_race['passes_80_percent'] else 'FAIL'}")
Demographic Parity
def demographic_parity(y_pred, sensitive_attr):
    """Measure how much the positive-prediction rate varies between groups.

    Args:
        y_pred: Array of predictions (0/1), indexable by a boolean mask.
        sensitive_attr: Array of group identifiers aligned with ``y_pred``.

    Returns:
        A ``(rates, max_diff)`` tuple: ``rates`` maps each distinct group value
        to the mean of ``y_pred`` within that group, and ``max_diff`` is the
        largest rate minus the smallest (statistical parity difference).
    """
    positive_rate = {
        group: y_pred[sensitive_attr == group].mean()
        for group in np.unique(sensitive_attr)
    }
    observed = list(positive_rate.values())
    spread = max(observed) - min(observed)
    return positive_rate, spread
# Fit the baseline classifier. The sensitive columns are left out of the
# feature list but are split alongside X and y so they stay row-aligned.
features = ['income', 'credit_score', 'education', 'employment_years']
X, y = df[features], df['approved']
(
    X_train, X_test,
    y_train, y_test,
    gender_train, gender_test,
    race_train, race_test,
) = train_test_split(X, y, df['gender'], df['race'], test_size=0.2, random_state=42)
model = GradientBoostingClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Positive-prediction rates on the held-out set, grouped by gender
rates_gender, diff_gender = demographic_parity(y_pred, gender_test.values)
print("Demographic Parity (Gender):")
print(f" Rates: {rates_gender}")
print(f" Difference: {diff_gender:.4f}")

# Same check, grouped by race
rates_race, diff_race = demographic_parity(y_pred, race_test.values)
print("\nDemographic Parity (Race):")
print(f" Rates: {rates_race}")
print(f" Difference: {diff_race:.4f}")
Equalized Odds
def equalized_odds(y_true, y_pred, sensitive_attr):
    """Compute per-group TPR/FPR and their largest gaps across groups.

    The label value 1 is treated as the positive class; every other value is
    treated as negative. Counts are taken directly from boolean masks, so a
    group in which only one class occurs still yields all four confusion
    counts (the missing ones are simply zero).

    Args:
        y_true: Array-like of ground-truth labels (0/1).
        y_pred: Array-like of predicted labels (0/1), aligned with ``y_true``.
        sensitive_attr: Array-like of group identifiers, aligned with ``y_true``.

    Returns:
        A ``(metrics, tpr_diff, fpr_diff)`` tuple. ``metrics`` maps each group
        value to ``{'TPR': ..., 'FPR': ...}``; a rate whose denominator is zero
        is reported as 0. ``tpr_diff`` / ``fpr_diff`` are max minus min of the
        respective rate over all groups (0.0 when there are no groups).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    sensitive_attr = np.asarray(sensitive_attr)

    metrics = {}
    for g in np.unique(sensitive_attr):
        mask = sensitive_attr == g
        actual_pos = y_true[mask] == 1
        predicted_pos = y_pred[mask] == 1

        tp = np.sum(actual_pos & predicted_pos)
        fn = np.sum(actual_pos & ~predicted_pos)
        fp = np.sum(~actual_pos & predicted_pos)
        tn = np.sum(~actual_pos & ~predicted_pos)

        tpr = tp / (tp + fn) if (tp + fn) > 0 else 0
        fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
        metrics[g] = {'TPR': tpr, 'FPR': fpr}

    # Equalized odds difference
    tprs = [m['TPR'] for m in metrics.values()]
    fprs = [m['FPR'] for m in metrics.values()]
    tpr_diff = max(tprs, default=0.0) - min(tprs, default=0.0)
    fpr_diff = max(fprs, default=0.0) - min(fprs, default=0.0)
    return metrics, tpr_diff, fpr_diff
# Per-gender error rates of the baseline model on the held-out set
eq_metrics, tpr_diff, fpr_diff = equalized_odds(y_test.values, y_pred, gender_test.values)
print("Equalized Odds (Gender):")
for group_code, group_rates in eq_metrics.items():
    # gender == 1 is male; any other code is reported as female
    who = "Male" if group_code == 1 else "Female"
    print(f" {who}: TPR={group_rates['TPR']:.3f}, FPR={group_rates['FPR']:.3f}")
print(f" TPR difference: {tpr_diff:.4f}")
print(f" FPR difference: {fpr_diff:.4f}")
Calibration Across Groups
def calibration_by_group(y_true, y_prob, sensitive_attr, n_bins=10):
    """Print and return a binned calibration table for each group.

    Predicted probabilities are split into ``n_bins`` equal-width bins on
    [0, 1]. For every non-empty bin the bin midpoint ("expected") is paired
    with the observed positive rate of the samples falling in it.

    Args:
        y_true: Array-like of ground-truth labels (0/1).
        y_prob: Array-like of predicted probabilities, aligned with ``y_true``.
        sensitive_attr: Array-like of group identifiers, aligned with ``y_true``.
        n_bins: Number of equal-width probability bins.

    Returns:
        dict mapping each group value to its list of ``(expected, observed)``
        pairs, one per non-empty bin in ascending bin order. Only the first
        five pairs per group are printed; the returned lists are complete.
    """
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)
    sensitive_attr = np.asarray(sensitive_attr)

    # Bin edges do not depend on the group, so compute them once.
    bins = np.linspace(0, 1, n_bins + 1)
    results = {}
    for g in np.unique(sensitive_attr):
        mask = sensitive_attr == g
        y_t = y_true[mask]
        y_p = y_prob[mask]

        calibrations = []
        for i in range(n_bins):
            lower, upper = bins[i], bins[i + 1]
            if i == n_bins - 1:
                # Close the last bin on the right so a probability of exactly
                # 1.0 is counted instead of silently dropped.
                bin_mask = (y_p >= lower) & (y_p <= upper)
            else:
                bin_mask = (y_p >= lower) & (y_p < upper)
            if bin_mask.sum() > 0:
                observed = y_t[bin_mask].mean()
                expected = (lower + upper) / 2
                calibrations.append((expected, observed))

        # Group code 1 is labelled "Male"; every other code "Female".
        label = "Male" if g == 1 else "Female"
        print(f"\n{label} calibration:")
        for expected, observed in calibrations[:5]:
            print(f" Expected: {expected:.2f}, Observed: {observed:.2f}")
        results[g] = calibrations
    return results
# Column 1 of predict_proba is the probability assigned to class 1 (approved)
y_prob = model.predict_proba(X_test)[:, 1]
calibration_by_group(
    y_true=y_test.values,
    y_prob=y_prob,
    sensitive_attr=gender_test.values,
)
Bias Mitigation Strategies
Pre-processing: Reweighting
def compute_reweights(df, protected_col, outcome_col):
    """Derive per-row sample weights that decouple group from outcome.

    Every (group, outcome) cell is weighted by
    ``P(group) * P(outcome) / P(group, outcome)``, i.e. the share the cell
    would have if group and outcome were independent divided by the share it
    actually has. Cells that do not occur leave the default weight untouched.

    Args:
        df: DataFrame containing both columns.
        protected_col: Name of the protected-attribute column.
        outcome_col: Name of the outcome column.

    Returns:
        1-D float array of length ``len(df)``, positionally aligned with the
        rows of ``df``.
    """
    protected = df[protected_col]
    outcome = df[outcome_col]
    outcome_values = outcome.unique()

    weights = np.ones(len(df))
    for group in protected.unique():
        in_group = protected == group
        for value in outcome_values:
            has_value = outcome == value
            cell = in_group & has_value
            # Share of rows actually in this cell
            observed = cell.mean()
            if observed > 0:
                # Share expected under independence, relative to the observed share
                weights[cell] = in_group.mean() * has_value.mean() / observed
    return weights
# Apply reweighting
# train_test_split shuffles the rows, so the first len(X_train) entries of a
# weight vector built on the full frame do not correspond to the rows of
# X_train. Build the weights from the training rows themselves, in X_train's
# order; this also keeps held-out labels out of the weight computation.
train_rows = df.loc[X_train.index]
weights = compute_reweights(train_rows, 'gender', 'approved')
model_reweighted = GradientBoostingClassifier(n_estimators=100, random_state=42)
model_reweighted.fit(X_train, y_train, sample_weight=weights)
y_pred_reweighted = model_reweighted.predict(X_test)
# Compare the gender parity gap of the baseline and the reweighted model
rates_before, diff_before = demographic_parity(y_pred, gender_test.values)
rates_after, diff_after = demographic_parity(y_pred_reweighted, gender_test.values)
print(f"Before reweighting: demographic parity diff = {diff_before:.4f}")
print(f"After reweighting: demographic parity diff = {diff_after:.4f}")
In-processing: Adversarial Debiasing
import torch
import torch.nn as nn
class AdversarialDebiasing(nn.Module):
    """Predictor/adversary pair for adversarial debiasing.

    The predictor maps the input features to a single sigmoid probability.
    The adversary receives only that probability and tries to recover the
    (binary) sensitive attribute from it. Training the predictor to make the
    adversary fail pushes sensitive information out of its output.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of the predictor's hidden layer; the adversary's
            hidden layer is half as wide.
    """

    def __init__(self, input_dim, hidden_dim=64):
        super().__init__()
        # Predictor (task network)
        self.predictor = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )
        # Adversary (tries to predict sensitive attribute from the prediction)
        self.adversary = nn.Sequential(
            nn.Linear(1, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, 1),
            nn.Sigmoid()
        )

    @staticmethod
    def _as_target(values, like):
        """Return *values* as a tensor with the dtype, device and shape of *like*."""
        return torch.as_tensor(values, dtype=like.dtype, device=like.device).reshape(like.shape)

    def forward(self, x, lambda_adv=1.0):
        """Return ``(pred, adv_pred)`` for a batch ``x``.

        ``lambda_adv`` is accepted for signature compatibility but is not used
        in the forward pass; the trade-off weight only matters in ``train_step``.
        """
        # Task prediction
        pred = self.predictor(x)
        # Adversary prediction (should fail to predict sensitive attribute)
        adv_pred = self.adversary(pred)
        return pred, adv_pred

    def train_step(self, x, y_true, sensitive, optimizer_pred, optimizer_adv, lambda_adv=1.0):
        """Run one alternating update of the adversary and then the predictor.

        Args:
            x: Feature batch fed to the predictor.
            y_true: Binary task labels, one per row of ``x``. Any shape with
                the right number of elements and any numeric dtype is
                accepted; it is converted to match the predictor output.
            sensitive: Binary sensitive attribute, same conventions as ``y_true``.
            optimizer_pred: Optimizer over the predictor's parameters.
            optimizer_adv: Optimizer over the adversary's parameters.
            lambda_adv: Weight of the adversarial term in the predictor loss.

        Returns:
            ``(task_loss, adv_loss)`` as Python floats: the predictor's task
            loss and the adversary's loss from its own update step.
        """
        bce = nn.BCELoss()

        # Train adversary on detached predictions so the predictor is not updated.
        pred = self.predictor(x).detach()
        # BCELoss needs float targets shaped like its input, so coerce 1-D or
        # integer labels to the predictor's (N, 1) float output.
        y_true = self._as_target(y_true, pred)
        sensitive = self._as_target(sensitive, pred)
        adv_pred = self.adversary(pred)
        adv_loss = bce(adv_pred, sensitive)
        optimizer_adv.zero_grad()
        adv_loss.backward()
        optimizer_adv.step()

        # Train predictor: minimize the task loss while maximizing the
        # adversary's loss (hence the minus sign on the adversarial term).
        pred = self.predictor(x)
        task_loss = bce(pred, y_true)
        adv_pred = self.adversary(pred)
        adv_loss_pred = bce(adv_pred, sensitive)
        total_loss = task_loss - lambda_adv * adv_loss_pred
        optimizer_pred.zero_grad()
        total_loss.backward()
        optimizer_pred.step()
        return task_loss.item(), adv_loss.item()
# Instantiate the network for the tabular feature set and show both sub-networks
model_adv = AdversarialDebiasing(input_dim=len(features))
print("Adversarial debiasing model initialized")
for title, net in (("Predictor", model_adv.predictor), ("Adversary", model_adv.adversary)):
    print(f"{title}: {net}")
Post-processing: Threshold Adjustment
def equalized_odds_threshold(y_true, y_prob, sensitive_attr, n_thresholds=100):
    """Find the single decision threshold with the smallest TPR gap across groups.

    Candidate thresholds are ``n_thresholds`` evenly spaced values on [0, 1].
    A sample is predicted positive when ``y_prob >= threshold``. Only the true
    positive rate is equalized; false positive rates are not considered.

    Thresholds at which every sample receives the same prediction are skipped:
    a constant classifier has identical rates in every group by construction,
    so it would always win the search (threshold 0.0 approves everyone)
    without saying anything about fairness.

    Args:
        y_true: Array-like of ground-truth labels; 1 is the positive class.
        y_prob: Array-like of predicted probabilities, aligned with ``y_true``.
        sensitive_attr: Array-like of group identifiers, aligned with ``y_true``.
        n_thresholds: Number of candidate thresholds to evaluate.

    Returns:
        ``(best_threshold, best_diff)``: the first threshold attaining the
        smallest max-minus-min TPR over groups, and that gap. If no candidate
        produces non-constant predictions, ``(0.5, inf)`` is returned.
    """
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)
    sensitive_attr = np.asarray(sensitive_attr)

    # Each group's actual positives do not depend on the threshold: build once.
    group_positives = [
        (sensitive_attr == g) & (y_true == 1) for g in np.unique(sensitive_attr)
    ]

    best_threshold = 0.5
    best_diff = float('inf')
    for threshold in np.linspace(0, 1, n_thresholds):
        predicted_pos = y_prob >= threshold
        if predicted_pos.all() or not predicted_pos.any():
            # Constant predictions: trivially "fair", never a useful answer.
            continue

        tpr_values = []
        for positives in group_positives:
            n_pos = positives.sum()
            # A group without actual positives contributes a TPR of 0.
            tpr_values.append((predicted_pos & positives).sum() / n_pos if n_pos > 0 else 0)

        tpr_diff = max(tpr_values) - min(tpr_values)
        if tpr_diff < best_diff:
            best_diff = tpr_diff
            best_threshold = threshold
    return best_threshold, best_diff
# Search for the cut-off with the smallest gender TPR gap on the held-out set
optimal_threshold, diff = equalized_odds_threshold(
    y_true=y_test.values,
    y_prob=y_prob,
    sensitive_attr=gender_test.values,
)
print(f"Optimal threshold: {optimal_threshold:.3f}")
print(f"TPR difference at threshold: {diff:.4f}")
# Re-label the held-out set with the selected cut-off and re-check parity
above_cutoff = y_prob >= optimal_threshold
y_pred_fair = above_cutoff.astype(int)
rates_fair, diff_fair = demographic_parity(y_pred_fair, gender_test.values)
print(f"Demographic parity after threshold: {diff_fair:.4f}")
Fairness Metrics Summary
def comprehensive_fairness_report(y_true, y_pred, y_prob, sensitive_attr, group_names=None):
    """Print a fairness audit covering overall, per-group and gap metrics.

    Args:
        y_true: Array of ground-truth labels.
        y_pred: Array of hard predictions aligned with ``y_true``.
        y_prob: Array of predicted probabilities, used for the AUC.
        sensitive_attr: Array of group identifiers aligned with ``y_true``.
        group_names: Optional mapping from group value to display name.
            Groups missing from the mapping are shown as ``"Group <value>"``.

    Returns:
        None. The report is written to standard output.
    """
    rule = "=" * 60
    group_ids = np.unique(sensitive_attr)
    if group_names is None:
        group_names = {gid: f"Group {gid}" for gid in group_ids}

    print(rule)
    print("FAIRNESS AUDIT REPORT")
    print(rule)

    # Whole-population performance
    overall_accuracy = (y_pred == y_true).mean()
    print(f"\nOverall Accuracy: {overall_accuracy:.4f}")
    overall_auc = roc_auc_score(y_true, y_prob)
    print(f"Overall AUC: {overall_auc:.4f}")

    # Size, positive-prediction rate and accuracy of each group
    print("\nPer-Group Metrics:")
    for gid in group_ids:
        member = sensitive_attr == gid
        display_name = group_names.get(gid, f"Group {gid}")
        group_pred = y_pred[member]
        print(f"\n {display_name}:")
        print(f" Size: {member.sum()}")
        print(f" Approval rate: {group_pred.mean():.3f}")
        print(f" Accuracy: {(group_pred == y_true[member]).mean():.3f}")

    # Gap in positive-prediction rate between groups
    _, parity_gap = demographic_parity(y_pred, sensitive_attr)
    print(f"\nDemographic Parity Difference: {parity_gap:.4f}")

    # Gaps in error rates between groups
    _, tpr_gap, fpr_gap = equalized_odds(y_true, y_pred, sensitive_attr)
    print(f"Equalized Odds TPR Difference: {tpr_gap:.4f}")
    print(f"Equalized Odds FPR Difference: {fpr_gap:.4f}")
    print(rule)
# Full audit of the baseline model on the held-out set, grouped by gender
gender_labels = {0: 'Female', 1: 'Male'}
comprehensive_fairness_report(
    y_test.values,
    y_pred,
    y_prob,
    gender_test.values,
    group_names=gender_labels,
)
Best Practices
- Audit before deployment – test for bias in held-out data
- Use multiple fairness metrics – no single metric captures all fairness
- Document tradeoffs – improving one metric may worsen another
- Monitor in production – bias can emerge from data drift
- Involve domain experts – statistical fairness ≠ contextual fairness
- Legal compliance – check local regulations (EU AI Act, NYC LL144)
Summary
Fairness auditing is essential for responsible ML. Demographic parity, equalized odds, and calibration checks detect bias. Mitigation strategies include reweighting, adversarial debiasing, and threshold adjustment. Master these techniques to build models that are both accurate and equitable.