Production ML Checklist
Difficulty: Senior Level | Companies: Google, Meta, Netflix, Uber, Stripe
Production Readiness
Use this checklist before deploying any ML model to production.
ℹ️
Google's ML launch checklist prevents 95% of production incidents through systematic validation.
Pre-Launch Checklist
# production_checklist.py
from typing import Dict, List, Optional
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
import json
class CheckStatus(Enum):
    """State of a single checklist item.

    Each member's value is the lowercase string form of its name.
    """

    PASSED = "passed"
    FAILED = "failed"
    SKIPPED = "skipped"
    PENDING = "pending"
@dataclass
class ChecklistItem:
    """One entry of a production-readiness checklist.

    Attributes:
        name: Label of the check.
        category: Label of the group the check belongs to.
        status: Current ``CheckStatus`` of the check.
        description: Short text describing the check.
        owner: Optional owner of the check; ``None`` when not set.
        notes: Optional free-form notes; ``None`` when not set.
    """

    # Required fields, in positional-constructor order.
    name: str
    category: str
    status: CheckStatus
    description: str

    # Optional fields.
    owner: Optional[str] = None
    notes: Optional[str] = None
@dataclass
class ProductionChecklist:
    """A set of checklist items for one version of one model.

    Attributes:
        model_name: Name of the model the checklist belongs to.
        version: Version string of that model.
        items: The checklist entries; a fresh empty list per instance.
        created_at: ISO-8601 string taken from ``datetime.now()`` (naive,
            local time) when the instance is constructed.
    """

    model_name: str
    version: str
    # default_factory gives every instance its own list rather than a shared one.
    items: List[ChecklistItem] = field(default_factory=list)
    # Evaluated per instance at construction time, not once at import time.
    created_at: str = field(default_factory=lambda: datetime.now().isoformat())
class ProductionReadinessChecker:
    """Create, update, score and report on production-readiness checklists.

    Every checklist returned by ``create_checklist`` is also kept in
    ``self.checklists``.
    """

    # Report label for each CheckStatus value. Defined once here rather than
    # being rebuilt for every item while rendering a report.
    _STATUS_LABELS = {"passed": "PASS", "failed": "FAIL", "pending": "TODO", "skipped": "SKIP"}

    # (name, category, description) of the items every new checklist starts with.
    _DEFAULT_ITEMS = (
        ("Model Performance", "Model Quality", "Accuracy meets threshold"),
        ("Model Fairness", "Model Quality", "No bias detected"),
        ("Latency SLA", "Performance", "P99 < 100ms"),
        ("Throughput", "Performance", "Handles expected RPS"),
        ("Data Validation", "Data", "Schema validation passes"),
        ("Feature Store", "Data", "Features available"),
        ("Model Registry", "MLOps", "Model registered"),
        ("Monitoring", "MLOps", "Metrics configured"),
        ("Alerting", "MLOps", "Alerts configured"),
        ("Rollback Plan", "Operations", "Rollback tested"),
        ("Load Testing", "Testing", "Load test passed"),
        ("Security Scan", "Security", "No vulnerabilities"),
    )

    def __init__(self):
        self.checklists: List[ProductionChecklist] = []

    def create_checklist(self, model_name: str, version: str) -> ProductionChecklist:
        """Build a checklist pre-populated with the default items, all PENDING.

        Args:
            model_name: Name of the model being launched.
            version: Version string of the model.

        Returns:
            The new checklist, which is also appended to ``self.checklists``.
        """
        checklist = ProductionChecklist(model_name=model_name, version=version)
        checklist.items = [
            ChecklistItem(name, category, CheckStatus.PENDING, description)
            for name, category, description in self._DEFAULT_ITEMS
        ]
        self.checklists.append(checklist)
        return checklist

    def update_item(
        self,
        checklist: ProductionChecklist,
        item_name: str,
        status: CheckStatus,
        notes: str = "",
    ) -> bool:
        """Set the status and notes of the first item named ``item_name``.

        Args:
            checklist: Checklist containing the item.
            item_name: Exact ``name`` of the item to update.
            status: New status for the item.
            notes: Replacement notes; the default empty string clears any
                existing notes.

        Returns:
            ``True`` if a matching item was updated, ``False`` if no item has
            that name (the checklist is left untouched). Callers can use this
            to detect a misspelled item name.
        """
        for item in checklist.items:
            if item.name == item_name:
                item.status = status
                item.notes = notes
                return True
        return False

    def get_readiness_score(self, checklist: ProductionChecklist) -> float:
        """Return the fraction of items whose status is PASSED.

        An empty checklist scores ``0.0``. Items that are FAILED, SKIPPED or
        PENDING all count as not passed.
        """
        if not checklist.items:
            return 0.0
        passed = sum(1 for item in checklist.items if item.status == CheckStatus.PASSED)
        return passed / len(checklist.items)

    def is_ready_for_launch(self, checklist: ProductionChecklist, threshold: float = 0.9) -> bool:
        """Return whether the readiness score reaches ``threshold``.

        Args:
            checklist: Checklist to evaluate.
            threshold: Minimum pass fraction required (inclusive).

        Note:
            Only the pass ratio is examined. A FAILED item does not block
            launch by itself if the remaining items still reach the threshold.
        """
        return self.get_readiness_score(checklist) >= threshold

    def generate_report(self, checklist: ProductionChecklist) -> str:
        """Render a plain-text report with items grouped by category.

        Categories appear in the order they are first seen in
        ``checklist.items``; an item's notes, when non-empty, follow it on
        their own line.
        """
        report = [
            f"Production Readiness Report: {checklist.model_name} v{checklist.version}",
            "=" * 60,
            f"Readiness Score: {self.get_readiness_score(checklist):.1%}",
            "",
        ]

        # dict keeps insertion order, so categories stay in first-seen order.
        categories: Dict[str, List[ChecklistItem]] = {}
        for item in checklist.items:
            categories.setdefault(item.category, []).append(item)

        for category, items in categories.items():
            report.append(f"\n{category}:")
            for item in items:
                report.append(f" [{self._STATUS_LABELS[item.status.value]}] {item.name}")
                if item.notes:
                    report.append(f" {item.notes}")

        report.append(f"\nReady for Launch: {'YES' if self.is_ready_for_launch(checklist) else 'NO'}")
        return "\n".join(report)
# Usage: create a checklist, mark the checks completed so far as passed,
# then print the report and the launch decision.
checker = ProductionReadinessChecker()
checklist = checker.create_checklist("churn-predictor", "2.0.0")

# (item name, notes) for each check to mark as PASSED.
completed_checks = [
    ("Model Performance", "Accuracy: 0.92"),
    ("Model Fairness", "No bias detected"),
    ("Latency SLA", "P99: 45ms"),
    ("Data Validation", "All checks passed"),
    ("Monitoring", "Dashboards configured"),
]
for check_name, check_notes in completed_checks:
    checker.update_item(checklist, check_name, CheckStatus.PASSED, check_notes)

print(checker.generate_report(checklist))
print(f"\nReady: {checker.is_ready_for_launch(checklist)}")
Deployment Validation
# deployment_validation.py
import requests
import time
import json
from typing import Dict, List
from dataclasses import dataclass
@dataclass
class ValidationResult:
    """Outcome of one validation check against an endpoint.

    Attributes:
        endpoint: Path that was checked, e.g. ``"/health"``.
        status: Outcome label of the check.
        latency_ms: Measured latency in milliseconds.
        response_valid: Whether the response was judged valid.
        error: Error text for the check; empty string by default.
    """

    endpoint: str
    status: str
    latency_ms: float
    response_valid: bool
    # Only field with a default, so it must come last.
    error: str = ""
class DeploymentValidator:
    """Run smoke checks and a simple sequential load test against a service.

    Results of ``validate_health`` and ``validate_prediction`` accumulate in
    ``self.results`` and are summarised by ``generate_report``.
    """

    def __init__(self, base_url: str):
        """Store the service base URL (e.g. ``"http://localhost:8000"``)."""
        self.base_url = base_url
        self.results: List[ValidationResult] = []

    def _record(
        self,
        endpoint: str,
        passed: bool,
        latency_ms: float = 0,
        error: str = "",
    ) -> ValidationResult:
        """Build a ValidationResult, append it to ``self.results`` and return it."""
        result = ValidationResult(
            endpoint=endpoint,
            status="passed" if passed else "failed",
            latency_ms=latency_ms,
            response_valid=passed,
            error=error,
        )
        self.results.append(result)
        return result

    def validate_health(self) -> ValidationResult:
        """GET ``/health`` and pass when the response status is 200.

        Any exception raised by the request is recorded as a failed check with
        latency 0 and the exception text in ``error``; it is not re-raised.
        """
        try:
            # perf_counter is monotonic, so a wall-clock adjustment during the
            # request cannot distort the measured latency.
            start = time.perf_counter()
            response = requests.get(f"{self.base_url}/health", timeout=5)
            latency = (time.perf_counter() - start) * 1000
        except Exception as e:  # deliberately broad: every failure becomes a failed check
            return self._record("/health", False, error=str(e))
        return self._record("/health", response.status_code == 200, latency)

    def validate_prediction(self, test_input: Dict) -> ValidationResult:
        """POST ``test_input`` as JSON to ``/predict`` and validate the reply.

        The check passes when the status is 200 and the decoded JSON body
        contains ``"prediction"``. Any exception, including a body that is not
        valid JSON, is recorded as a failed check with latency 0 and the
        exception text in ``error``.
        """
        try:
            start = time.perf_counter()
            response = requests.post(
                f"{self.base_url}/predict",
                json=test_input,
                timeout=10,
            )
            latency = (time.perf_counter() - start) * 1000
            # The body is decoded only after a 200, and inside the try so a
            # decode error is reported as a failed check.
            is_valid = response.status_code == 200 and "prediction" in response.json()
        except Exception as e:  # deliberately broad: every failure becomes a failed check
            return self._record("/predict", False, error=str(e))
        return self._record("/predict", is_valid, latency)

    def run_load_test(self, endpoint: str, payload: Dict, num_requests: int = 100) -> Dict:
        """POST ``payload`` to ``endpoint`` ``num_requests`` times, one after another.

        Requests are issued sequentially, so this measures per-request latency
        rather than behaviour under concurrent load. Results are not added to
        ``self.results``.

        Args:
            endpoint: Path appended to the base URL, e.g. ``"/predict"``.
            payload: JSON-serialisable request body.
            num_requests: Number of requests to send. Zero or a negative value
                sends nothing and yields an all-zero summary.

        Returns:
            A dict with ``total_requests``, ``errors``, ``error_rate`` and
            ``avg``/``p50``/``p95``/``p99`` latency in milliseconds. A request
            that raises counts as an error and contributes no latency sample;
            a non-200 response counts as an error but its latency is kept.
        """
        latencies = []
        errors = 0
        for _ in range(num_requests):
            try:
                start = time.perf_counter()
                response = requests.post(
                    f"{self.base_url}{endpoint}",
                    json=payload,
                    timeout=10,
                )
                latencies.append((time.perf_counter() - start) * 1000)
                if response.status_code != 200:
                    errors += 1
            except Exception:
                errors += 1

        latencies.sort()
        samples = len(latencies)
        return {
            "total_requests": num_requests,
            "errors": errors,
            # Guard against ZeroDivisionError when no requests were asked for.
            "error_rate": errors / num_requests if num_requests > 0 else 0.0,
            "avg_latency_ms": sum(latencies) / samples if samples else 0,
            "p50_latency_ms": latencies[samples // 2] if samples else 0,
            # int(samples * q) is always < samples for q < 1, so the index is in range.
            "p95_latency_ms": latencies[int(samples * 0.95)] if samples else 0,
            "p99_latency_ms": latencies[int(samples * 0.99)] if samples else 0,
        }

    def generate_report(self) -> str:
        """Render the recorded checks as text, one line per check plus a total.

        A check that recorded an error message has it appended to its line so
        the reason for the failure is visible in the report.
        """
        report = ["Deployment Validation Report", "=" * 40]
        for result in self.results:
            line = f"[{result.status.upper()}] {result.endpoint}: {result.latency_ms:.1f}ms"
            if result.error:
                line += f" - {result.error}"
            report.append(line)
        passed = sum(1 for r in self.results if r.status == "passed")
        report.append(f"\nTotal: {passed}/{len(self.results)} checks passed")
        return "\n".join(report)
# Usage: smoke-check a locally running service, then run a 50-request load
# test against /predict and print the P99 latency.
validator = DeploymentValidator("http://localhost:8000")
validator.validate_health()
validator.validate_prediction({"features": [1.0, 2.0, 3.0]})

load_results = validator.run_load_test(
    endpoint="/predict",
    payload={"features": [1.0, 2.0, 3.0]},
    num_requests=50,
)
print(f"Load test: {load_results['p99_latency_ms']:.1f}ms P99")
Follow-Up Questions
- How do you implement gradual rollouts for ML models?
- What monitoring dashboards are essential for production ML?
- How would you handle emergency model rollback?
- What documentation is required for production ML systems?