LLM Cloud Platforms: AWS, GCP, and Azure AI Services

Cloud platforms provide managed infrastructure for LLM training and inference. Understanding the tradeoffs between AWS, GCP, and Azure enables optimal platform selection for specific workload requirements.

Cloud Platform Pipeline

Cloud Provider Implementations

1. AWS SageMaker for LLMs

from dataclasses import dataclass
from typing import Dict, Optional

@dataclass
class AWSEndpointConfig:
    """Settings recorded for one SageMaker endpoint.

    Instances are plain value holders; nothing is validated on construction.
    """

    # SageMaker instance type name, e.g. "ml.g5.12xlarge"
    instance_type: str
    # Number of instances requested for the endpoint
    instance_count: int
    # Location of the model artifacts (AWSSageMakerLLM.create_endpoint passes
    # its model_s3_uri argument here)
    model_data: str
    # Container image URI used to serve the model
    container: str
    # Environment variables handed to the serving container
    environment: Dict[str, str]

class AWSSageMakerLLM:
    """In-memory model of SageMaker LLM endpoint configs and cost estimates.

    Nothing here calls AWS: ``create_endpoint`` only records an
    ``AWSEndpointConfig`` in ``self.endpoints``, and prices come from a static
    table embedded in the class.
    """

    # Static hourly price table keyed by instance type. "count" is the number
    # of accelerators on the instance.
    _PRICING = {
        "ml.g5.2xlarge": {"hourly": 1.21, "gpu": "A10G-24GB", "count": 1},
        "ml.g5.12xlarge": {"hourly": 7.09, "gpu": "A10G-24GB", "count": 4},
        "ml.p4d.24xlarge": {"hourly": 32.77, "gpu": "A100-40GB", "count": 8},
        "ml.trn1.32xlarge": {"hourly": 21.50, "gpu": "Trainium", "count": 16},
    }

    # Returned for instance types missing from the table; same keys as a real
    # entry so callers can read "count" without a KeyError.
    _UNKNOWN_PRICING = {"hourly": 0, "gpu": "unknown", "count": 0}

    # SM_NUM_GPUS value used when the instance type is not in the table.
    _DEFAULT_NUM_GPUS = 4

    def __init__(self, region: str = "us-east-1"):
        """Remember the target region and start with no endpoints."""
        self.region = region
        self.endpoints: Dict[str, AWSEndpointConfig] = {}

    def _container_image(self) -> str:
        """Return the TGI container image URI for ``self.region``."""
        # NOTE(review): registry account 763104351884 is assumed to host the
        # image in self.region; some regions use a different account ID --
        # confirm against the AWS Deep Learning Containers image list.
        return (
            f"763104351884.dkr.ecr.{self.region}.amazonaws.com/"
            "huggingface-pytorch-tgi"
        )

    def _num_gpus(self, instance_type: str) -> str:
        """Return the SM_NUM_GPUS string for ``instance_type``.

        Uses the accelerator count from the pricing table and falls back to
        ``_DEFAULT_NUM_GPUS`` for instance types the table does not know.
        """
        count = self.get_pricing(instance_type).get("count")
        return str(count or self._DEFAULT_NUM_GPUS)

    def create_endpoint(self, name: str, model_s3_uri: str,
                        instance_type: str = "ml.g5.12xlarge") -> Dict:
        """Record an endpoint configuration under ``name``.

        Args:
            name: Key under which the configuration is stored.
            model_s3_uri: Stored as the configuration's ``model_data``.
            instance_type: SageMaker instance type for the endpoint.

        Returns:
            ``{"status": "creating", "endpoint_name": name}``.
        """
        config = AWSEndpointConfig(
            instance_type=instance_type,
            instance_count=1,
            model_data=model_s3_uri,
            # Built from self.region so the image matches the endpoint region
            # instead of always pointing at us-east-1.
            container=self._container_image(),
            environment={
                "HF_MODEL_ID": "meta-llama/Llama-2-70b-chat-hf",
                # Must match the accelerators actually present on the instance.
                "SM_NUM_GPUS": self._num_gpus(instance_type),
            },
        )
        self.endpoints[name] = config
        return {"status": "creating", "endpoint_name": name}

    def get_pricing(self, instance_type: str) -> Dict:
        """Return ``{"hourly", "gpu", "count"}`` for ``instance_type``.

        Unknown instance types yield a zero-priced placeholder with
        ``"gpu": "unknown"``. A fresh dict is returned on every call, so
        callers may mutate the result freely.
        """
        return dict(self._PRICING.get(instance_type, self._UNKNOWN_PRICING))

    def estimate_monthly_cost(self, instance_type: str, hours_per_day: float = 8) -> Dict:
        """Estimate cost for running one instance ``hours_per_day`` hours daily.

        A month is taken as 30 days and a year as 12 such months. Unknown
        instance types price at 0, so every cost in the result is 0.

        Returns:
            Dict with ``instance_type``, ``hourly_cost``, ``monthly_cost`` and
            ``annual_cost`` (costs rounded to 2 decimals).
        """
        pricing = self.get_pricing(instance_type)
        monthly = pricing["hourly"] * hours_per_day * 30
        return {
            "instance_type": instance_type,
            "hourly_cost": pricing["hourly"],
            "monthly_cost": round(monthly, 2),
            "annual_cost": round(monthly * 12, 2),
        }

2. GCP Vertex AI for LLMs

class GCPVertexAILLM:
    """In-memory model of Vertex AI LLM deployments and cost estimates.

    Nothing here calls GCP: ``deploy_model`` only records a configuration
    dict in ``self.endpoints``, and prices come from a static table embedded
    in the class.
    """

    # Static hourly price table keyed by machine type. "count" is the number
    # of GPUs attached to the machine.
    _PRICING = {
        "a2-highgpu-1g": {"hourly": 3.67, "gpu": "A100-40GB", "count": 1},
        "a2-highgpu-8g": {"hourly": 29.39, "gpu": "A100-40GB", "count": 8},
        "a2-megagpu-16g": {"hourly": 55.74, "gpu": "A100-40GB", "count": 16},
        "g2-standard-8": {"hourly": 1.89, "gpu": "L4-24GB", "count": 1},
    }

    # Returned for machine types missing from the table; same keys as a real
    # entry so callers can read "gpu"/"count" without a KeyError.
    _UNKNOWN_PRICING = {"hourly": 0, "gpu": "unknown", "count": 0}

    # Maps the table's GPU label to the accelerator type name recorded in the
    # deployment configuration.
    _ACCELERATOR_TYPES = {
        "A100-40GB": "NVIDIA_TESLA_A100",
        "L4-24GB": "NVIDIA_L4",
    }

    # Accelerator settings recorded for machine types the table doesn't know.
    _DEFAULT_ACCELERATOR = ("NVIDIA_TESLA_A100", 8)

    def __init__(self, project_id: str, region: str = "us-central1"):
        """Remember the project and region and start with no endpoints."""
        self.project_id = project_id
        self.region = region
        self.endpoints: Dict[str, Dict] = {}

    def _accelerator_for(self, machine_type: str):
        """Return ``(accelerator_type, accelerator_count)`` for a machine type."""
        spec = self._PRICING.get(machine_type)
        if spec is None:
            return self._DEFAULT_ACCELERATOR
        default_type, _ = self._DEFAULT_ACCELERATOR
        return self._ACCELERATOR_TYPES.get(spec["gpu"], default_type), spec["count"]

    def deploy_model(self, model_id: str, machine_type: str = "a2-highgpu-8g",
                     min_replicas: int = 1, max_replicas: int = 4) -> Dict:
        """Record a deployment configuration keyed by ``model_id``.

        The accelerator type and count are derived from ``machine_type`` so
        they match the machine; unknown machine types keep the
        ``NVIDIA_TESLA_A100`` x 8 default.

        Returns:
            ``{"status": "deploying", "endpoint": model_id}``.
        """
        accelerator_type, accelerator_count = self._accelerator_for(machine_type)
        endpoint_config = {
            "model_id": model_id,
            "machine_type": machine_type,
            "min_replicas": min_replicas,
            "max_replicas": max_replicas,
            "accelerator_type": accelerator_type,
            "accelerator_count": accelerator_count,
        }
        self.endpoints[model_id] = endpoint_config
        return {"status": "deploying", "endpoint": model_id}

    def get_pricing(self, machine_type: str) -> Dict:
        """Return ``{"hourly", "gpu", "count"}`` for ``machine_type``.

        Unknown machine types yield a zero-priced placeholder. A fresh dict
        is returned on every call, so callers may mutate the result freely.
        """
        return dict(self._PRICING.get(machine_type, self._UNKNOWN_PRICING))

    def estimate_cost(self, machine_type: str, usage_hours: float = 240) -> Dict:
        """Estimate the cost of running ``machine_type`` for ``usage_hours``.

        Returns:
            Dict with ``machine_type``, ``monthly_cost`` (hourly price times
            ``usage_hours``, rounded to 2 decimals) and
            ``cost_per_1k_requests`` (hourly price times 0.1, rounded to 4
            decimals).
        """
        pricing = self.get_pricing(machine_type)
        return {
            "machine_type": machine_type,
            "monthly_cost": round(pricing["hourly"] * usage_hours, 2),
            # NOTE(review): the 0.1 factor is a fixed heuristic carried over
            # unchanged; its derivation is not shown -- confirm before
            # relying on this figure.
            "cost_per_1k_requests": round(pricing["hourly"] * 0.1, 4),
        }

3. Azure OpenAI Service

class AzureOpenAILLM:
    """In-memory model of Azure OpenAI deployments and token-based costs.

    Nothing here calls Azure: ``deploy_model`` only records a dict in
    ``self.deployments``, and prices come from a table built inside
    ``get_pricing``.
    """

    def __init__(self, subscription_id: str, resource_group: str):
        """Store the subscription and resource group; start with no deployments."""
        self.subscription_id, self.resource_group = subscription_id, resource_group
        self.deployments: Dict[str, Dict] = {}

    def deploy_model(self, model_name: str, deployment_name: str,
                     capacity: int = 10) -> Dict:
        """Record a deployment under ``deployment_name`` and report its status.

        The stored record carries ``capacity`` both at the top level and
        inside its ``"sku"`` entry.
        """
        record = dict(
            model_name=model_name,
            deployment_name=deployment_name,
            capacity=capacity,
        )
        record["sku"] = dict(name="Standard", capacity=capacity)
        self.deployments[deployment_name] = record
        return dict(status="deploying", deployment=deployment_name)

    def get_pricing(self, model: str) -> Dict:
        """Return ``{"input": rate, "output": rate}`` per 1,000 tokens.

        Models missing from the table get the fallback rates 0.001 / 0.002.
        """
        # (input rate, output rate) per model
        rate_pairs = {
            "gpt-4": (0.03, 0.06),
            "gpt-4-turbo": (0.01, 0.03),
            "gpt-35-turbo": (0.0015, 0.002),
            "gpt-4o": (0.005, 0.015),
        }
        rate_in, rate_out = rate_pairs.get(model, (0.001, 0.002))
        return {"input": rate_in, "output": rate_out}

    def estimate_monthly_cost(self, model: str, input_tokens: int,
                               output_tokens: int) -> Dict:
        """Price the given token volumes for ``model``.

        Returns:
            Dict with ``model``, ``input_cost_usd``, ``output_cost_usd`` and
            ``total_cost_usd``, each cost rounded to 4 decimals.
        """
        rates = self.get_pricing(model)

        def spend(token_count, direction):
            # Rates are quoted per 1,000 tokens.
            return (token_count / 1000) * rates[direction]

        spent_in = spend(input_tokens, "input")
        spent_out = spend(output_tokens, "output")
        summary = {"model": model}
        summary["input_cost_usd"] = round(spent_in, 4)
        summary["output_cost_usd"] = round(spent_out, 4)
        summary["total_cost_usd"] = round(spent_in + spent_out, 4)
        return summary

Key Formulas

Cloud Cost Optimization

$$C_{optimized} = C_{on\_demand} \times (1 - S_{spot} \times R_{spot} - S_{reserved} \times R_{reserved})$$

Here,

$C_{on\_demand}$ = On-demand cost baseline
$S_{spot}$ = Fraction of work on spot instances
$R_{spot}$ = Spot discount rate
$S_{reserved}$ = Fraction of work on reserved instances
$R_{reserved}$ = Reserved discount rate

Platform Comparison

Feature	AWS SageMaker	GCP Vertex AI	Azure OpenAI
GPU Options	A10G, A100, Trainium	A100, L4, TPU	A100 (managed)
Managed Serving	Yes (TGI, DLC)	Yes (Triton)	Yes (API)
Spot Pricing	Up to 70% off	Up to 80% off	Limited
Fine-Tuning	Yes	Yes	Yes (API)
Cost (A100/hr)	$4-32	$3-29	$0.03-0.06/1K tok

Best Practices

Use reserved instances for predictable workloads (30-50% savings)
Use spot instances for training (up to 70-80% discount, depending on provider)
Auto-scaling based on queue depth and latency
Multi-cloud strategy to avoid vendor lock-in
Cost monitoring with daily budget alerts

LLM Cloud Platforms: AWS, GCP, and Azure AI Services

LLM Cloud Platforms: AWS, GCP, and Azure AI Services

Cloud Platform Pipeline

Cloud Provider Implementations

1. AWS SageMaker for LLMs

2. GCP Vertex AI for LLMs

3. Azure OpenAI Service

Key Formulas

Cloud Cost Optimization

Platform Comparison

Best Practices

Premium Content

Need Expert AI Ops & LLM Ops Help?