🎉 75% of content is free forever — Unlock Premium from $10/mo →
CW
Search courses…
💼 Services · ℹ️ About · ✉️ Contact · View Pricing Plans from $10

LLM Agent Operations: Infrastructure and Tool Calling

Advanced LLMOps › LLM Agents Operations › 🟢 Free Lesson

Advertisement

LLM Agent Operations: Infrastructure and Tool Calling

LLM agents extend language models with tool use, memory, and planning capabilities. Operating these systems in production requires robust infrastructure for tool execution, state management, and error handling.

Agent Architecture

Tool Calling Infrastructure

1. Tool Registry and Executor

import json
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
from enum import Enum

class ToolStatus(Enum):
    """Availability state attached to a registered tool.

    Each member's value is the lowercase string that is reported in
    registry statistics and error messages.
    """

    # The only state in which the registry advertises and runs a tool.
    AVAILABLE = "available"
    # Any other state makes the registry refuse the call.
    UNAVAILABLE = "unavailable"
    RATE_LIMITED = "rate_limited"

@dataclass
class ToolDefinition:
    """A callable tool together with its descriptive metadata and counters.

    The first four fields describe the tool; the remaining fields are
    bookkeeping values that start at zero / AVAILABLE and are updated by
    whoever executes the tool.
    """

    name: str
    description: str
    parameters: Dict[str, Any]  # passed through verbatim into the schema
    function: Callable          # invoked with keyword arguments by the registry
    status: ToolStatus = ToolStatus.AVAILABLE
    call_count: int = 0
    error_count: int = 0
    avg_latency_ms: float = 0.0

    def to_schema(self) -> Dict:
        """Return the tool description as a nested ``function`` schema dict.

        Only ``name``, ``description`` and ``parameters`` are exposed; the
        callable and the runtime counters are deliberately left out.
        """
        function_spec = {
            "name": self.name,
            "description": self.description,
            "parameters": self.parameters,
        }
        schema = {"type": "function", "function": function_spec}
        return schema

class ToolRegistry:
    """Name-indexed collection of tools with guarded execution and stats.

    Tools are stored in ``self.tools`` keyed by name; registering a name
    twice replaces the earlier definition.
    """

    def __init__(self):
        self.tools: Dict[str, ToolDefinition] = {}

    def register(self, name: str, description: str, parameters: Dict,
                 function: Callable) -> ToolDefinition:
        """Create a ``ToolDefinition``, store it under ``name`` and return it."""
        tool = ToolDefinition(
            name=name,
            description=description,
            parameters=parameters,
            function=function,
        )
        self.tools[name] = tool
        return tool

    def get_available_tools(self) -> List[Dict]:
        """Return the schema of every tool whose status is AVAILABLE."""
        return [
            t.to_schema() for t in self.tools.values()
            if t.status == ToolStatus.AVAILABLE
        ]

    def execute(self, tool_name: str, arguments: Dict[str, Any]) -> Dict:
        """Run a registered tool and report the outcome as a dict.

        Args:
            tool_name: Key the tool was registered under.
            arguments: Keyword arguments forwarded to the tool function.

        Returns:
            ``{"result": ..., "latency_ms": ...}`` on success, or
            ``{"error": "<message>"}`` when the tool is unknown, not
            AVAILABLE, or its function raised. This method never raises
            for a failing tool; the exception is converted to an error dict.
        """
        tool = self.tools.get(tool_name)
        if tool is None:
            return {"error": f"Tool '{tool_name}' not found"}
        if tool.status != ToolStatus.AVAILABLE:
            return {"error": f"Tool '{tool_name}' is {tool.status.value}"}

        # perf_counter is monotonic, so the measured duration cannot go
        # negative or jump when the system wall clock is adjusted.
        start = time.perf_counter()
        try:
            result = tool.function(**arguments)
        except Exception as e:
            tool.error_count += 1
            # Some exceptions carry no message; fall back to the type name
            # so the error value is never an empty (falsy) string.
            return {"error": str(e) or type(e).__name__}

        latency = (time.perf_counter() - start) * 1000
        # call_count and the running mean only cover successful calls;
        # failures are tracked separately in error_count.
        tool.call_count += 1
        tool.avg_latency_ms = (
            (tool.avg_latency_ms * (tool.call_count - 1) + latency) / tool.call_count
        )
        return {"result": result, "latency_ms": latency}

    def get_stats(self) -> List[Dict]:
        """Return one summary dict per registered tool, in registration order."""
        return [
            {
                "name": t.name,
                "calls": t.call_count,
                "errors": t.error_count,
                "avg_latency_ms": round(t.avg_latency_ms, 2),
                "status": t.status.value,
            }
            for t in self.tools.values()
        ]

2. Agent Memory Manager

from dataclasses import dataclass, field
from typing import List, Dict, Optional
from collections import deque
import json

@dataclass
class MemoryEntry:
    """One remembered piece of text plus the bookkeeping stored with it."""

    content: str
    memory_type: str   # free-form label, e.g. "short_term" or "long_term"
    timestamp: float   # supplied by the creator of the entry
    # default_factory gives every entry its own dict instead of a shared one.
    metadata: Dict = field(default_factory=dict)
    importance: float = 0.5

class AgentMemory:
    """Short-term / long-term memory store for an agent.

    ``short_term`` is a bounded deque (oldest entries are dropped once
    ``short_term_size`` is reached); ``long_term`` is an unbounded list.
    ``working`` is initialised but not used by any method of this class.
    """

    def __init__(self, short_term_size: int = 20):
        self.short_term: deque = deque(maxlen=short_term_size)
        self.long_term: List[MemoryEntry] = []
        self.working: List[MemoryEntry] = []

    def add_short_term(self, content: str, metadata: Optional[Dict] = None,
                       importance: float = 0.5):
        """Append a timestamped entry to the short-term window.

        Args:
            content: Text to remember.
            metadata: Optional extra data; a fresh dict is used when omitted.
            importance: Score later compared against the consolidation
                threshold. Defaults to 0.5, the same default the entry
                type itself uses.
        """
        entry = MemoryEntry(
            content=content,
            memory_type="short_term",
            timestamp=time.time(),
            metadata=metadata or {},
            importance=importance,
        )
        self.short_term.append(entry)

    def add_long_term(self, content: str, importance: float = 0.5,
                      metadata: Optional[Dict] = None):
        """Append a timestamped entry directly to long-term memory."""
        entry = MemoryEntry(
            content=content,
            memory_type="long_term",
            timestamp=time.time(),
            importance=importance,
            metadata=metadata or {},
        )
        self.long_term.append(entry)

    def get_context(self, max_tokens: int = 2000) -> str:
        """Build a newline-joined context string within a token budget.

        Short-term entries are taken newest-first, then up to five
        long-term entries by descending importance. Collection in each
        tier stops at the first entry that would exceed ``max_tokens``.
        Tokens are approximated by whitespace-separated word count.

        The collected list is reversed before joining, so long-term
        entries come first and short-term entries follow in chronological
        order.
        """
        parts = []
        token_count = 0
        # Keys of short-term entries already emitted, so an entry that was
        # promoted by consolidate() is not repeated from long-term memory.
        seen = set()
        for entry in reversed(self.short_term):
            tokens = len(entry.content.split())
            if token_count + tokens > max_tokens:
                break
            parts.append(entry.content)
            seen.add((entry.timestamp, entry.content))
            token_count += tokens
        important = sorted(self.long_term, key=lambda e: e.importance, reverse=True)[:5]
        for entry in important:
            if (entry.timestamp, entry.content) in seen:
                continue
            tokens = len(entry.content.split())
            if token_count + tokens > max_tokens:
                break
            parts.append(entry.content)
            token_count += tokens
        return "\n".join(reversed(parts))

    def consolidate(self, min_entries: int = 10,
                    importance_threshold: float = 0.7):
        """Promote important short-term entries into long-term memory.

        Nothing happens unless the short-term window holds more than
        ``min_entries`` entries. Entries whose importance is strictly
        greater than ``importance_threshold`` are copied, relabelled
        ``"long_term"`` and appended to ``long_term``. Calling this
        repeatedly does not promote the same entry twice.
        """
        if len(self.short_term) <= min_entries:
            return

        # Local import: copy is not among this file's top-level imports.
        import copy

        already_stored = {(e.timestamp, e.content) for e in self.long_term}
        for entry in self.short_term:
            key = (entry.timestamp, entry.content)
            if entry.importance <= importance_threshold or key in already_stored:
                continue
            # Shallow copy so relabelling does not alter the entry that is
            # still sitting in the short-term window (metadata is shared).
            promoted = copy.copy(entry)
            promoted.memory_type = "long_term"
            self.long_term.append(promoted)
            already_stored.add(key)

3. ReAct Agent Loop

from typing import List, Dict, Any, Optional

class ReActAgent:
    """Reason-and-act loop that alternates LLM calls with tool execution.

    Args:
        tool_registry: Object providing ``get_available_tools()`` and
            ``execute(name, arguments)``.
        memory: Object providing ``add_short_term(text)`` and
            ``get_context()``.
        llm_caller: Object providing ``generate(prompt) -> str``.
        max_iterations: Upper bound on LLM calls per ``run``.
    """

    def __init__(self, tool_registry: "ToolRegistry", memory: "AgentMemory",
                 llm_caller: Any, max_iterations: int = 10):
        self.tools = tool_registry
        self.memory = memory
        self.llm = llm_caller
        self.max_iterations = max_iterations

    def run(self, query: str) -> str:
        """Answer ``query``, calling tools as requested by the LLM.

        Each iteration prompts the LLM and parses its reply. A final
        answer is stored in memory and returned. A tool call is executed,
        its (truncated) result stored in memory, and the context rebuilt
        for the next iteration. If no final answer arrives within
        ``max_iterations`` a fixed fallback message is returned.
        """
        self.memory.add_short_term(f"User: {query}")
        context = self.memory.get_context()
        tools_schema = self.tools.get_available_tools()

        for _ in range(self.max_iterations):
            prompt = self._build_prompt(context, tools_schema)
            response = self.llm.generate(prompt)
            action = self._parse_action(response)

            if action["type"] == "final_answer":
                self.memory.add_short_term(f"Agent: {action['answer']}")
                return action["answer"]

            if action["type"] == "tool_call":
                result = self.tools.execute(action["tool"], action["arguments"])
                # default=str keeps the loop alive when a tool returns
                # something json cannot encode (sets, datetimes, objects).
                serialized = json.dumps(result, default=str)
                self.memory.add_short_term(
                    f"Tool {action['tool']}: {serialized[:200]}"
                )
                context = self.memory.get_context()

        return "I was unable to complete the task within the allowed iterations."

    def _build_prompt(self, context: str, tools: List[Dict]) -> str:
        """Render the prompt from the context and the tool name/description list."""
        tools_desc = "\n".join([
            f"- {t['function']['name']}: {t['function']['description']}"
            for t in tools
        ])
        return f"""You are a helpful assistant with access to tools.

Context:
{context}

Available Tools:
{tools_desc}

Think step by step. Use a tool if needed, or provide a final answer.
Format: Thought: [reasoning] Action: [tool_name] Input: [arguments]
Or: Thought: [reasoning] Final Answer: [answer]"""

    def _parse_action(self, response: str) -> Dict[str, Any]:
        """Turn an LLM reply into a ``final_answer`` or ``tool_call`` dict.

        ``Final Answer:`` takes precedence over ``Action:``. The
        ``Action:`` and ``Input:`` markers are recognised anywhere in a
        line, so both the single-line layout requested by the prompt
        (``Thought: ... Action: x Input: {...}``) and the one-marker-per-
        line layout are handled. When several lines carry a marker, the
        last one wins. Input that is not a JSON object is wrapped as
        ``{"input": value}``. A reply with neither marker is returned
        unchanged as the final answer.
        """
        if "Final Answer:" in response:
            answer = response.split("Final Answer:")[-1].strip()
            return {"type": "final_answer", "answer": answer}
        if "Action:" not in response:
            return {"type": "final_answer", "answer": response}

        tool = None
        arguments: Dict[str, Any] = {}
        for line in response.split("\n"):
            # Split on the first "Input:" so marker-like text inside the
            # argument payload is never mistaken for a tool name.
            head, has_input, tail = line.partition("Input:")
            if "Action:" in head:
                tool = head.split("Action:")[-1].strip()
            if has_input:
                raw = tail.strip()
                try:
                    parsed = json.loads(raw)
                except json.JSONDecodeError:
                    parsed = raw
                # Tools are invoked with **arguments, so anything that is
                # not a mapping is passed under a single "input" key.
                arguments = parsed if isinstance(parsed, dict) else {"input": parsed}
        return {"type": "tool_call", "tool": tool, "arguments": arguments}

Key Concepts

| Component | Purpose | Production Concern |
| --- | --- | --- |
| Tool Registry | Manage available tools | Rate limiting, auth |
| Tool Executor | Run tool calls safely | Sandboxing, timeouts |
| Memory Store | Maintain conversation state | Persistence, size limits |
| Planner | Decompose complex tasks | Latency, token cost |
| ReAct Loop | Iterate until complete | Max iterations, cost cap |

Best Practices

  1. Sandbox tool execution to prevent code injection
  2. Set token budgets for agent loops to control costs
  3. Cache tool results where deterministic
  4. Implement circuit breakers for unreliable external tools
  5. Log every tool call for debugging and auditing
⭐

Premium Content

LLM Agent Operations: Infrastructure and Tool Calling

Unlock this lesson and 900+ advanced tutorials with a Premium plan.

🎯 End-to-end Projects
💼 Interview Prep
📜 Certificates
🤝 Community Access

Already a member? Log in

Need Expert AI Ops & LLM Ops Help?

Get personalized tutoring, project support, or professional consulting.

Advertisement