LLM Agent Operations: Infrastructure and Tool Calling

LLM agents extend language models with tool use, memory, and planning capabilities. Operating these systems in production requires robust infrastructure for tool execution, state management, and error handling.

Agent Architecture

Tool Calling Infrastructure

1. Tool Registry and Executor

import json
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
from enum import Enum

class ToolStatus(Enum):
    """Availability state of a registered tool.

    ToolRegistry lists and executes a tool only while its status is
    AVAILABLE; for any other status, execute() returns an error message
    that includes the status value.
    """

    AVAILABLE = "available"
    UNAVAILABLE = "unavailable"
    RATE_LIMITED = "rate_limited"

@dataclass
class ToolDefinition:
    """A callable tool together with its description and usage counters."""

    # Identity and parameter description exposed through to_schema().
    name: str
    description: str
    parameters: Dict[str, Any]
    # Callable invoked with the tool arguments as keyword arguments.
    function: Callable
    status: ToolStatus = ToolStatus.AVAILABLE
    # Usage counters maintained by the registry; all start at zero.
    call_count: int = 0
    error_count: int = 0
    avg_latency_ms: float = 0.0

    def to_schema(self) -> Dict:
        """Return the tool as a ``{"type": "function", "function": {...}}`` dict.

        Only ``name``, ``description`` and ``parameters`` are exported; the
        callable, status and counters are left out.
        """
        function_spec = {
            "name": self.name,
            "description": self.description,
            "parameters": self.parameters,
        }
        return {"type": "function", "function": function_spec}

class ToolRegistry:
    """Store tool definitions by name, execute them, and track usage stats."""

    def __init__(self):
        # Maps tool name -> ToolDefinition. Registering a second tool under
        # an existing name replaces the earlier definition.
        self.tools: Dict[str, ToolDefinition] = {}

    def register(self, name: str, description: str, parameters: Dict,
                 function: Callable) -> ToolDefinition:
        """Create a tool definition, store it under ``name``, and return it.

        Args:
            name: Key used to look the tool up in ``execute``.
            description: Human-readable description exported in the schema.
            parameters: Parameter description exported in the schema.
            function: Callable invoked with the tool arguments as keywords.
        """
        tool = ToolDefinition(
            name=name,
            description=description,
            parameters=parameters,
            function=function,
        )
        self.tools[name] = tool
        return tool

    def get_available_tools(self) -> List[Dict]:
        """Return the schema of every tool whose status is AVAILABLE."""
        return [
            tool.to_schema()
            for tool in self.tools.values()
            if tool.status == ToolStatus.AVAILABLE
        ]

    def execute(self, tool_name: str, arguments: Dict[str, Any]) -> Dict:
        """Run a registered tool and record the outcome in its counters.

        Args:
            tool_name: Name the tool was registered under.
            arguments: Keyword arguments for the tool; ``None`` is treated
                as "no arguments".

        Returns:
            ``{"result": ..., "latency_ms": ...}`` on success, or a dict with
            an ``"error"`` message. When the tool was actually invoked and
            failed, the error dict also carries ``"latency_ms"``. Exceptions
            raised by the tool are reported, not propagated.
        """
        tool = self.tools.get(tool_name)
        if tool is None:
            return {"error": f"Tool '{tool_name}' not found"}
        if tool.status != ToolStatus.AVAILABLE:
            return {"error": f"Tool '{tool_name}' is {tool.status.value}"}

        # perf_counter is monotonic, so the measured latency cannot go
        # negative or jump when the system clock is adjusted.
        start = time.perf_counter()
        try:
            # The ** expansion stays inside the try so that a non-mapping
            # ``arguments`` value is reported as an error instead of raising.
            result = tool.function(**(arguments or {}))
        except Exception as exc:
            tool.error_count += 1
            # Some exceptions stringify to "", which would look like success
            # to callers that test the error message for truthiness.
            outcome = {"error": str(exc) or type(exc).__name__}
        else:
            outcome = {"result": result}
        latency = (time.perf_counter() - start) * 1000

        # Every attempt counts as a call, so error_count can never exceed
        # call_count and the latency average also covers failed calls.
        tool.call_count += 1
        tool.avg_latency_ms += (latency - tool.avg_latency_ms) / tool.call_count
        outcome["latency_ms"] = latency
        return outcome

    def get_stats(self) -> List[Dict]:
        """Return one summary dict per registered tool.

        Each dict has the keys ``name``, ``calls``, ``errors``,
        ``avg_latency_ms`` (rounded to two decimals) and ``status``.
        """
        return [
            {
                "name": tool.name,
                "calls": tool.call_count,
                "errors": tool.error_count,
                "avg_latency_ms": round(tool.avg_latency_ms, 2),
                "status": tool.status.value,
            }
            for tool in self.tools.values()
        ]

2. Agent Memory Manager

from dataclasses import dataclass, field
from typing import List, Dict, Optional
from collections import deque
import json

@dataclass
class MemoryEntry:
    """A single remembered item together with its bookkeeping fields."""

    content: str  # text of the memory; this is what get_context() emits
    memory_type: str  # AgentMemory stores "short_term" or "long_term" here
    timestamp: float  # AgentMemory fills this with time.time() at creation
    metadata: Dict = field(default_factory=dict)  # fresh dict per entry
    importance: float = 0.5  # AgentMemory ranks and filters entries by this

class AgentMemory:
    """Short-term, long-term, and working memory stores for an agent.

    Short-term memory is a bounded deque (oldest entries fall off once
    ``short_term_size`` is reached); long-term and working memory are
    unbounded lists.
    """

    def __init__(self, short_term_size: int = 20):
        self.short_term: deque = deque(maxlen=short_term_size)
        self.long_term: List[MemoryEntry] = []
        self.working: List[MemoryEntry] = []

    def add_short_term(self, content: str, metadata: Optional[Dict] = None,
                       importance: float = 0.5):
        """Append an entry to short-term memory.

        Args:
            content: Text to remember.
            metadata: Optional extra data stored with the entry.
            importance: Score later used by ``consolidate``; entries above
                0.7 are promoted to long-term memory. Without this parameter
                every short-term entry keeps the 0.5 default and
                consolidation could never promote anything.
        """
        entry = MemoryEntry(
            content=content,
            memory_type="short_term",
            timestamp=time.time(),
            importance=importance,
            metadata=metadata or {},
        )
        self.short_term.append(entry)

    def add_long_term(self, content: str, importance: float = 0.5,
                      metadata: Optional[Dict] = None):
        """Append an entry with the given importance to long-term memory."""
        entry = MemoryEntry(
            content=content,
            memory_type="long_term",
            timestamp=time.time(),
            importance=importance,
            metadata=metadata or {},
        )
        self.long_term.append(entry)

    def get_context(self, max_tokens: int = 2000) -> str:
        """Build a newline-joined context string within a token budget.

        Tokens are estimated as whitespace-separated words. Short-term
        entries are taken newest-first until the budget would be exceeded,
        then up to five of the most important long-term entries are added
        while they still fit. The collected parts are reversed before
        joining, so the output lists the selected long-term entries first
        (least important of them first) followed by the short-term entries
        in oldest-to-newest order.
        """
        parts = []
        included = set()
        token_count = 0
        for entry in reversed(self.short_term):
            tokens = len(entry.content.split())
            if token_count + tokens > max_tokens:
                break
            parts.append(entry.content)
            included.add(id(entry))
            token_count += tokens

        # An entry promoted by consolidate() lives in both stores; skip it
        # here so the same text is not emitted twice.
        candidates = [e for e in self.long_term if id(e) not in included]
        important = sorted(candidates, key=lambda e: e.importance, reverse=True)[:5]
        for entry in important:
            tokens = len(entry.content.split())
            if token_count + tokens > max_tokens:
                break
            parts.append(entry.content)
            token_count += tokens
        return "\n".join(reversed(parts))

    def consolidate(self):
        """Promote important short-term entries to long-term memory.

        Runs only when short-term memory holds more than 10 entries. Entries
        with importance above 0.7 are appended to long-term memory; entries
        that were already promoted are skipped, so calling this repeatedly
        does not create duplicates. Promoted entries stay in short-term
        memory and keep their original ``memory_type``.
        """
        if len(self.short_term) <= 10:
            return
        # long_term keeps its entries alive, so their id() values are stable.
        already_promoted = {id(entry) for entry in self.long_term}
        self.long_term.extend(
            entry for entry in self.short_term
            if entry.importance > 0.7 and id(entry) not in already_promoted
        )

3. ReAct Agent Loop

from typing import List, Dict, Any, Optional

class ReActAgent:
    """Agent loop that alternates LLM calls with tool executions.

    Each iteration builds a prompt from memory, asks the LLM for a response,
    and either returns the final answer or executes the requested tool and
    records its result in short-term memory.
    """

    def __init__(self, tool_registry: "ToolRegistry", memory: "AgentMemory",
                 llm_caller: Any):
        """Store the collaborators.

        Args:
            tool_registry: Provides ``get_available_tools()`` and ``execute()``.
            memory: Provides ``add_short_term()`` and ``get_context()``.
            llm_caller: Object with a ``generate(prompt)`` method whose
                return value is parsed as the model's text response.
        """
        self.tools = tool_registry
        self.memory = memory
        self.llm = llm_caller
        self.max_iterations = 10

    def run(self, query: str) -> str:
        """Answer ``query``, using tools for at most ``max_iterations`` rounds.

        Returns the model's final answer, or a fixed fallback message when
        no final answer was produced within the iteration limit.
        """
        self.memory.add_short_term(f"User: {query}")
        context = self.memory.get_context()
        tools_schema = self.tools.get_available_tools()

        for _ in range(self.max_iterations):
            prompt = self._build_prompt(context, tools_schema)
            response = self.llm.generate(prompt)
            action = self._parse_action(response)

            if action["type"] == "final_answer":
                self.memory.add_short_term(f"Agent: {action['answer']}")
                return action["answer"]

            if action["type"] == "tool_call":
                result = self.tools.execute(action["tool"], action["arguments"])
                # default=str keeps the loop alive when a tool returns a
                # value json cannot serialise; the text is only a truncated
                # observation for the next prompt, not a data exchange format.
                observation = json.dumps(result, default=str)[:200]
                self.memory.add_short_term(f"Tool {action['tool']}: {observation}")
                context = self.memory.get_context()

        return "I was unable to complete the task within the allowed iterations."

    def _build_prompt(self, context: str, tools: List[Dict]) -> str:
        """Render the prompt from the memory context and the tool schemas."""
        tools_desc = "\n".join([
            f"- {t['function']['name']}: {t['function']['description']}"
            for t in tools
        ])
        return f"""You are a helpful assistant with access to tools.

Context:
{context}

Available Tools:
{tools_desc}

Think step by step. Use a tool if needed, or provide a final answer.
Format: Thought: [reasoning] Action: [tool_name] Input: [arguments]
Or: Thought: [reasoning] Final Answer: [answer]"""

    def _parse_action(self, response: str) -> Dict[str, Any]:
        """Turn an LLM response into a final-answer or tool-call action.

        Returns:
            ``{"type": "final_answer", "answer": str}`` when the response
            contains ``Final Answer:`` or no ``Action:`` marker at all, else
            ``{"type": "tool_call", "tool": str | None, "arguments": dict}``.
            ``tool`` is ``None`` when no name follows the marker.

        The markers are located anywhere in the text, not only at the start
        of a line, because the prompt asks for the single-line form
        ``Thought: ... Action: ... Input: ...``. When the response holds
        several actions, the first one is used: anything after it is text
        the model produced before seeing a real tool result.
        """
        if "Final Answer:" in response:
            answer = response.split("Final Answer:")[-1].strip()
            return {"type": "final_answer", "answer": answer}
        if "Action:" not in response:
            return {"type": "final_answer", "answer": response}

        after_action = response.split("Action:", 1)[1]
        name_part, input_marker, raw_input = after_action.partition("Input:")
        name_lines = name_part.strip().splitlines()
        tool = (name_lines[0].strip() or None) if name_lines else None
        arguments = self._parse_arguments(raw_input) if input_marker else {}
        return {"type": "tool_call", "tool": tool, "arguments": arguments}

    @staticmethod
    def _parse_arguments(raw_input: str) -> Dict[str, Any]:
        """Convert the text after ``Input:`` into keyword arguments.

        A JSON object (possibly spanning several lines or followed by other
        text) is returned as-is. Any other JSON value, or non-JSON text on
        the first line, is wrapped as ``{"input": value}`` so the result can
        always be expanded with ``**``.
        """
        text = raw_input.strip()
        lines = text.splitlines()
        first_line = lines[0].strip() if lines else ""
        try:
            if text.startswith("{"):
                # raw_decode parses the leading object and ignores whatever
                # follows it, which json.loads would reject as extra data.
                value, _ = json.JSONDecoder().raw_decode(text)
            else:
                value = json.loads(first_line)
        except json.JSONDecodeError:
            value = first_line
        return value if isinstance(value, dict) else {"input": value}

Key Concepts

Component	Purpose	Production Concern
Tool Registry	Manage available tools	Rate limiting, auth
Tool Executor	Run tool calls safely	Sandboxing, timeouts
Memory Store	Maintain conversation state	Persistence, size limits
Planner	Decompose complex tasks	Latency, token cost
ReAct Loop	Iterate until complete	Max iterations, cost cap

Best Practices

- Sandbox tool execution to contain the impact of code injection.
- Set token budgets for agent loops to control costs.
- Cache tool results when the tool is deterministic.
- Implement circuit breakers for unreliable external tools.
- Log every tool call for debugging and auditing.

LLM Agent Operations: Infrastructure and Tool Calling

LLM Agent Operations: Infrastructure and Tool Calling

Agent Architecture

Tool Calling Infrastructure

1. Tool Registry and Executor

2. Agent Memory Manager

3. ReAct Agent Loop

Key Concepts

Best Practices

Premium Content

Need Expert AI Ops & LLM Ops Help?