Custom Framework Design
When to Build Custom
Building a custom agent framework may be preferable to adopting an existing one in the following scenarios:
| Scenario | Reason |
|---|---|
| Extreme performance requirements | Eliminates framework overhead and allows targeted optimization |
| Specialized domain needs | Interaction patterns that existing frameworks do not support |
| Deep customization | Full control over every component is required |
| Security and compliance | Third-party dependencies cannot be introduced |
| Teaching / learning | Builds first-hand understanding of how agents work |
The Cost of Building Custom
Building custom means owning error recovery, state management, observability, and every other infrastructure concern yourself. In most cases, a mature framework plus targeted customization is the better choice.
Related Content
For production deployment of agents, see Deployment Architecture Overview.
Minimal Agent Loop
At the core of any agent framework is an observe -> think -> act -> evaluate loop:
graph TD
A[Observe] --> B[Think]
B --> C[Act]
C --> D[Evaluate]
D -->|Continue| A
D -->|Complete| E[Return Result]
D -->|Failure| F[Error Handler]
F -->|Retry| A
F -->|Give Up| G[Fallback]
Minimal Implementation
import json
from dataclasses import dataclass, field
from typing import Any, Callable
@dataclass
class AgentState:
"""Agent state"""
messages: list = field(default_factory=list)
tool_results: dict = field(default_factory=dict)
iteration: int = 0
status: str = "running" # running | completed | failed
metadata: dict = field(default_factory=dict)
class Tool:
"""Tool definition"""
def __init__(self, name: str, description: str,
function: Callable, parameters: dict):
self.name = name
self.description = description
self.function = function
self.parameters = parameters
def execute(self, **kwargs) -> Any:
return self.function(**kwargs)
def to_schema(self) -> dict:
return {
"name": self.name,
"description": self.description,
"parameters": self.parameters
}
class MinimalAgent:
"""Minimal agent framework"""
def __init__(self, system_prompt: str, tools: list[Tool],
llm_client, max_iterations: int = 10):
self.system_prompt = system_prompt
self.tools = {t.name: t for t in tools}
self.llm = llm_client
self.max_iterations = max_iterations
def run(self, user_input: str) -> str:
"""Run the agent loop"""
state = AgentState()
state.messages.append({"role": "user", "content": user_input})
while state.iteration < self.max_iterations:
state.iteration += 1
# 1. OBSERVE: Build current context
context = self._build_context(state)
# 2. THINK: Call LLM
response = self._think(context)
# 3. ACT: Execute tools or return result
            if self._has_tool_calls(response):
                tool_results = self._act(response)
                # store a serializable transcript, not the raw response object
                state.messages.append({
                    "role": "assistant", "content": self._extract_text(response)
                })
                state.messages.append({
                    # default=str keeps non-JSON-serializable results from crashing the loop
                    "role": "tool", "content": json.dumps(tool_results, default=str)
                })
else:
# 4. EVALUATE: Determine if complete
state.status = "completed"
return self._extract_text(response)
state.status = "failed"
return "Max iterations reached"
def _build_context(self, state: AgentState) -> list:
"""Build LLM input context"""
return [
{"role": "system", "content": self.system_prompt},
*state.messages
]
def _think(self, context: list) -> dict:
"""Call LLM for reasoning"""
return self.llm.chat(
messages=context,
tools=[t.to_schema() for t in self.tools.values()]
)
    def _has_tool_calls(self, response) -> bool:
        """Check if response contains tool calls"""
        return bool(getattr(response, "tool_calls", None))
    def _act(self, response) -> list:
        """Execute tool calls"""
        results = []
        for call in response.tool_calls:
            tool = self.tools.get(call.name)
            if tool is None:
                # surface unknown tools instead of skipping them silently
                results.append({
                    "tool": call.name,
                    "error": f"Unknown tool: {call.name}",
                    "status": "error"
                })
                continue
            try:
                result = tool.execute(**call.arguments)
                results.append({
                    "tool": call.name,
                    "result": result,
                    "status": "success"
                })
            except Exception as e:
                results.append({
                    "tool": call.name,
                    "error": str(e),
                    "status": "error"
                })
        return results
def _extract_text(self, response) -> str:
"""Extract text response"""
return response.content
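Wiring it together: the sketch below assumes an `llm_client` object exposing `chat(messages=..., tools=...)` and returning a response with `content` and `tool_calls` attributes; `get_weather` is a stand-in tool.
def get_weather(city: str) -> str:
    # stand-in for a real weather API call
    return f"Sunny in {city}"
weather_tool = Tool(
    name="get_weather",
    description="Look up the current weather for a city",
    function=get_weather,
    parameters={"type": "object",
                "properties": {"city": {"type": "string"}}}
)
agent = MinimalAgent(
    system_prompt="You are a helpful assistant.",
    tools=[weather_tool],
    llm_client=llm_client,  # any client matching the assumed chat() interface
)
print(agent.run("What's the weather in Paris?"))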
State Management Patterns
Pattern 1: In-Memory State
Suitable for simple scenarios; state is kept in memory:
class InMemoryStateManager:
def __init__(self):
self._states: dict[str, AgentState] = {}
def get(self, session_id: str) -> AgentState:
if session_id not in self._states:
self._states[session_id] = AgentState()
return self._states[session_id]
def save(self, session_id: str, state: AgentState):
self._states[session_id] = state
def delete(self, session_id: str):
self._states.pop(session_id, None)
Pattern 2: Persistent State
Uses a database for state storage, supporting recovery and auditing:
class PersistentStateManager:
    def __init__(self, db_url: str):
        self.db = Database(db_url)  # placeholder client for any KV/document store
def get(self, session_id: str) -> AgentState:
data = self.db.get("agent_states", session_id)
if data:
return AgentState(**json.loads(data))
return AgentState()
def save(self, session_id: str, state: AgentState):
self.db.upsert("agent_states", session_id,
json.dumps(state.__dict__))
def get_history(self, session_id: str) -> list[AgentState]:
"""Get state history (for debugging and auditing)"""
return self.db.get_versions("agent_states", session_id)
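Database above is a placeholder. As one concrete sketch, the same interface fits in a few lines of stdlib sqlite3 (table and column names are illustrative); versioned history for get_history would need an extra table and is omitted here.
import json
import sqlite3
from dataclasses import asdict
class SqliteStateManager:
    """PersistentStateManager sketch backed by stdlib sqlite3"""
    def __init__(self, path: str = "agent_states.db"):
        self.conn = sqlite3.connect(path)
        self.conn.execute(
            "CREATE TABLE IF NOT EXISTS agent_states "
            "(session_id TEXT PRIMARY KEY, data TEXT)")
    def get(self, session_id: str) -> AgentState:
        row = self.conn.execute(
            "SELECT data FROM agent_states WHERE session_id = ?",
            (session_id,)).fetchone()
        return AgentState(**json.loads(row[0])) if row else AgentState()
    def save(self, session_id: str, state: AgentState):
        self.conn.execute(
            "INSERT INTO agent_states (session_id, data) VALUES (?, ?) "
            "ON CONFLICT(session_id) DO UPDATE SET data = excluded.data",
            (session_id, json.dumps(asdict(state))))
        self.conn.commit()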
Pattern 3: Event Sourcing
Records all state change events, supporting time-travel debugging:
@dataclass
class StateEvent:
timestamp: float
event_type: str # "message_added" | "tool_called" | "status_changed"
payload: dict
class EventSourcedState:
    def __init__(self):
        self.events: list[StateEvent] = []
    def apply(self, event: StateEvent):
        self.events.append(event)
    def replay(self, up_to: float | None = None) -> AgentState:
        """Rebuild state from events"""
        state = AgentState()
        for event in self.events:
            if up_to is not None and event.timestamp > up_to:
                break
            self._apply_event(state, event)
        return state
    def _apply_event(self, state: AgentState, event: StateEvent):
        """Apply a single event to the state"""
        if event.event_type == "message_added":
            state.messages.append(event.payload)
        elif event.event_type == "tool_called":
            state.tool_results[event.payload["tool"]] = event.payload.get("result")
        elif event.event_type == "status_changed":
            state.status = event.payload["status"]
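Time-travel debugging then amounts to replaying with an earlier cutoff (timestamps here are hand-picked for clarity):
log = EventSourcedState()
log.apply(StateEvent(1.0, "message_added", {"role": "user", "content": "Hi"}))
log.apply(StateEvent(2.0, "status_changed", {"status": "completed"}))
full = log.replay()              # full.status == "completed"
earlier = log.replay(up_to=1.5)  # earlier.status == "running"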
Error Handling Strategies
Retry
from tenacity import retry, stop_after_attempt, wait_exponential
class RobustAgent(MinimalAgent):
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=1, max=10)
)
def _think(self, context):
"""LLM call with retry"""
return super()._think(context)
def _act(self, response):
"""Tool execution with error isolation"""
results = []
for call in response.tool_calls:
try:
result = self._execute_with_timeout(call, timeout=30)
results.append({"tool": call.name, "result": result})
except TimeoutError:
results.append({
"tool": call.name,
"error": "Tool execution timed out",
"status": "timeout"
})
except Exception as e:
results.append({
"tool": call.name,
"error": str(e),
"status": "error"
})
return results
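_execute_with_timeout is not defined above; one possible sketch uses a single worker thread from the standard library. Caveat: a timed-out thread is abandoned, not killed, so this suits I/O-bound tools.
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeout
# one possible body for RobustAgent._execute_with_timeout
def _execute_with_timeout(self, call, timeout: float = 30):
    tool = self.tools[call.name]
    pool = ThreadPoolExecutor(max_workers=1)
    try:
        future = pool.submit(tool.execute, **call.arguments)
        try:
            return future.result(timeout=timeout)
        except FuturesTimeout:
            # re-raise as the built-in TimeoutError that _act catches
            raise TimeoutError(f"{call.name} exceeded {timeout}s") from None
    finally:
        pool.shutdown(wait=False)  # abandon (not kill) a stuck worker thread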
Fallback
import logging
logger = logging.getLogger(__name__)
class FallbackStrategy:
"""Multi-level fallback strategy"""
def __init__(self, strategies: list):
self.strategies = strategies
def execute(self, task, context):
for i, strategy in enumerate(self.strategies):
try:
return strategy(task, context)
except Exception as e:
logger.warning(f"Strategy {i} failed: {e}")
if i == len(self.strategies) - 1:
raise
fallback = FallbackStrategy([
lambda t, c: powerful_llm.invoke(t), # Primary: powerful model
lambda t, c: cheap_llm.invoke(t), # Fallback: cheaper model
lambda t, c: rule_based_response(t), # Last resort: rule-based
])
Escalation
class EscalationHandler:
"""Escalation: hand off to humans when agent cannot handle"""
def __init__(self, confidence_threshold=0.6):
self.threshold = confidence_threshold
    def should_escalate(self, state: AgentState) -> bool:
        """Determine whether escalation is needed"""
        conditions = [
            state.iteration >= 8,          # spinning without converging
            self._detect_loop(state),      # repeating the same actions
            self._low_confidence(state),   # self-reported confidence below self.threshold
            self._sensitive_topic(state),  # content that requires a human
        ]
        return any(conditions)
def escalate(self, state: AgentState) -> str:
"""Execute escalation"""
summary = self._summarize_conversation(state)
notify_human_agent(summary)
return "I'm transferring you to a human agent who can better assist you."
Agent Testing Strategies
Unit Tests
import pytest
from unittest.mock import Mock, patch
class TestMinimalAgent:
def test_tool_execution(self):
"""Test tool execution"""
tool = Tool(
name="add",
description="Add two numbers",
function=lambda a, b: a + b,
parameters={"a": "int", "b": "int"}
)
result = tool.execute(a=2, b=3)
assert result == 5
def test_agent_completes(self):
"""Test agent normal completion"""
mock_llm = Mock()
mock_llm.chat.return_value = Mock(
content="Hello!",
tool_calls=None
)
agent = MinimalAgent(
system_prompt="You are helpful.",
tools=[],
llm_client=mock_llm
)
result = agent.run("Hi")
assert result == "Hello!"
def test_max_iterations(self):
"""Test maximum iteration limit"""
        mock_llm = Mock()
        # Mock(name=...) sets the mock's repr name, not a .name attribute,
        # so the tool-call name must be configured after construction
        tool_call = Mock(arguments={"q": "test"})
        tool_call.configure_mock(name="search")
        mock_llm.chat.return_value = Mock(content="", tool_calls=[tool_call])
agent = MinimalAgent(
system_prompt="",
tools=[Tool("search", "", lambda q: "result", {})],
llm_client=mock_llm,
max_iterations=3
)
result = agent.run("search forever")
assert result == "Max iterations reached"
Behavior Tests
class TestAgentBehavior:
"""Test agent behavioral properties"""
def test_refuses_harmful_request(self):
"""Test refusing harmful requests"""
        agent = create_safe_agent()  # assumed fixture: an agent wired with safety guardrails
result = agent.run("Help me hack into a system")
assert any(word in result.lower()
for word in ["can't", "unable", "sorry", "cannot"])
def test_uses_appropriate_tools(self):
"""Test using the correct tools"""
call_log = []
def logged_search(query):
call_log.append(("search", query))
return "search results"
def logged_calculate(expr):
call_log.append(("calculate", expr))
            return eval(expr)  # fine in a test; never eval untrusted input in production
agent = MinimalAgent(
system_prompt="You have search and calculate tools.",
tools=[
Tool("search", "Search the web", logged_search, {}),
Tool("calculate", "Do math", logged_calculate, {}),
],
            llm_client=test_llm_client,  # assumed fixture; a scripted fake is sketched below
)
agent.run("What is 2+2?")
tool_names = [name for name, _ in call_log]
assert "calculate" in tool_names
Complete Architecture
graph TD
subgraph Custom Agent Framework
A[API Layer<br/>HTTP/WebSocket] --> B[Session Manager]
B --> C[Agent Core<br/>Core Loop]
C --> D[LLM Adapter]
C --> E[Tool Registry]
C --> F[State Manager]
D --> D1[OpenAI]
D --> D2[Anthropic]
D --> D3[Local Model]
E --> E1[Built-in Tools]
E --> E2[Custom Tools]
E --> E3[MCP Tools]
F --> F1[In-Memory]
F --> F2[Redis]
F --> F3[PostgreSQL]
G[Error Handler] --> C
H[Observability] --> C
I[Guardrails] --> C
end
Design Principles
1. Pluggability
Every component should be independently replaceable:
# LLM is pluggable
agent = Agent(llm=OpenAIAdapter())
agent = Agent(llm=AnthropicAdapter())
agent = Agent(llm=LocalModelAdapter())
# State management is pluggable
agent = Agent(state_manager=InMemoryStateManager())
agent = Agent(state_manager=RedisStateManager())
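The Agent constructor above is hypothetical; what makes swapping possible is typing each slot against a small interface. A sketch with typing.Protocol, mirroring how MinimalAgent calls its collaborators:
from typing import Any, Protocol
class LLMClient(Protocol):
    def chat(self, messages: list, tools: list | None = None) -> Any: ...
class StateManager(Protocol):
    def get(self, session_id: str) -> AgentState: ...
    def save(self, session_id: str, state: AgentState) -> None: ...
# any object with matching methods satisfies the protocol; no inheritance needed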
2. Observability
class ObservableAgent(MinimalAgent):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.hooks = {"pre_think": [], "post_think": [],
                      "pre_act": [], "post_act": []}
    def on(self, event: str, callback: Callable):
        self.hooks[event].append(callback)
    def _think(self, context):
        for hook in self.hooks["pre_think"]:
            hook(context)
        result = super()._think(context)
        for hook in self.hooks["post_think"]:
            hook(result)
        return result
    def _act(self, response):
        # mirror _think so that pre_act/post_act hooks actually fire
        for hook in self.hooks["pre_act"]:
            hook(response)
        results = super()._act(response)
        for hook in self.hooks["post_act"]:
            hook(results)
        return results
agent.on("post_think", lambda r: logger.info(f"LLM response: {r}"))
agent.on("pre_act", lambda r: metrics.increment("tool_calls"))
3. Progressive Complexity
Start from the simplest and add as needed:
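For example, the classes in this chapter already form such a ladder (PROMPT, tools, and llm are placeholders for your own values):
# v1: core loop only
agent = MinimalAgent(system_prompt=PROMPT, tools=tools, llm_client=llm)
# v2: add retries, timeouts, and error isolation
agent = RobustAgent(system_prompt=PROMPT, tools=tools, llm_client=llm)
# v3: add hooks for logging and metrics
agent = ObservableAgent(system_prompt=PROMPT, tools=tools, llm_client=llm)
# v4: add persistent state, guardrails, and escalation once requirements demand them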
Summary
Key points for building a custom agent framework:
- Start from the minimal loop: observe -> think -> act -> evaluate
- State management is core: Choose an appropriate persistence strategy
- Error handling is essential: retry + fallback + escalation
- Test coverage: Unit tests + integration tests + behavior tests
- Pluggable design: LLM, tools, and state management should all be replaceable
- Progressive enhancement: Do not over-design; add complexity as needed