OpenAI Agents SDK

概述

OpenAI Agents SDK是OpenAI官方推出的智能体开发框架，由早期的实验性项目Swarm演进而来，是其面向生产环境的升级版本。它提供了轻量级但功能完整的Agent构建原语，包括工具调用、智能体交接（Handoff）、护栏（Guardrails）等核心功能。

核心架构

graph TD
    subgraph OpenAI Agents SDK
        A[Agent] --> B[Instructions<br/>系统提示]
        A --> C[Tools<br/>工具集]
        A --> D[Handoffs<br/>智能体交接]
        A --> E[Guardrails<br/>护栏]
    end

    subgraph Built-in Tools
        C --> C1[Web Search<br/>网页搜索]
        C --> C2[Code Interpreter<br/>代码解释器]
        C --> C3[File Search<br/>文件搜索]
        C --> C4[Custom Functions<br/>自定义函数]
    end

    subgraph Execution
        F[Runner] --> G[Agent Loop]
        G --> H[LLM Call]
        H --> I{Tool Call?}
        I -->|是| J[Execute Tool]
        J --> H
        I -->|否| K[Return Result]
    end

基础用法

创建Agent

# The SDK is published as the `openai-agents` package and imported as `agents`.
from agents import Agent, Runner

# The simplest possible agent: a name, a system prompt, and a model.
agent = Agent(
    name="Assistant",
    instructions="You are a helpful assistant.",
    model="gpt-4o",
)

# Run synchronously. The user input is the second positional argument
# (a plain string or a list of input items), not a `messages=` keyword.
result = Runner.run_sync(agent, "Hello!")
print(result.final_output)

添加工具

# The SDK is published as the `openai-agents` package and imported as `agents`.
from agents import Agent, Runner, function_tool


# NOTE(review): function_tool presumably exposes the signature and docstring
# to the model as the tool schema/description, so the docstring wording is
# left exactly as written -- confirm before rephrasing it.
@function_tool
def get_weather(city: str) -> str:
    """Get current weather for a city."""
    # Placeholder: a real implementation would call a weather API here.
    # This stub returns the same hard-coded report for every city.
    return f"Weather in {city}: 72°F, sunny"

# Stub flight search: always reports three flights and only echoes its
# arguments back; no real lookup is performed.
# NOTE(review): the docstring is presumably surfaced to the model as the tool
# description, so it is kept unchanged -- confirm before rewording.
@function_tool
def search_flights(origin: str, destination: str, date: str) -> str:
    """Search for available flights."""
    return f"Found 3 flights from {origin} to {destination} on {date}"

# Agent equipped with both tools defined in this snippet; the instructions
# tell the model to use them for weather and flight questions.
travel_agent = Agent(
    name="Travel Assistant",
    instructions="""You help users plan travel. Use the available 
    tools to get weather and flight information.""",
    tools=[get_weather, search_flights],
    model="gpt-4o"
)

内置工具

Web Search

# The SDK is published as the `openai-agents` package and imported as `agents`.
from agents import Agent, WebSearchTool

# Agent whose only tool is the hosted web search.
research_agent = Agent(
    name="Researcher",
    instructions="Search the web to answer questions with current information.",
    tools=[WebSearchTool()],
    model="gpt-4o",
)

# Web Search takes care of the following automatically:
# 1. Generating the search queries
# 2. Running the web search
# 3. Extracting the relevant information
# 4. Synthesizing the answer

Code Interpreter

# The SDK is published as the `openai-agents` package and imported as `agents`.
from agents import Agent, CodeInterpreterTool

# CodeInterpreterTool needs an explicit tool_config; "auto" asks the platform
# to provision the sandbox container for the run.
data_agent = Agent(
    name="Data Analyst",
    instructions="""You analyze data using Python. 
    Write and execute code to answer questions.""",
    tools=[
        CodeInterpreterTool(
            tool_config={"type": "code_interpreter", "container": {"type": "auto"}},
        )
    ],
    model="gpt-4o",
)

# Code Interpreter provides:
# - A sandboxed Python environment
# - Data-analysis libraries (pandas, numpy, matplotlib)
# - File upload / download
# - Chart generation

File Search (RAG)

from pathlib import Path

# The SDK is published as the `openai-agents` package and imported as `agents`.
from agents import Agent, FileSearchTool
from openai import OpenAI

# API client used to manage the vector store (reads OPENAI_API_KEY from the
# environment by default).
client = OpenAI()

# Create the vector store and upload the documents. upload_and_poll blocks
# until the batch has been processed, so the store is searchable afterwards.
# Files are passed as path objects (or open binary files), not bare strings.
vector_store = client.vector_stores.create(name="knowledge_base")
client.vector_stores.file_batches.upload_and_poll(
    vector_store_id=vector_store.id,
    files=[Path("doc1.pdf"), Path("doc2.pdf")],
)

# Agent that answers from the uploaded documents via hosted file search.
knowledge_agent = Agent(
    name="Knowledge Assistant",
    instructions="Answer questions based on the uploaded documents.",
    tools=[FileSearchTool(vector_store_ids=[vector_store.id])],
    model="gpt-4o",
)

Handoff模式

Handoff是Agents SDK的核心创新，允许智能体之间无缝交接控制权。

基本Handoff

# Runner is used below, so it must be imported alongside Agent and handoff.
# The SDK is published as the `openai-agents` package and imported as `agents`.
from agents import Agent, Runner, handoff

# Specialist agents. The tool functions (check_balance, process_payment,
# check_system_status, restart_service) are not defined in this snippet and
# are assumed to exist elsewhere.
billing_agent = Agent(
    name="Billing Specialist",
    instructions="You handle billing and payment questions.",
    tools=[check_balance, process_payment],
)

technical_agent = Agent(
    name="Technical Support",
    instructions="You handle technical issues and troubleshooting.",
    tools=[check_system_status, restart_service],
)

# Routing agent: it has no tools of its own, only handoffs to the specialists.
triage_agent = Agent(
    name="Customer Service",
    instructions="""You are the first point of contact. 
    Route to the appropriate specialist:
    - Billing questions -> Billing Specialist
    - Technical issues -> Technical Support""",
    handoffs=[
        handoff(billing_agent),
        handoff(technical_agent),
    ],
)

# Routing happens at run time. The user input is passed positionally.
result = Runner.run_sync(triage_agent, "I can't log into my account")
# For this login problem the triage agent is expected to hand off to
# technical_agent (the model makes the routing decision).

带上下文的Handoff

# The SDK is published as the `openai-agents` package and imported as `agents`.
from agents import Handoff, handoff


def transfer_to_billing(context: dict) -> Handoff:
    """Build a customized handoff that routes the conversation to billing.

    Args:
        context: Caller-supplied context. It is accepted for interface
            compatibility but is not used when building the handoff.
            NOTE(review): to actually pass data at handoff time the SDK
            offers ``on_handoff`` / ``input_type`` -- confirm and wire up
            if that is the intent.

    Returns:
        A ``Handoff`` targeting ``billing_agent`` with an overridden tool
        name and description (``handoff()`` returns a Handoff, not an Agent).
    """
    return handoff(
        billing_agent,
        tool_name_override="transfer_to_billing",
        tool_description_override="Transfer to billing when user has payment issues",
    )

多层Handoff架构

graph TD
    A[Triage Agent<br/>分诊智能体] -->|账单问题| B[Billing Agent<br/>账单智能体]
    A -->|技术问题| C[Tech Agent<br/>技术智能体]
    A -->|销售咨询| D[Sales Agent<br/>销售智能体]

    C -->|网络问题| C1[Network Specialist<br/>网络专家]
    C -->|软件问题| C2[Software Specialist<br/>软件专家]

    B -->|退款请求| B1[Refund Agent<br/>退款智能体]

Guardrails (护栏)

护栏机制确保Agent行为在安全边界内。

输入护栏

from openai.agents import Agent, InputGuardrail, GuardrailResult

class ContentFilter(InputGuardrail):
    """Input guardrail that rejects requests a moderation model calls unsafe.

    NOTE(review): the released SDK appears to expose guardrails through the
    ``@input_guardrail`` decorator and ``GuardrailFunctionOutput`` rather than
    a subclass with ``run`` -- confirm against the installed version.
    """

    async def run(self, input_text: str) -> GuardrailResult:
        """Ask ``moderation_llm`` about ``input_text`` and allow or block it.

        The input is blocked whenever the moderation reply contains the
        substring "unsafe" (case-insensitive); otherwise it is allowed.
        """
        prompt = f"Is this input safe and appropriate? '{input_text}'"
        verdict = await moderation_llm.invoke(prompt)

        # Guard clause: anything not flagged passes straight through.
        if "unsafe" not in verdict.lower():
            return GuardrailResult(allow=True)

        return GuardrailResult(
            allow=False,
            message="I'm sorry, I can't help with that request.",
        )

# Agent with a ContentFilter instance registered as its input guardrail.
safe_agent = Agent(
    name="Safe Assistant",
    instructions="You are a helpful assistant.",
    input_guardrails=[ContentFilter()],
)

输出护栏

class FactCheckGuardrail(OutputGuardrail):
    """Output guardrail that blocks answers the fact checker is unsure about."""

    async def run(self, output_text: str) -> GuardrailResult:
        """Verify ``output_text`` and block it when confidence is below 0.7.

        ``fact_checker`` is not defined in this snippet; its ``verify`` result
        is only required to expose a ``confidence`` attribute.
        """
        verification = await fact_checker.verify(output_text)
        low_confidence = verification.confidence < 0.7

        if not low_confidence:
            return GuardrailResult(allow=True)

        return GuardrailResult(
            allow=False,
            message="Let me double-check that information...",
        )

Agent Loop详解

执行流程

# Simplified sketch of the Runner's internal execution logic
class Runner:
    """Illustrative agent loop covering guardrails, LLM calls, tools, handoffs.

    This is teaching pseudo-code, not the SDK implementation. ``llm``,
    ``RunResult``, ``is_handoff``, ``get_handoff_target``, ``execute_tool``,
    ``tool_result_message`` and ``retry_message`` are not defined here and are
    assumed to exist.
    """

    @staticmethod
    async def run(agent, messages, max_turns=10):
        """Drive ``agent`` until it produces a final answer.

        Args:
            agent: The agent that starts the conversation.
            messages: Initial conversation (iterable of chat messages).
            max_turns: Upper bound on LLM round-trips.

        Returns:
            A ``RunResult`` whose ``final_output`` is either the model's
            answer or the message of the input guardrail that blocked the run.

        Raises:
            RuntimeError: If no final answer is produced within ``max_turns``
                (previously the method fell off the loop and returned None).
        """
        current_agent = agent
        conversation = list(messages)

        # 1. Input guardrails check the user's request, so they run once, up
        #    front, on the text of the latest message. Running them on every
        #    turn would feed them tool-result messages instead of user input.
        if conversation:
            latest = conversation[-1]
            user_text = latest["content"] if isinstance(latest, dict) else latest
            for guardrail in current_agent.input_guardrails:
                verdict = await guardrail.run(user_text)
                if not verdict.allow:
                    return RunResult(final_output=verdict.message)

        for _turn in range(max_turns):
            # 2. Call the LLM with the current agent's prompt and tools.
            response = await llm.create(
                model=current_agent.model,
                messages=[
                    {"role": "system", "content": current_agent.instructions},
                    *conversation,
                ],
                tools=current_agent.get_tool_definitions(),
            )

            # 3. Tool calls: either switch agents (handoff) or run the tool
            #    and feed its result back, then go around the loop again.
            # NOTE(review): the assistant message carrying the tool calls is
            # not appended to the conversation in this simplified sketch.
            if response.tool_calls:
                for tool_call in response.tool_calls:
                    if is_handoff(tool_call, current_agent):
                        current_agent = get_handoff_target(tool_call)
                        continue

                    tool_output = await execute_tool(tool_call)
                    conversation.append(tool_result_message(tool_output))
                continue

            # 4. Output guardrails: the first rejection sends the model back
            #    for another attempt instead of returning the rejected text.
            rejection = None
            for guardrail in current_agent.output_guardrails:
                verdict = await guardrail.run(response.content)
                if not verdict.allow:
                    rejection = verdict
                    break

            if rejection is not None:
                conversation.append(retry_message(rejection))
                continue

            return RunResult(final_output=response.content)

        raise RuntimeError(
            f"Agent loop exceeded max_turns={max_turns} without a final answer"
        )

与Responses API的关系

Agents SDK构建在OpenAI的Responses API之上：

层级	说明
Responses API	底层API，支持工具调用、流式输出
Agents SDK	上层框架，提供Agent抽象、Handoff、Guardrails

# Low-level Responses API
response = client.responses.create(
    model="gpt-4o",
    input="Search for recent AI news",
    tools=[{"type": "web_search"}]
)

# High-level Agents SDK (same request, higher abstraction). The model is
# pinned and the same input string is passed positionally so that the two
# halves of the comparison really are equivalent.
agent = Agent(
    name="News Agent",
    tools=[WebSearchTool()],
    model="gpt-4o",
)
result = Runner.run_sync(agent, "Search for recent AI news")

流式输出

# The SDK is published as the `openai-agents` package and imported as `agents`.
from agents import Runner


async def stream_response():
    """Stream a run of ``agent`` and print text, tool calls, and handoffs.

    ``Runner.run_streamed`` returns a streaming result object right away;
    the events are consumed through its ``stream_events()`` async iterator.
    ``agent`` is not defined in this snippet and is assumed to exist.
    """
    result = Runner.run_streamed(agent, "Write a story")

    async for event in result.stream_events():
        if event.type == "raw_response_event":
            # Token-level text arrives as raw Responses API delta events.
            if event.data.type == "response.output_text.delta":
                print(event.data.delta, end="", flush=True)
        elif event.type == "run_item_stream_event":
            if event.item.type == "tool_call_item":
                # Not every tool-call item carries a name (hosted tools may
                # not), so fall back to a placeholder.
                tool_name = getattr(event.item.raw_item, "name", "unknown")
                print(f"\n[Calling tool: {tool_name}]")
        elif event.type == "agent_updated_stream_event":
            # Emitted when a handoff makes another agent the active one.
            print(f"\n[Transferring to: {event.new_agent.name}]")

实际案例: 客服系统

# Complete multi-agent customer service system
# The SDK is published as the `openai-agents` package and imported as `agents`.
from agents import Agent, Runner, function_tool, handoff


# NOTE(review): the docstring is presumably surfaced to the model as the tool
# description, so it is kept unchanged -- confirm before rewording.
@function_tool
def lookup_order(order_id: str) -> str:
    """Look up order details by order ID."""
    # Delegates to db.get_order; `db` is not defined in this snippet and its
    # return value is passed through as-is.
    return db.get_order(order_id)

# NOTE(review): the docstring is presumably surfaced to the model as the tool
# description, so it is kept unchanged -- confirm before rewording.
@function_tool
def process_refund(order_id: str, reason: str) -> str:
    """Process a refund for an order."""
    # Delegates to payment.refund; `payment` is not defined in this snippet
    # and its return value is passed through as-is.
    return payment.refund(order_id, reason)

# NOTE(review): the docstring is presumably surfaced to the model as the tool
# description, so it is kept unchanged -- confirm before rewording.
@function_tool  
def check_inventory(product_id: str) -> str:
    """Check product inventory status."""
    # Delegates to inventory.check; `inventory` is not defined in this snippet
    # and its return value is passed through as-is.
    return inventory.check(product_id)

# Refund specialist: can look up an order and issue a refund for it.
refund_agent = Agent(
    name="Refund Specialist",
    instructions="""You process refund requests. Always verify the order 
    exists before processing. Be empathetic and professional.""",
    tools=[lookup_order, process_refund],
)

# Order specialist: can look up orders and check product inventory.
order_agent = Agent(
    name="Order Specialist",  
    instructions="""You help customers track and manage their orders.""",
    tools=[lookup_order, check_inventory],
)

# Front-desk triage agent: it has no tools of its own; it either hands off to
# one of the two specialists or answers general questions directly.
triage = Agent(
    name="Customer Service",
    instructions="""You are the front desk of customer service.
    - For refund requests: transfer to Refund Specialist
    - For order inquiries: transfer to Order Specialist
    - For general questions: answer directly""",
    handoffs=[handoff(refund_agent), handoff(order_agent)],
)

# Run the system. `await` is only valid inside a coroutine, so the call is
# wrapped in an async main() and driven with asyncio.run().
import asyncio


async def main():
    """Send one refund request through the triage agent and return the result."""
    # The user input is passed positionally rather than as `messages=`.
    return await Runner.run(triage, "I want a refund for order #12345")


result = asyncio.run(main())

最佳实践

1. Agent设计原则

单一职责: 每个Agent专注一个领域
清晰指令: Instructions要具体、明确
适当工具: 只给Agent需要的工具
合理Handoff: 明确交接条件

2. 错误处理

# The SDK is published as the `openai-agents` package and imported as `agents`.
from agents import AgentsException, Runner

FALLBACK_RESPONSE = "I'm having trouble. Let me connect you with a human agent."


async def run_with_fallback(agent, user_input, max_turns=10):
    """Run ``agent`` and return its answer, or a canned fallback on failure.

    NOTE(review): this assumes the SDK reports failures by raising
    ``AgentsException`` subclasses (for example when ``max_turns`` is
    exceeded or a guardrail trips) rather than through an ``is_error`` flag
    on the result -- confirm against the installed version.

    Args:
        agent: The agent to run.
        user_input: The user's message (string or list of input items).
        max_turns: Upper bound on agent-loop iterations.

    Returns:
        The run's ``final_output``, or ``FALLBACK_RESPONSE`` if the run failed.
    """
    try:
        result = await Runner.run(agent, user_input, max_turns=max_turns)
    except AgentsException as exc:
        # `logger` is not defined in this snippet and is assumed to exist.
        logger.error("Agent error: %s", exc)
        return FALLBACK_RESPONSE
    return result.final_output

3. 成本控制

使用 gpt-4o-mini 处理简单路由
限制 max_turns 防止无限循环
缓存常见查询的结果

总结

OpenAI Agents SDK的核心优势：

简洁性: 最小化概念，快速上手
Handoff模式: 优雅的多智能体协作方案
内置工具: Web Search、Code Interpreter、File Search开箱即用
Guardrails: 原生安全机制
OpenAI集成: 与OpenAI生态无缝衔接