Skip to content

Agents

AgentHelm provides DSPy-native agents for tool calling and planning.

Quick Start

import dspy
from agenthelm import ToolAgent, PlannerAgent

# Configure your LLM (one dspy.LM instance is shared by both agents below)
lm = dspy.LM("mistral/mistral-large-latest")

# Create a tool agent
# NOTE: my_search_tool / my_summarize_tool are not defined in this snippet;
# pass your own tool callables here.
agent = ToolAgent(
    name="assistant",
    lm=lm,
    tools=[my_search_tool, my_summarize_tool],
    # max_iters is optional; 10 is also the ToolAgent default.
    max_iters=10
)
# run() returns an AgentResult; .answer holds the final answer (None on failure).
result = agent.run("What is the weather in NYC?")
print(result.answer)

# Create a planner agent
# NOTE: search / summarize / write_file are likewise not defined in this snippet.
planner = PlannerAgent(
    name="planner",
    lm=lm,
    tools=[search, summarize, write_file]
)
# plan() only generates a Plan object; it does not execute any step.
plan = planner.plan("Research AI trends and create a report")
print(plan.to_yaml())  # Review before execution

Agent Types

ToolAgent

ReAct-style agent that reasons and executes tools in a loop.

from agenthelm import ToolAgent, tool

# @tool() is the decorator imported from agenthelm above; the decorated
# function is then passed to the agent through its `tools` list.
@tool()
def get_weather(city: str) -> str:
    """Get weather for a city."""
    return f"Sunny in {city}"

# `lm` is the dspy.LM instance configured in the Quick Start section.
agent = ToolAgent(name="weather_bot", lm=lm, tools=[get_weather])
result = agent.run("What's the weather in Paris?")

# Access results (values shown are example output; the answer text comes from the LLM)
print(result.success)    # True
print(result.answer)     # "The weather in Paris is sunny."
print(result.events)     # List of traced tool executions

PlannerAgent

Generates structured execution plans before acting.

from agenthelm import PlannerAgent

# `lm` is the dspy.LM instance configured in the Quick Start section;
# search / analyze / report are not defined in this snippet (supply your own tools).
planner = PlannerAgent(name="planner", lm=lm, tools=[search, analyze, report])
plan = planner.plan("Analyze competitor pricing")

# Review the plan
# plan.goal is taken from the LLM output, so it may be worded differently
# from the task string passed to plan().
print(plan.goal)       # e.g. "Analyze competitor pricing strategies"
print(plan.reasoning)  # LLM's reasoning
for step in plan.steps:
    print(f"  {step.id}: {step.tool_name} - {step.description}")

# Execution is handled by Orchestrator (Week 4)

AgentResult

ToolAgent.run() returns an AgentResult with execution details (PlannerAgent.run() returns a Plan instead):

# `agent` is a ToolAgent; its run() returns an AgentResult.
result = agent.run("Do something")

# Fields available on the returned AgentResult:
result.success          # bool - Did the agent complete successfully?
result.answer           # str | None - Final answer
result.error            # str | None - Error message if failed
result.events           # list[Event] - All traced tool executions
result.total_cost_usd   # float - Aggregated LLM cost
result.token_usage      # TokenUsage - Aggregated tokens
result.iterations       # int - Number of reasoning loops
# Also available: result.session_id (ToolAgent.run() sets it to the agent's name).

Plan Schema

Plans are structured for parallel and sequential execution:

from agenthelm import Plan, PlanStep, StepStatus

# Build a plan by hand: two independent searches feeding one summarize step.
plan = Plan(
    goal="Research and summarize",
    steps=[
        PlanStep(id="1", tool_name="search", description="Find sources"),
        PlanStep(id="2", tool_name="search", description="Find more sources"),
        PlanStep(id="3", tool_name="summarize", description="Combine results",
                 depends_on=["1", "2"]),  # Runs after 1 and 2 complete
    ]
)

# Steps 1 and 2 can run in parallel (no dependencies)
# get_ready_steps() returns the PENDING steps whose depends_on IDs are all COMPLETED.
ready = plan.get_ready_steps()  # Returns steps 1 and 2

# After completion
# mark_completed() sets the step's status to COMPLETED and stores `result` on it.
plan.mark_completed("1", result="source A")
plan.mark_completed("2", result="source B")

# Now step 3 is ready
ready = plan.get_ready_steps()  # Returns step 3

Memory Integration

Agents can use the Memory Hub for context persistence:

from agenthelm import ToolAgent, MemoryHub

hub = MemoryHub(data_dir="./data")

# The hub is handed to the agent through the `memory` constructor argument;
# `lm` and `search` are not defined in this snippet.
agent = ToolAgent(
    name="memory_agent",
    lm=lm,
    tools=[search],
    memory=hub
)

# Agent can use memory internally
# NOTE(review): _remember / _recall are underscore-prefixed helpers that are not
# documented on this page - confirm their signatures before relying on them.
# await agent._remember("User prefers dark mode")
# results = await agent._recall("user preferences")

Tracing Integration

Agents automatically trace tool executions when given a tracer:

from agenthelm import ToolAgent, ExecutionTracer
from agenthelm.core.storage import SqliteStorage

tracer = ExecutionTracer(storage=SqliteStorage("traces.db"))

# The tracer is handed to the agent through the `tracer` constructor argument;
# `lm` and `my_tool` are not defined in this snippet.
agent = ToolAgent(
    name="traced_agent",
    lm=lm,
    tools=[my_tool],
    tracer=tracer
)

result = agent.run("Do something")
# All tool calls are now persisted to traces.db
# The events collected during this run are also attached to result.events.

API Reference

BaseAgent

BaseAgent(
    name,
    lm,
    tools=None,
    memory=None,
    tracer=None,
    role=None,
)

Bases: ABC

Abstract base class for all agents.

Parameters:

Name Type Description Default
name str

Agent identifier for tracing and registry

required
lm LM

DSPy language model

required
tools list[Callable] | None

List of tool functions the agent can use

None
memory MemoryHub | None

Optional MemoryHub for context persistence

None
tracer ExecutionTracer | None

Optional ExecutionTracer for tool call logging

None
role str | None

Optional role/persona description that influences behavior

None
Source code in agenthelm/agent/base.py
def __init__(
    self,
    name: str,
    lm: dspy.LM,
    tools: list[Callable] | None = None,
    memory: MemoryHub | None = None,
    tracer: ExecutionTracer | None = None,
    role: str | None = None,
):
    """Store the agent's configuration on the instance.

    Args:
        name: Agent identifier.
        lm: DSPy language model used by the agent.
        tools: Tool callables; ``None`` (or an empty list) becomes ``[]``.
        memory: Optional MemoryHub, kept as given.
        tracer: Optional ExecutionTracer, kept as given.
        role: Optional role/persona description, kept as given.
    """
    # Identity and model.
    self.name, self.lm = name, lm
    # A falsy tools argument is replaced by a fresh empty list.
    self.tools = tools if tools else []
    # Optional collaborators stay None when they are not supplied.
    self.memory, self.tracer = memory, tracer
    self.role = role

Attributes

lm instance-attribute

lm = lm

memory instance-attribute

memory = memory

name instance-attribute

name = name

role instance-attribute

role = role

tools instance-attribute

tools = tools or []

tracer instance-attribute

tracer = tracer

Functions

run abstractmethod

run(task)
Source code in agenthelm/agent/base.py
@abstractmethod
def run(self, task: str):
    """Run the agent on ``task``; every concrete agent must override this."""

options: show_source: false

ToolAgent

ToolAgent(
    name,
    lm,
    tools=None,
    memory=None,
    tracer=None,
    role=None,
    max_iters=10,
)

Bases: BaseAgent

ReAct-style agent that reasons and executes tools.

Uses DSPy's ReAct pattern to iteratively:

1. Reason about the task
2. Choose and execute a tool
3. Observe the result
4. Repeat until done

Source code in agenthelm/agent/tool_agent.py
def __init__(
    self,
    name: str,
    lm: dspy.LM,
    tools: list[Callable] | None = None,
    memory: MemoryHub | None = None,
    tracer: ExecutionTracer | None = None,
    role: str | None = None,
    max_iters: int = 10,
):
    """Set up the base agent state and the underlying ``dspy.ReAct`` module.

    Args:
        name, lm, tools, memory, tracer, role: Forwarded unchanged to the
            base-class initializer.
        max_iters: Iteration limit handed to ``dspy.ReAct``.
    """
    super().__init__(name, lm, tools, memory, tracer, role)
    self.max_iters = max_iters

    # The signature gains a `role` input only when a role was configured.
    signature = "task, role -> answer" if self.role else "task -> answer"

    # Tools are passed through self._wrap_tools_for_tracing() (defined elsewhere).
    traced_tools = self._wrap_tools_for_tracing()
    self._react = dspy.ReAct(
        signature=signature,
        tools=traced_tools,
        max_iters=self.max_iters,
    )
    # Per-run event buffer; run() resets it at the start of each call.
    self._events = []

Attributes

max_iters instance-attribute

max_iters = max_iters

Functions

run

run(task)

Execute the ReAct loop and return results with traced events.

Source code in agenthelm/agent/tool_agent.py
def run(self, task: str) -> AgentResult:
    """Run the ReAct module on ``task`` and package the outcome.

    Any exception raised while running is caught and reported through
    ``result.error`` with ``result.success`` left False; it is not re-raised.

    Args:
        task: The task text passed to the ReAct module.

    Returns:
        AgentResult with ``session_id`` set to the agent's name, the answer
        (on success) or error (on failure), and every event buffered in
        ``self._events`` during this call.
    """
    self._events = []  # start each run with an empty event buffer
    result = AgentResult(success=False, session_id=self.name)

    # `role` is only passed when configured, matching the signature chosen in __init__.
    react_inputs = {"task": task}
    if self.role:
        react_inputs["role"] = self.role

    try:
        with dspy.context(lm=self.lm):
            react_result = self._react(**react_inputs)

        result.success = True
        result.answer = react_result.answer
    except Exception as e:
        result.success = False
        result.error = str(e)

    # Attach the events buffered on this agent during the run
    for buffered_event in self._events:
        result.add_event(buffered_event)

    return result

options: show_source: false

PlannerAgent

PlannerAgent(
    name,
    lm,
    tools=None,
    memory=None,
    tracer=None,
    role=None,
    max_steps=10,
)

Bases: BaseAgent

Agent that generates structured execution plans.

The PlannerAgent creates plans but does NOT execute them. Execution is handled by the Orchestrator (Week 4).

Source code in agenthelm/agent/planner.py
def __init__(
    self,
    name: str,
    lm: dspy.LM,
    tools: list[Callable] | None = None,
    memory: MemoryHub | None = None,
    tracer: ExecutionTracer | None = None,
    role: str | None = None,
    max_steps: int = 10,
):
    """Set up the base agent state and the ``dspy.ChainOfThought`` planner.

    Args:
        name, lm, tools, memory, tracer, role: Forwarded unchanged to the
            base-class initializer.
        max_steps: Stored on the instance as ``self.max_steps``.
    """
    super().__init__(name, lm, tools, memory, tracer, role)
    self.max_steps = max_steps

    # The planning signature gains a `role` input only when a role was configured.
    if self.role:
        planning_signature = (
            "task, available_tools, role -> goal, reasoning, steps_json"
        )
    else:
        planning_signature = "task, available_tools -> goal, reasoning, steps_json"

    self._planning = dspy.ChainOfThought(planning_signature)

Attributes

max_steps instance-attribute

max_steps = max_steps

Functions

plan

plan(task)

Generate an execution plan for the given task.

Parameters:

Name Type Description Default
task str

The task to plan for

required

Returns:

Type Description
Plan

Plan object with steps (not yet executed)

Source code in agenthelm/agent/planner.py
def plan(self, task: str) -> Plan:
    """
    Ask the planning module for a plan covering ``task``.

    Args:
        task: The task to plan for

    Returns:
        Plan built from the module's goal, reasoning and parsed steps.
        Nothing is executed here.
    """
    # `role` is only passed when configured, matching the signature chosen in __init__.
    planner_inputs = {
        "task": task,
        "available_tools": self._get_tool_descriptions(),
    }
    if self.role:
        planner_inputs["role"] = self.role

    with dspy.context(lm=self.lm):
        prediction = self._planning(**planner_inputs)

    # steps_json is turned into step objects by self._parse_steps() (defined elsewhere).
    parsed_steps = self._parse_steps(prediction.steps_json)

    return Plan(
        goal=prediction.goal,
        reasoning=prediction.reasoning,
        steps=parsed_steps,
    )

run

run(task)

Generate a plan (alias for plan()).

Source code in agenthelm/agent/planner.py
def run(self, task: str) -> Plan:
    """Delegate to plan() and return the Plan it produces."""
    generated_plan = self.plan(task)
    return generated_plan

options: show_source: false

AgentResult

Bases: BaseModel

Result of an agent execution.

Contains the final answer, execution events, and cost/token metrics.

Attributes

answer class-attribute instance-attribute

answer = Field(
    default=None, description="Final answer from the agent"
)

error class-attribute instance-attribute

error = Field(
    default=None, description="Error message if failed"
)

events class-attribute instance-attribute

events = Field(
    default_factory=list, description="All tool executions"
)

iterations class-attribute instance-attribute

iterations = Field(
    default=0, description="Number of reasoning iterations"
)

session_id class-attribute instance-attribute

session_id = Field(
    default=None, description="Session identifier"
)

success class-attribute instance-attribute

success = Field(
    description="Whether the agent completed successfully"
)

token_usage class-attribute instance-attribute

token_usage = Field(
    default_factory=lambda: TokenUsage(
        input_tokens=0, output_tokens=0
    ),
    description="Aggregated token usage",
)

total_cost_usd class-attribute instance-attribute

total_cost_usd = Field(
    default=0.0, description="Total estimated cost"
)

Functions

add_event

add_event(event)

Add an event and update aggregated metrics.

Source code in agenthelm/agent/result.py
def add_event(self, event: Event) -> None:
    """Record ``event`` and fold its cost and token counts into the totals.

    Args:
        event: The event to append. A falsy ``estimated_cost_usd`` or
            ``token_usage`` leaves the corresponding total unchanged.
    """
    self.events.append(event)

    cost = event.estimated_cost_usd
    if cost:
        self.total_cost_usd += cost

    usage = event.token_usage
    if not usage:
        return

    # Token totals are replaced by a new TokenUsage rather than mutated in place.
    running = self.token_usage
    self.token_usage = TokenUsage(
        input_tokens=running.input_tokens + usage.input_tokens,
        output_tokens=running.output_tokens + usage.output_tokens,
        # The event's model wins; otherwise keep the one already recorded.
        model=usage.model or running.model,
    )

options: show_source: false

Plan

Bases: BaseModel

A structured execution plan with potentially parallel steps.

Steps with no dependencies can run in parallel. Steps with dependencies run after their dependencies complete.

Attributes

approved class-attribute instance-attribute

approved = Field(
    default=False,
    description="Whether plan has been approved for execution",
)

goal class-attribute instance-attribute

goal = Field(
    description="The goal this plan aims to achieve"
)

is_complete property

is_complete

Check if all steps are completed or failed.

reasoning class-attribute instance-attribute

reasoning = Field(
    default="", description="LLM reasoning for this plan"
)

steps class-attribute instance-attribute

steps = Field(
    default_factory=list, description="Ordered steps"
)

success property

success

Check if plan completed successfully (all steps completed).

Functions

get_ready_steps

get_ready_steps()

Get all steps that are ready to execute (no pending dependencies).

Source code in agenthelm/agent/plan.py
def get_ready_steps(self) -> list[PlanStep]:
    """Return the PENDING steps whose dependencies have all COMPLETED.

    Steps are returned in plan order. A step with no dependencies is ready
    as long as it is still PENDING.
    """
    done_ids = {
        step.id for step in self.steps if step.status == StepStatus.COMPLETED
    }
    return [
        candidate
        for candidate in self.steps
        if candidate.status == StepStatus.PENDING
        and done_ids.issuperset(candidate.depends_on)
    ]

get_step

get_step(step_id)

Get a step by ID.

Source code in agenthelm/agent/plan.py
def get_step(self, step_id: str) -> PlanStep | None:
    """Return the first step whose ``id`` equals ``step_id``, or None if absent."""
    matches = (candidate for candidate in self.steps if candidate.id == step_id)
    return next(matches, None)

mark_completed

mark_completed(step_id, result=None)

Mark a step as completed.

Source code in agenthelm/agent/plan.py
def mark_completed(self, step_id: str, result: Any = None) -> None:
    """Set a step's status to COMPLETED and store ``result`` on it.

    Does nothing when ``get_step(step_id)`` finds no step.
    """
    target = self.get_step(step_id)
    if not target:
        return
    target.status = StepStatus.COMPLETED
    target.result = result

mark_failed

mark_failed(step_id, error)

Mark a step as failed.

Source code in agenthelm/agent/plan.py
def mark_failed(self, step_id: str, error: str) -> None:
    """Set a step's status to FAILED and store ``error`` on it.

    Does nothing when ``get_step(step_id)`` finds no step.
    """
    target = self.get_step(step_id)
    if not target:
        return
    target.status = StepStatus.FAILED
    target.error = error

to_yaml

to_yaml()

Serialize plan to YAML for human review.

Source code in agenthelm/agent/plan.py
def to_yaml(self) -> str:
    """Serialize plan to YAML for human review.

    Each step is emitted with its id, agent, tool, description, args and
    depends_on. Per-step keys whose value is None (an unset agent, or an
    empty depends_on) are omitted from the output.

    Returns:
        The YAML document as a string, keys in insertion order.
    """
    import yaml

    steps = []
    for s in self.steps:
        fields = {
            "id": s.id,
            "agent": s.agent_name,
            "tool": s.tool_name,
            "description": s.description,
            "args": s.args,
            # An empty dependency list is treated as "no dependencies".
            "depends_on": s.depends_on if s.depends_on else None,
        }
        # Filter None values. The filtered dict must be the one stored:
        # rebinding a loop variable afterwards would leave the list unchanged.
        steps.append({k: v for k, v in fields.items() if v is not None})

    data = {
        "goal": self.goal,
        "reasoning": self.reasoning,
        "steps": steps,
    }
    return yaml.dump(data, default_flow_style=False, sort_keys=False)

options: show_source: false

PlanStep

Bases: BaseModel

A single step in an execution plan.

Steps can have dependencies for parallel/sequential execution. Supports Saga pattern with compensating actions on failure.

Attributes

agent_name class-attribute instance-attribute

agent_name = Field(
    default=None, description="Agent to delegate to"
)

args class-attribute instance-attribute

args = Field(
    default_factory=dict,
    description="Arguments to pass to the tool",
)

compensate_args class-attribute instance-attribute

compensate_args = Field(
    default_factory=dict,
    description="Arguments for compensating tool",
)

compensate_tool class-attribute instance-attribute

compensate_tool = Field(
    default=None,
    description="Tool to run on rollback (overrides tool default)",
)

depends_on class-attribute instance-attribute

depends_on = Field(
    default_factory=list,
    description="IDs of steps that must complete first",
)

description class-attribute instance-attribute

description = Field(
    description="Human-readable description of this step"
)

error class-attribute instance-attribute

error = Field(default=None, description='Error if failed')

id class-attribute instance-attribute

id = Field(description='Unique step identifier')

is_ready property

is_ready

Check if step is ready to execute (no pending dependencies).

result class-attribute instance-attribute

result = Field(
    default=None, description="Result after execution"
)

status class-attribute instance-attribute

status = Field(
    default=PENDING, description="Current status"
)

tool_name class-attribute instance-attribute

tool_name = Field(description="Name of the tool to execute")

options: show_source: false