Skip to content

feat(tools): add conversation history support to ToolContext #904 #926

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion src/agents/_run_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,9 @@ async def execute_tools_and_side_effects(
hooks=hooks,
context_wrapper=context_wrapper,
config=run_config,
original_input=original_input,
pre_step_items=pre_step_items,
new_step_items=new_step_items,
),
cls.execute_computer_actions(
agent=agent,
Expand Down Expand Up @@ -539,12 +542,25 @@ async def execute_function_tool_calls(
hooks: RunHooks[TContext],
context_wrapper: RunContextWrapper[TContext],
config: RunConfig,
original_input: str | list[TResponseInputItem],
pre_step_items: list[RunItem],
new_step_items: list[RunItem],
) -> list[FunctionToolResult]:
async def run_single_tool(
func_tool: FunctionTool, tool_call: ResponseFunctionToolCall
) -> Any:
with function_span(func_tool.name) as span_fn:
tool_context = ToolContext.from_agent_context(context_wrapper, tool_call.call_id)
# Build conversation history from original input and all items generated so far
original_items: list[TResponseInputItem] = ItemHelpers.input_to_new_input_list(
original_input
)
pre_items = [item.to_input_item() for item in pre_step_items]
new_items = [item.to_input_item() for item in new_step_items]
conversation_history = original_items + pre_items + new_items

tool_context = ToolContext.from_agent_context(
context_wrapper, tool_call.call_id, conversation_history=conversation_history
)
if config.trace_include_sensitive_data:
span_fn.span_data.input = tool_call.arguments
try:
Expand Down
24 changes: 22 additions & 2 deletions src/agents/tool_context.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dataclasses import dataclass, field, fields
from typing import Any

from .items import TResponseInputItem
from .run_context import RunContextWrapper, TContext


Expand All @@ -15,15 +16,34 @@ class ToolContext(RunContextWrapper[TContext]):
tool_call_id: str = field(default_factory=_assert_must_pass_tool_call_id)
"""The ID of the tool call."""

conversation_history: list[TResponseInputItem] = field(default_factory=list)
"""The conversation history available at the time this tool was called.

This includes the original input and all items generated during the agent run
up to the point when this tool was invoked.
"""

@classmethod
def from_agent_context(
    cls,
    context: RunContextWrapper[TContext],
    tool_call_id: str,
    # Quoted so the `X | None` union is never evaluated at definition time; the visible
    # module header has no `from __future__ import annotations`, and `list[...] | None`
    # raises TypeError on Python < 3.10 when evaluated eagerly.
    conversation_history: "list[TResponseInputItem] | None" = None,
) -> "ToolContext":
    """
    Create a ToolContext from a RunContextWrapper.

    Args:
        context: The run context wrapper whose init fields are copied over.
        tool_call_id: The ID of the tool call.
        conversation_history: The conversation history available at tool invocation
            time. ``None`` (the default) results in an empty history.

    Returns:
        A new ToolContext carrying the wrapper's init fields, the tool call ID and a
        shallow copy of ``conversation_history``.
    """
    # Grab the names of the RunContextWrapper's init=True fields
    base_values: dict[str, Any] = {
        f.name: getattr(context, f.name) for f in fields(RunContextWrapper) if f.init
    }
    # Copy the list so later changes to the caller's list do not leak into this context
    # (the items themselves are shared, not deep-copied).
    return cls(
        tool_call_id=tool_call_id,
        conversation_history=list(conversation_history or []),
        **base_values,
    )
205 changes: 205 additions & 0 deletions tests/test_tool_context_conversation_history.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
"""Tests for conversation_history functionality in ToolContext."""

from __future__ import annotations

from typing import cast

import pytest
from openai.types.responses import ResponseFunctionToolCall, ResponseOutputMessage
from openai.types.responses.response_input_item_param import FunctionCallOutput

from agents import (
Agent,
MessageOutputItem,
RunContextWrapper,
RunItem,
ToolCallItem,
ToolCallOutputItem,
Usage,
)
from agents.items import ItemHelpers
from agents.tool_context import ToolContext

from .test_responses import (
get_function_tool_call,
get_text_input_item,
get_text_message,
)


def test_tool_context_has_conversation_history_field():
    """A directly constructed ToolContext exposes an empty conversation_history list."""
    tool_ctx = ToolContext(context=None, tool_call_id="test-id")

    assert hasattr(tool_ctx, "conversation_history")

    stored = tool_ctx.conversation_history
    assert isinstance(stored, list)
    assert len(stored) == 0


def test_tool_context_from_agent_context_default_history():
    """from_agent_context called without a history argument yields an empty history."""
    wrapper = RunContextWrapper(context=None, usage=Usage())

    derived = ToolContext.from_agent_context(wrapper, "test-id")

    assert derived.tool_call_id == "test-id"
    assert derived.conversation_history == []


def test_tool_context_from_agent_context_with_history():
    """from_agent_context stores the conversation history it is given."""
    wrapper = RunContextWrapper(context=None, usage=Usage())
    first_message = get_text_input_item("Hello")
    second_message = get_text_input_item("How are you?")
    history = [first_message, second_message]

    derived = ToolContext.from_agent_context(wrapper, "test-id", conversation_history=history)

    assert derived.tool_call_id == "test-id"
    assert derived.conversation_history == history
    assert len(derived.conversation_history) == 2


@pytest.mark.asyncio
async def test_conversation_history_in_tool_execution():
    """Test that a history assembled for a tool call reaches the ToolContext intact.

    The history is assembled in the order: original input, items from earlier steps,
    items from the current step. The result is then handed to
    ToolContext.from_agent_context and checked for exact size and content.
    """

    # Create a dummy agent for the items
    dummy_agent = Agent[None](name="dummy")

    original_input = "What's the weather like?"
    pre_step_items: list[RunItem] = [
        MessageOutputItem(
            agent=dummy_agent,
            raw_item=cast(
                ResponseOutputMessage, get_text_message("I'll check the weather for you.")
            ),
        )
    ]
    new_step_items: list[RunItem] = [
        ToolCallItem(
            agent=dummy_agent,
            raw_item=cast(ResponseFunctionToolCall, get_function_tool_call("test_tool", "")),
        )
    ]

    # Build the conversation history from the original input and both item lists
    original_items = ItemHelpers.input_to_new_input_list(original_input)
    pre_items = [item.to_input_item() for item in pre_step_items]
    new_items = [item.to_input_item() for item in new_step_items]
    expected_history = original_items + pre_items + new_items

    # Exactly: one user input, one assistant message, one pending tool call.
    # An exact count catches dropped or duplicated items, which ">= 1" would not.
    assert len(original_items) == 1
    assert len(expected_history) == 3

    # The history must survive the hand-off into the ToolContext unchanged.
    run_context = RunContextWrapper(context=None, usage=Usage())
    tool_context = ToolContext.from_agent_context(
        run_context, "test-id", conversation_history=expected_history
    )
    assert tool_context.conversation_history == expected_history


@pytest.mark.asyncio
async def test_conversation_history_empty_for_first_turn():
    """Test that first-turn history holds only the original input and the current tool call.

    On the first turn there are no items from earlier steps, so the history is not
    empty: it consists of the single original input item followed by the tool call
    being executed.
    """

    # Create a dummy agent for the items
    dummy_agent = Agent[None](name="dummy")

    # Simulate first turn - only original input, no pre_step_items
    original_input = "Hello"
    pre_step_items: list[RunItem] = []
    new_step_items: list[RunItem] = [
        ToolCallItem(
            agent=dummy_agent,
            raw_item=cast(ResponseFunctionToolCall, get_function_tool_call("first_turn_tool", "")),
        )
    ]

    # Build the conversation history from the original input and both item lists
    original_items = ItemHelpers.input_to_new_input_list(original_input)
    pre_items = [item.to_input_item() for item in pre_step_items]
    new_items = [item.to_input_item() for item in new_step_items]
    conversation_history = original_items + pre_items + new_items

    assert len(original_items) == 1  # Original input becomes one item
    assert pre_items == []  # Nothing precedes the first turn
    # Exact size (input + current tool call) instead of a ">= 1" lower bound
    assert len(conversation_history) == 2
    assert conversation_history[:1] == original_items
    assert conversation_history[1:] == new_items


@pytest.mark.asyncio
async def test_conversation_history_multi_turn():
    """Test conversation history accumulates correctly, and in order, across multiple turns."""

    # Create a dummy agent for the items
    dummy_agent = Agent[None](name="dummy")

    # Simulate multiple turns with accumulated history
    original_input = "Start conversation"
    pre_step_items: list[RunItem] = [
        MessageOutputItem(
            agent=dummy_agent,
            raw_item=cast(ResponseOutputMessage, get_text_message("Response to start")),
        ),
        ToolCallItem(
            agent=dummy_agent,
            raw_item=cast(ResponseFunctionToolCall, get_function_tool_call("multi_turn_tool", "")),
        ),
        ToolCallOutputItem(
            agent=dummy_agent,
            raw_item=cast(
                FunctionCallOutput,
                {
                    "type": "function_call_output",
                    "call_id": "call-1",
                    "output": "Previous tool output",
                },
            ),
            output="Previous tool output",
        ),
        MessageOutputItem(
            agent=dummy_agent,
            raw_item=cast(ResponseOutputMessage, get_text_message("Continuing conversation")),
        ),
    ]
    new_step_items: list[RunItem] = [
        ToolCallItem(
            agent=dummy_agent,
            raw_item=cast(ResponseFunctionToolCall, get_function_tool_call("multi_turn_tool", "")),
        )
    ]

    # Build conversation history
    original_items = ItemHelpers.input_to_new_input_list(original_input)
    pre_items = [item.to_input_item() for item in pre_step_items]
    new_items = [item.to_input_item() for item in new_step_items]
    conversation_history = original_items + pre_items + new_items

    # Exactly: 1 original input + 4 items from earlier steps + 1 current tool call.
    # The exact count catches dropped items that the former ">= 5" bound let through.
    assert len(original_items) == 1
    assert len(pre_items) == 4
    assert len(new_items) == 1
    assert len(conversation_history) == 6

    # Order matters: input first, earlier steps next, current step last.
    assert conversation_history[:1] == original_items
    assert conversation_history[1:5] == pre_items
    assert conversation_history[5:] == new_items


def test_conversation_history_immutable():
    """Test that a ToolContext's conversation_history is isolated from the caller's list.

    The stored list itself remains mutable; what is verified is that it is a separate
    list object, so changes on either side do not leak to the other.
    """
    run_context = RunContextWrapper(context=None, usage=Usage())
    history = [get_text_input_item("Original message")]

    tool_context = ToolContext.from_agent_context(
        run_context, "test-id", conversation_history=history
    )

    # The conversation_history should be a new list, not a reference
    assert tool_context.conversation_history is not history

    # Modifying the original list should not affect the tool context
    history.append(get_text_input_item("Should not appear"))
    assert len(tool_context.conversation_history) == 1

    # Modifying the tool context's list should not affect the original list
    tool_context.conversation_history.append(get_text_input_item("Direct modification"))
    assert len(tool_context.conversation_history) == 2
    assert len(history) == 2  # "Original message" + "Should not appear" only

    # A second context built from the same source list sees the caller's list as it is
    # now, untouched by the first context's direct modification.
    new_tool_context = ToolContext.from_agent_context(
        run_context, "test-id-2", conversation_history=history
    )
    assert new_tool_context.conversation_history == history
    assert new_tool_context.conversation_history is not tool_context.conversation_history


def test_conversation_history_with_none():
    """An explicit conversation_history=None is normalised to an empty list."""
    wrapper = RunContextWrapper(context=None, usage=Usage())

    derived = ToolContext.from_agent_context(wrapper, "test-id", conversation_history=None)
    stored = derived.conversation_history

    assert stored == []
    assert isinstance(stored, list)
Loading