diff --git a/docs/agents.md b/docs/agents.md index aa022288..0009564d 100644 --- a/docs/agents.md +++ b/docs/agents.md @@ -108,7 +108,7 @@ You can also pass messages from previous runs to continue a conversation or prov #### Usage Limits -PydanticAI offers a [`settings.UsageLimits`][pydantic_ai.settings.UsageLimits] structure to help you limit your +PydanticAI offers a [`UsageLimits`][pydantic_ai.usage.UsageLimits] structure to help you limit your usage (tokens and/or requests) on model runs. You can apply these settings by passing the `usage_limits` argument to the `run{_sync,_stream}` functions. @@ -118,7 +118,7 @@ Consider the following example, where we limit the number of response tokens: ```py from pydantic_ai import Agent from pydantic_ai.exceptions import UsageLimitExceeded -from pydantic_ai.settings import UsageLimits +from pydantic_ai.usage import UsageLimits agent = Agent('claude-3-5-sonnet-latest') @@ -150,7 +150,7 @@ from typing_extensions import TypedDict from pydantic_ai import Agent, ModelRetry from pydantic_ai.exceptions import UsageLimitExceeded -from pydantic_ai.settings import UsageLimits +from pydantic_ai.usage import UsageLimits class NeverResultType(TypedDict): diff --git a/docs/api/usage.md b/docs/api/usage.md new file mode 100644 index 00000000..71e16208 --- /dev/null +++ b/docs/api/usage.md @@ -0,0 +1,3 @@ +# `pydantic_ai.usage` + +::: pydantic_ai.usage diff --git a/docs/dependencies.md b/docs/dependencies.md index ad6cc535..be533614 100644 --- a/docs/dependencies.md +++ b/docs/dependencies.md @@ -275,6 +275,8 @@ async def application_code(prompt: str) -> str: # (3)! 3. Application code that calls the agent, in a real application this might be an API endpoint. 4. Call the agent from within the application code, in a real application this call might be deep within a call stack. Note `app_deps` here will NOT be used when deps are overridden. +_(This example is complete, it can be run "as is")_ + ```python {title="test_joke_app.py" hl_lines="10-12" call_name="test_application_code"} from joke_app import MyDeps, application_code, joke_agent @@ -296,44 +298,6 @@ async def test_application_code(): 3. Override the dependencies of the agent for the duration of the `with` block, `test_deps` will be used when the agent is run. 4. Now we can safely call our application code, the agent will use the overridden dependencies. -## Agents as dependencies of other Agents - -Since dependencies can be any python type, and agents are just python objects, agents can be dependencies of other agents. - -```python {title="agents_as_dependencies.py"} -from dataclasses import dataclass - -from pydantic_ai import Agent, RunContext - - -@dataclass -class MyDeps: - factory_agent: Agent[None, list[str]] - - -joke_agent = Agent( - 'openai:gpt-4o', - deps_type=MyDeps, - system_prompt=( - 'Use the "joke_factory" to generate some jokes, then choose the best. ' - 'You must return just a single joke.' - ), -) - -factory_agent = Agent('gemini-1.5-pro', result_type=list[str]) - - -@joke_agent.tool -async def joke_factory(ctx: RunContext[MyDeps], count: int) -> str: - r = await ctx.deps.factory_agent.run(f'Please generate {count} jokes.') - return '\n'.join(r.data) - - -result = joke_agent.run_sync('Tell me a joke.', deps=MyDeps(factory_agent)) -print(result.data) -#> Did you hear about the toothpaste scandal? They called it Colgate. 
-```
-
 ## Examples
 
 The following examples demonstrate how to use dependencies in PydanticAI:
diff --git a/docs/examples/flight-booking.md b/docs/examples/flight-booking.md
new file mode 100644
index 00000000..7e6f1921
--- /dev/null
+++ b/docs/examples/flight-booking.md
@@ -0,0 +1,41 @@
+Example of a multi-agent flow where one agent delegates work to another, then hands off control to a third agent.
+
+Demonstrates:
+
+* [agent delegation](../multi-agent-applications.md#agent-delegation)
+* [programmatic agent hand-off](../multi-agent-applications.md#programmatic-agent-hand-off)
+* [usage limits](../agents.md#usage-limits)
+
+In this scenario, a group of agents work together to find the best flight for a user.
+
+The control flow for this example can be summarised as follows:
+
+```mermaid
+graph TD
+    START --> search_agent("search agent")
+    search_agent --> extraction_agent("extraction agent")
+    extraction_agent --> search_agent
+    search_agent --> human_confirm("human confirm")
+    human_confirm --> search_agent
+    search_agent --> FAILED
+    human_confirm --> find_seat_function("find seat function")
+    find_seat_function --> human_seat_choice("human seat choice")
+    human_seat_choice --> find_seat_agent("find seat agent")
+    find_seat_agent --> find_seat_function
+    find_seat_function --> buy_flights("buy flights")
+    buy_flights --> SUCCESS
+```
+
+## Running the Example
+
+With [dependencies installed and environment variables set](./index.md#usage), run:
+
+```bash
+python/uv-run -m pydantic_ai_examples.flight_booking
+```
+
+## Example Code
+
+```python {title="flight_booking.py"}
+#! examples/pydantic_ai_examples/flight_booking.py
+```
diff --git a/docs/extra/tweaks.css b/docs/extra/tweaks.css
index 2e3206f8..c7d69224 100644
--- a/docs/extra/tweaks.css
+++ b/docs/extra/tweaks.css
@@ -55,3 +55,7 @@ img.index-header {
   min-height: 120px;
   margin-bottom: 10px;
 }
+
+.mermaid {
+  text-align: center;
+}
diff --git a/docs/multi-agent-applications.md b/docs/multi-agent-applications.md
new file mode 100644
index 00000000..ae66ec4e
--- /dev/null
+++ b/docs/multi-agent-applications.md
@@ -0,0 +1,342 @@
+# Multi-agent Applications
+
+There are roughly four levels of complexity when building applications with PydanticAI:
+
+1. Single agent workflows — what most of the `pydantic_ai` documentation covers
+2. [Agent delegation](#agent-delegation) — agents using another agent via tools
+3. [Programmatic agent hand-off](#programmatic-agent-hand-off) — one agent runs, then application code calls another agent
+4. [Graph based control flow](#pydanticai-graphs) — for the most complex cases, a graph-based state machine can be used to control the execution of multiple agents
+
+Of course, you can combine multiple strategies in a single application.
+
+## Agent delegation
+
+"Agent delegation" refers to the scenario where an agent delegates work to another agent, then takes back control when the delegate agent (the agent called from within a tool) finishes.
+
+Since agents are stateless and designed to be global, you do not need to include the agent itself in agent [dependencies](dependencies.md).
+
+You'll generally want to pass [`ctx.usage`][pydantic_ai.RunContext.usage] to the [`usage`][pydantic_ai.Agent.run] keyword argument of the delegate agent run so usage within that run counts towards the total usage of the parent agent run.
+
+!!! note "Multiple models"
+    Agent delegation doesn't need to use the same model for each agent.
If you choose to use different models within a run, calculating the monetary cost from the final [`result.usage()`][pydantic_ai.result.RunResult.usage] of the run will not be possible, but you can still use [`UsageLimits`][pydantic_ai.usage.UsageLimits] to avoid unexpected costs. + +```python {title="agent_delegation_simple.py"} +from pydantic_ai import Agent, RunContext +from pydantic_ai.usage import UsageLimits + +joke_selection_agent = Agent( # (1)! + 'openai:gpt-4o', + system_prompt=( + 'Use the `joke_factory` to generate some jokes, then choose the best. ' + 'You must return just a single joke.' + ), +) +joke_generation_agent = Agent('gemini-1.5-flash', result_type=list[str]) # (2)! + + +@joke_selection_agent.tool +async def joke_factory(ctx: RunContext[None], count: int) -> list[str]: + r = await joke_generation_agent.run( # (3)! + f'Please generate {count} jokes.', + usage=ctx.usage, # (4)! + ) + return r.data # (5)! + + +result = joke_selection_agent.run_sync( + 'Tell me a joke.', + usage_limits=UsageLimits(request_limit=5, total_tokens_limit=300), +) +print(result.data) +#> Did you hear about the toothpaste scandal? They called it Colgate. +print(result.usage()) +""" +Usage( + requests=3, request_tokens=204, response_tokens=24, total_tokens=228, details=None +) +""" +``` + +1. The "parent" or controlling agent. +2. The "delegate" agent, which is called from within a tool of the parent agent. +3. Call the delegate agent from within a tool of the parent agent. +4. Pass the usage from the parent agent to the delegate agent so the final [`result.usage()`][pydantic_ai.result.RunResult.usage] includes the usage from both agents. +5. Since the function returns `#!python list[str]`, and the `result_type` of `joke_generation_agent` is also `#!python list[str]`, we can simply return `#!python r.data` from the tool. + +_(This example is complete, it can be run "as is")_ + +The control flow for this example is pretty simple and can be summarised as follows: + +```mermaid +graph TD + START --> joke_selection_agent + joke_selection_agent --> joke_factory["joke_factory (tool)"] + joke_factory --> joke_generation_agent + joke_generation_agent --> joke_factory + joke_factory --> joke_selection_agent + joke_selection_agent --> END +``` + +### Agent delegation and dependencies + +Generally the delegate agent needs to either have the same [dependencies](dependencies.md) as the calling agent, or dependencies which are a subset of the calling agent's dependencies. + +!!! info "Initializing dependencies" + We say "generally" above since there's nothing to stop you initializing dependencies within a tool call and therefore using interdependencies in a delegate agent that are not available on the parent, this should often be avoided since it can be significantly slower than reusing connections etc. from the parent agent. + +```python {title="agent_delegation_deps.py"} +from dataclasses import dataclass + +import httpx + +from pydantic_ai import Agent, RunContext + + +@dataclass +class ClientAndKey: # (1)! + http_client: httpx.AsyncClient + api_key: str + + +joke_selection_agent = Agent( + 'openai:gpt-4o', + deps_type=ClientAndKey, # (2)! + system_prompt=( + 'Use the `joke_factory` tool to generate some jokes on the given subject, ' + 'then choose the best. You must return just a single joke.' + ), +) +joke_generation_agent = Agent( + 'gemini-1.5-flash', + deps_type=ClientAndKey, # (4)! 
+ result_type=list[str], + system_prompt=( + 'Use the "get_jokes" tool to get some jokes on the given subject, ' + 'then extract each joke into a list.' + ), +) + + +@joke_selection_agent.tool +async def joke_factory(ctx: RunContext[ClientAndKey], count: int) -> list[str]: + r = await joke_generation_agent.run( + f'Please generate {count} jokes.', + deps=ctx.deps, # (3)! + usage=ctx.usage, + ) + return r.data + + +@joke_generation_agent.tool # (5)! +async def get_jokes(ctx: RunContext[ClientAndKey], count: int) -> str: + response = await ctx.deps.http_client.get( + 'https://example.com', + params={'count': count}, + headers={'Authorization': f'Bearer {ctx.deps.api_key}'}, + ) + response.raise_for_status() + return response.text + + +async def main(): + async with httpx.AsyncClient() as client: + deps = ClientAndKey(client, 'foobar') + result = await joke_selection_agent.run('Tell me a joke.', deps=deps) + print(result.data) + #> Did you hear about the toothpaste scandal? They called it Colgate. + print(result.usage()) # (6)! + """ + Usage( + requests=4, + request_tokens=310, + response_tokens=32, + total_tokens=342, + details=None, + ) + """ +``` + +1. Define a dataclass to hold the client and API key dependencies. +2. Set the `deps_type` of the calling agent — `joke_selection_agent` here. +3. Pass the dependencies to the delegate agent's run method within the tool call. +4. Also set the `deps_type` of the delegate agent — `joke_generation_agent` here. +5. Define a tool on the delegate agent that uses the dependencies to make an HTTP request. +6. Usage now includes 4 requests — 2 from the calling agent and 2 from the delegate agent. + +_(This example is complete, it can be run "as is")_ + +This example shows how even a fairly simple agent delegation can lead to a complex control flow: + +```mermaid +graph TD + START --> joke_selection_agent + joke_selection_agent --> joke_factory["joke_factory (tool)"] + joke_factory --> joke_generation_agent + joke_generation_agent --> get_jokes["get_jokes (tool)"] + get_jokes --> http_request["HTTP request"] + http_request --> get_jokes + get_jokes --> joke_generation_agent + joke_generation_agent --> joke_factory + joke_factory --> joke_selection_agent + joke_selection_agent --> END +``` + +## Programmatic agent hand-off + +"Programmatic agent hand-off" refers to the scenario where multiple agents are called in succession, with application code and/or a human in the loop responsible for deciding which agent to call next. + +Here agents don't need to use the same deps. + +Here we show two agents used in succession, the first to find a flight and the second to extract the user's seat preference. + +```python {title="programmatic_handoff.py"} +from typing import Literal, Union + +from pydantic import BaseModel, Field +from rich.prompt import Prompt + +from pydantic_ai import Agent, RunContext +from pydantic_ai.messages import ModelMessage +from pydantic_ai.usage import Usage, UsageLimits + + +class FlightDetails(BaseModel): + flight_number: str + + +class Failed(BaseModel): + """Unable to find a satisfactory choice.""" + + +flight_search_agent = Agent[None, Union[FlightDetails, Failed]]( # (1)! + 'openai:gpt-4o', + result_type=Union[FlightDetails, Failed], # type: ignore + system_prompt=( + 'Use the "flight_search" tool to find a flight ' + 'from the given origin to the given destination.' + ), +) + + +@flight_search_agent.tool # (2)! 
+async def flight_search( + ctx: RunContext[None], origin: str, destination: str +) -> Union[FlightDetails, None]: + # in reality, this would call a flight search API or + # use a browser to scrape a flight search website + return FlightDetails(flight_number='AK456') + + +usage_limits = UsageLimits(request_limit=15) # (3)! + + +async def find_flight(usage: Usage) -> Union[FlightDetails, None]: # (4)! + message_history: Union[list[ModelMessage], None] = None + for _ in range(3): + prompt = Prompt.ask( + 'Where would you like to fly from and to?', + ) + result = await flight_search_agent.run( + prompt, + message_history=message_history, + usage=usage, + usage_limits=usage_limits, + ) + if isinstance(result.data, FlightDetails): + return result.data + else: + message_history = result.all_messages( + result_tool_return_content='Please try again.' + ) + + +class SeatPreference(BaseModel): + row: int = Field(ge=1, le=30) + seat: Literal['A', 'B', 'C', 'D', 'E', 'F'] + + +# This agent is responsible for extracting the user's seat selection +seat_preference_agent = Agent[None, Union[SeatPreference, Failed]]( # (5)! + 'openai:gpt-4o', + result_type=Union[SeatPreference, Failed], # type: ignore + system_prompt=( + "Extract the user's seat preference. " + 'Seats A and F are window seats. ' + 'Row 1 is the front row and has extra leg room. ' + 'Rows 14, and 20 also have extra leg room. ' + ), +) + + +async def find_seat(usage: Usage) -> SeatPreference: # (6)! + message_history: Union[list[ModelMessage], None] = None + while True: + answer = Prompt.ask('What seat would you like?') + + result = await seat_preference_agent.run( + answer, + message_history=message_history, + usage=usage, + usage_limits=usage_limits, + ) + if isinstance(result.data, SeatPreference): + return result.data + else: + print('Could not understand seat preference. Please try again.') + message_history = result.all_messages() + + +async def main(): # (7)! + usage: Usage = Usage() + + opt_flight_details = await find_flight(usage) + if opt_flight_details is not None: + print(f'Flight found: {opt_flight_details.flight_number}') + #> Flight found: AK456 + seat_preference = await find_seat(usage) + print(f'Seat preference: {seat_preference}') + #> Seat preference: row=1 seat='A' +``` + +1. Define the first agent, which finds a flight. We use an explicit type annotation until [PEP-747](https://peps.python.org/pep-0747/) lands, see [structured results](results.md#structured-result-validation). We use a union as the result type so the model can communicate if it's unable to find a satisfactory choice; internally, each member of the union will be registered as a separate tool. +2. Define a tool on the agent to find a flight. In this simple case we could dispense with the tool and just define the agent to return structured data, then search for a flight, but in more complex scenarios the tool would be necessary. +3. Define usage limits for the entire app. +4. Define a function to find a flight, which asks the user for their preferences and then calls the agent to find a flight. +5. As with `flight_search_agent` above, we use an explicit type annotation to define the agent. +6. Define a function to find the user's seat preference, which asks the user for their seat preference and then calls the agent to extract the seat preference. +7. Now that we've put our logic for running each agent into separate functions, our main app becomes very simple. 
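+
+Note that `main` is a coroutine, so it needs an event loop to run. The snippet below is a minimal sketch of such an entrypoint; it assumes the code above is saved as `programmatic_handoff.py` (the module name is illustrative, not part of the example):
+
+```python {title="run_programmatic_handoff.py"}
+import asyncio
+
+from programmatic_handoff import main
+
+if __name__ == '__main__':
+    # run the two-step hand-off: find a flight, then ask for a seat preference
+    asyncio.run(main())
+```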
+ +_(This example is complete, it can be run "as is")_ + +The control flow for this example can be summarised as follows: + +```mermaid +graph TB + START --> ask_user_flight["ask user for flight"] + + subgraph find_flight + flight_search_agent --> ask_user_flight + ask_user_flight --> flight_search_agent + end + + flight_search_agent --> ask_user_seat["ask user for seat"] + flight_search_agent --> END + + subgraph find_seat + seat_preference_agent --> ask_user_seat + ask_user_seat --> seat_preference_agent + end + + seat_preference_agent --> END +``` + +## PydanticAI Graphs + +!!! example "Work in progress" + This is a work in progress and not yet documented, see [#528](https://github.com/pydantic/pydantic-ai/issues/528) and [#539](https://github.com/pydantic/pydantic-ai/issues/539) + +## Examples + +The following examples demonstrate how to use dependencies in PydanticAI: + +- [Flight booking](examples/flight-booking.md) diff --git a/examples/pydantic_ai_examples/flight_booking.py b/examples/pydantic_ai_examples/flight_booking.py new file mode 100644 index 00000000..209e2adf --- /dev/null +++ b/examples/pydantic_ai_examples/flight_booking.py @@ -0,0 +1,242 @@ +"""Example of a multi-agent flow where one agent delegates work to another. + +In this scenario, a group of agents work together to find flights for a user. +""" + +import datetime +from dataclasses import dataclass +from typing import Literal + +import logfire +from pydantic import BaseModel, Field +from rich.prompt import Prompt + +from pydantic_ai import Agent, ModelRetry, RunContext +from pydantic_ai.messages import ModelMessage +from pydantic_ai.usage import Usage, UsageLimits + +# 'if-token-present' means nothing will be sent (and the example will work) if you don't have logfire configured +logfire.configure(send_to_logfire='if-token-present') + + +class FlightDetails(BaseModel): + """Details of the most suitable flight.""" + + flight_number: str + price: int + origin: str = Field(description='Three-letter airport code') + destination: str = Field(description='Three-letter airport code') + date: datetime.date + + +class NoFlightFound(BaseModel): + """When no valid flight is found.""" + + +@dataclass +class Deps: + web_page_text: str + req_origin: str + req_destination: str + req_date: datetime.date + + +# This agent is responsible for controlling the flow of the conversation. +search_agent = Agent[Deps, FlightDetails | NoFlightFound]( + 'openai:gpt-4o', + result_type=FlightDetails | NoFlightFound, # type: ignore + retries=4, + system_prompt=( + 'Your job is to find the cheapest flight for the user on the given date. ' + ), +) + + +# This agent is responsible for extracting flight details from web page text. 
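+# It acts as the "delegate" agent in this flow: it is only called from the
+# `extract_flights` tool of `search_agent`, and `ctx.usage` is passed to its run
+# so its requests count towards the same `Usage` total that `usage_limits`
+# (defined below) is checked against.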
+extraction_agent = Agent( + 'openai:gpt-4o', + result_type=list[FlightDetails], + system_prompt='Extract all the flight details from the given text.', +) + + +@search_agent.tool +async def extract_flights(ctx: RunContext[Deps]) -> list[FlightDetails]: + """Get details of all flights.""" + # we pass the usage to the search agent so requests within this agent are counted + result = await extraction_agent.run(ctx.deps.web_page_text, usage=ctx.usage) + logfire.info('found {flight_count} flights', flight_count=len(result.data)) + return result.data + + +@search_agent.result_validator +async def validate_result( + ctx: RunContext[Deps], result: FlightDetails | NoFlightFound +) -> FlightDetails | NoFlightFound: + """Procedural validation that the flight meets the constraints.""" + if isinstance(result, NoFlightFound): + return result + + errors: list[str] = [] + if result.origin != ctx.deps.req_origin: + errors.append( + f'Flight should have origin {ctx.deps.req_origin}, not {result.origin}' + ) + if result.destination != ctx.deps.req_destination: + errors.append( + f'Flight should have destination {ctx.deps.req_destination}, not {result.destination}' + ) + if result.date != ctx.deps.req_date: + errors.append(f'Flight should be on {ctx.deps.req_date}, not {result.date}') + + if errors: + raise ModelRetry('\n'.join(errors)) + else: + return result + + +class SeatPreference(BaseModel): + row: int = Field(ge=1, le=30) + seat: Literal['A', 'B', 'C', 'D', 'E', 'F'] + + +class Failed(BaseModel): + """Unable to extract a seat selection.""" + + +# This agent is responsible for extracting the user's seat selection +seat_preference_agent = Agent[ + None, SeatPreference | Failed +]( + 'openai:gpt-4o', + result_type=SeatPreference | Failed, # type: ignore + system_prompt=( + "Extract the user's seat preference. " + 'Seats A and F are window seats. ' + 'Row 1 is the front row and has extra leg room. ' + 'Rows 14, and 20 also have extra leg room. ' + ), +) + + +# in reality this would be downloaded from a booking site, +# potentially using another agent to navigate the site +flights_web_page = """ +1. Flight SFO-AK123 +- Price: $350 +- Origin: San Francisco International Airport (SFO) +- Destination: Ted Stevens Anchorage International Airport (ANC) +- Date: January 10, 2025 + +2. Flight SFO-AK456 +- Price: $370 +- Origin: San Francisco International Airport (SFO) +- Destination: Fairbanks International Airport (FAI) +- Date: January 10, 2025 + +3. Flight SFO-AK789 +- Price: $400 +- Origin: San Francisco International Airport (SFO) +- Destination: Juneau International Airport (JNU) +- Date: January 20, 2025 + +4. Flight NYC-LA101 +- Price: $250 +- Origin: San Francisco International Airport (SFO) +- Destination: Ted Stevens Anchorage International Airport (ANC) +- Date: January 10, 2025 + +5. Flight CHI-MIA202 +- Price: $200 +- Origin: Chicago O'Hare International Airport (ORD) +- Destination: Miami International Airport (MIA) +- Date: January 12, 2025 + +6. Flight BOS-SEA303 +- Price: $120 +- Origin: Boston Logan International Airport (BOS) +- Destination: Ted Stevens Anchorage International Airport (ANC) +- Date: January 12, 2025 + +7. Flight DFW-DEN404 +- Price: $150 +- Origin: Dallas/Fort Worth International Airport (DFW) +- Destination: Denver International Airport (DEN) +- Date: January 10, 2025 + +8. 
Flight ATL-HOU505 +- Price: $180 +- Origin: Hartsfield-Jackson Atlanta International Airport (ATL) +- Destination: George Bush Intercontinental Airport (IAH) +- Date: January 10, 2025 +""" + +# restrict how many requests this app can make to the LLM +usage_limits = UsageLimits(request_limit=15) + + +async def main(): + deps = Deps( + web_page_text=flights_web_page, + req_origin='SFO', + req_destination='ANC', + req_date=datetime.date(2025, 1, 10), + ) + message_history: list[ModelMessage] | None = None + usage: Usage = Usage() + # run the agent until a satisfactory flight is found + while True: + result = await search_agent.run( + f'Find me a flight from {deps.req_origin} to {deps.req_destination} on {deps.req_date}', + deps=deps, + usage=usage, + message_history=message_history, + usage_limits=usage_limits, + ) + if isinstance(result.data, NoFlightFound): + print('No flight found') + break + else: + flight = result.data + print(f'Flight found: {flight}') + answer = Prompt.ask( + 'Do you want to buy this flight, or keep searching? (buy/*search)', + choices=['buy', 'search', ''], + show_choices=False, + ) + if answer == 'buy': + seat = await find_seat(usage) + await buy_tickets(flight, seat) + break + else: + message_history = result.all_messages( + result_tool_return_content='Please suggest another flight' + ) + + +async def find_seat(usage: Usage) -> SeatPreference: + message_history: list[ModelMessage] | None = None + while True: + answer = Prompt.ask('What seat would you like?') + + result = await seat_preference_agent.run( + answer, + message_history=message_history, + usage=usage, + usage_limits=usage_limits, + ) + if isinstance(result.data, SeatPreference): + return result.data + else: + print('Could not understand seat preference. Please try again.') + message_history = result.all_messages() + + +async def buy_tickets(flight_details: FlightDetails, seat: SeatPreference): + print(f'Purchasing flight {flight_details=!r} {seat=!r}...') + + +if __name__ == '__main__': + import asyncio + + asyncio.run(main()) diff --git a/mkdocs.yml b/mkdocs.yml index 65ffd155..e60e3a97 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -23,12 +23,14 @@ nav: - message-history.md - testing-evals.md - logfire.md + - multi-agent-applications.md - Examples: - examples/index.md - examples/pydantic-model.md - examples/weather-agent.md - examples/bank-support.md - examples/sql-gen.md + - examples/flight-booking.md - examples/rag.md - examples/stream-markdown.md - examples/stream-whales.md @@ -40,6 +42,7 @@ nav: - api/messages.md - api/exceptions.md - api/settings.md + - api/usage.md - api/models/base.md - api/models/openai.md - api/models/anthropic.md diff --git a/pydantic_ai_slim/pydantic_ai/agent.py b/pydantic_ai_slim/pydantic_ai/agent.py index 1e5b60b8..b4af5db7 100644 --- a/pydantic_ai_slim/pydantic_ai/agent.py +++ b/pydantic_ai_slim/pydantic_ai/agent.py @@ -20,9 +20,10 @@ messages as _messages, models, result, + usage as _usage, ) from .result import ResultData -from .settings import ModelSettings, UsageLimits, merge_model_settings +from .settings import ModelSettings, merge_model_settings from .tools import ( AgentDeps, RunContext, @@ -192,8 +193,8 @@ async def run( model: models.Model | models.KnownModelName | None = None, deps: AgentDeps = None, model_settings: ModelSettings | None = None, - usage_limits: UsageLimits | None = None, - usage: result.Usage | None = None, + usage_limits: _usage.UsageLimits | None = None, + usage: _usage.Usage | None = None, infer_name: bool = True, ) -> 
result.RunResult[ResultData]: """Run the agent with a user prompt in async mode. @@ -236,7 +237,7 @@ async def run( model_name=model_used.name(), agent_name=self.name or 'agent', ) as run_span: - run_context = RunContext(deps, model_used, usage or result.Usage(), user_prompt) + run_context = RunContext(deps, model_used, usage or _usage.Usage(), user_prompt) messages = await self._prepare_messages(user_prompt, message_history, run_context) run_context.messages = messages @@ -244,7 +245,7 @@ async def run( tool.current_retry = 0 model_settings = merge_model_settings(self.model_settings, model_settings) - usage_limits = usage_limits or UsageLimits() + usage_limits = usage_limits or _usage.UsageLimits() while True: usage_limits.check_before_request(run_context.usage) @@ -272,11 +273,14 @@ async def run( # Check if we got a final result if final_result is not None: result_data = final_result.data + result_tool_name = final_result.tool_name run_span.set_attribute('all_messages', messages) run_span.set_attribute('usage', run_context.usage) handle_span.set_attribute('result', result_data) handle_span.message = 'handle model response -> final result' - return result.RunResult(messages, new_message_index, result_data, run_context.usage) + return result.RunResult( + messages, new_message_index, result_data, result_tool_name, run_context.usage + ) else: # continue the conversation handle_span.set_attribute('tool_responses', tool_responses) @@ -291,8 +295,8 @@ def run_sync( model: models.Model | models.KnownModelName | None = None, deps: AgentDeps = None, model_settings: ModelSettings | None = None, - usage_limits: UsageLimits | None = None, - usage: result.Usage | None = None, + usage_limits: _usage.UsageLimits | None = None, + usage: _usage.Usage | None = None, infer_name: bool = True, ) -> result.RunResult[ResultData]: """Run the agent with a user prompt synchronously. @@ -349,8 +353,8 @@ async def run_stream( model: models.Model | models.KnownModelName | None = None, deps: AgentDeps = None, model_settings: ModelSettings | None = None, - usage_limits: UsageLimits | None = None, - usage: result.Usage | None = None, + usage_limits: _usage.UsageLimits | None = None, + usage: _usage.Usage | None = None, infer_name: bool = True, ) -> AsyncIterator[result.StreamedRunResult[AgentDeps, ResultData]]: """Run the agent with a user prompt in async mode, returning a streamed response. 
@@ -396,7 +400,7 @@ async def main(): model_name=model_used.name(), agent_name=self.name or 'agent', ) as run_span: - run_context = RunContext(deps, model_used, usage or result.Usage(), user_prompt) + run_context = RunContext(deps, model_used, usage or _usage.Usage(), user_prompt) messages = await self._prepare_messages(user_prompt, message_history, run_context) run_context.messages = messages @@ -404,7 +408,7 @@ async def main(): tool.current_retry = 0 model_settings = merge_model_settings(self.model_settings, model_settings) - usage_limits = usage_limits or UsageLimits() + usage_limits = usage_limits or _usage.UsageLimits() while True: run_context.run_step += 1 diff --git a/pydantic_ai_slim/pydantic_ai/result.py b/pydantic_ai_slim/pydantic_ai/result.py index 14606ea7..74cb9cc0 100644 --- a/pydantic_ai_slim/pydantic_ai/result.py +++ b/pydantic_ai_slim/pydantic_ai/result.py @@ -2,7 +2,7 @@ from abc import ABC, abstractmethod from collections.abc import AsyncIterator, Awaitable, Callable -from copy import copy +from copy import deepcopy from dataclasses import dataclass, field from datetime import datetime from typing import Generic, Union, cast @@ -11,16 +11,10 @@ from typing_extensions import TypeVar from . import _result, _utils, exceptions, messages as _messages, models -from .settings import UsageLimits from .tools import AgentDeps, RunContext +from .usage import Usage, UsageLimits -__all__ = ( - 'ResultData', - 'ResultValidatorFunc', - 'Usage', - 'RunResult', - 'StreamedRunResult', -) +__all__ = 'ResultData', 'ResultValidatorFunc', 'RunResult', 'StreamedRunResult' ResultData = TypeVar('ResultData', default=str) @@ -44,55 +38,6 @@ _logfire = logfire_api.Logfire(otel_scope='pydantic-ai') -@dataclass -class Usage: - """LLM usage associated with a request or run. - - Responsibility for calculating usage is on the model; PydanticAI simply sums the usage information across requests. - - You'll need to look up the documentation of the model you're using to convert usage to monetary costs. - """ - - requests: int = 0 - """Number of requests made to the LLM API.""" - request_tokens: int | None = None - """Tokens used in processing requests.""" - response_tokens: int | None = None - """Tokens used in generating responses.""" - total_tokens: int | None = None - """Total tokens used in the whole run, should generally be equal to `request_tokens + response_tokens`.""" - details: dict[str, int] | None = None - """Any extra details returned by the model.""" - - def incr(self, incr_usage: Usage, *, requests: int = 0) -> None: - """Increment the usage in place. - - Args: - incr_usage: The usage to increment by. - requests: The number of requests to increment by in addition to `incr_usage.requests`. - """ - self.requests += requests - for f in 'requests', 'request_tokens', 'response_tokens', 'total_tokens': - self_value = getattr(self, f) - other_value = getattr(incr_usage, f) - if self_value is not None or other_value is not None: - setattr(self, f, (self_value or 0) + (other_value or 0)) - - if incr_usage.details: - self.details = self.details or {} - for key, value in incr_usage.details.items(): - self.details[key] = self.details.get(key, 0) + value - - def __add__(self, other: Usage) -> Usage: - """Add two Usages together. - - This is provided so it's trivial to sum usage information from multiple requests and runs. - """ - new_usage = copy(self) - new_usage.incr(other) - return new_usage - - @dataclass class _BaseRunResult(ABC, Generic[ResultData]): """Base type for results. 
@@ -103,25 +48,70 @@ class _BaseRunResult(ABC, Generic[ResultData]): _all_messages: list[_messages.ModelMessage] _new_message_index: int - def all_messages(self) -> list[_messages.ModelMessage]: - """Return the history of _messages.""" + def all_messages(self, *, result_tool_return_content: str | None = None) -> list[_messages.ModelMessage]: + """Return the history of _messages. + + Args: + result_tool_return_content: The return content of the tool call to set in the last message. + This provides a convenient way to modify the content of the result tool call if you want to continue + the conversation and want to set the response to the result tool call. If `None`, the last message will + not be modified. + + Returns: + List of messages. + """ # this is a method to be consistent with the other methods + if result_tool_return_content is not None: + raise NotImplementedError('Setting result tool return content is not supported for this result type.') return self._all_messages - def all_messages_json(self) -> bytes: - """Return all messages from [`all_messages`][pydantic_ai.result._BaseRunResult.all_messages] as JSON bytes.""" - return _messages.ModelMessagesTypeAdapter.dump_json(self.all_messages()) + def all_messages_json(self, *, result_tool_return_content: str | None = None) -> bytes: + """Return all messages from [`all_messages`][pydantic_ai.result._BaseRunResult.all_messages] as JSON bytes. + + Args: + result_tool_return_content: The return content of the tool call to set in the last message. + This provides a convenient way to modify the content of the result tool call if you want to continue + the conversation and want to set the response to the result tool call. If `None`, the last message will + not be modified. + + Returns: + JSON bytes representing the messages. + """ + return _messages.ModelMessagesTypeAdapter.dump_json( + self.all_messages(result_tool_return_content=result_tool_return_content) + ) - def new_messages(self) -> list[_messages.ModelMessage]: + def new_messages(self, *, result_tool_return_content: str | None = None) -> list[_messages.ModelMessage]: """Return new messages associated with this run. - System prompts and any messages from older runs are excluded. + Messages from older runs are excluded. + + Args: + result_tool_return_content: The return content of the tool call to set in the last message. + This provides a convenient way to modify the content of the result tool call if you want to continue + the conversation and want to set the response to the result tool call. If `None`, the last message will + not be modified. + + Returns: + List of new messages. """ - return self.all_messages()[self._new_message_index :] + return self.all_messages(result_tool_return_content=result_tool_return_content)[self._new_message_index :] + + def new_messages_json(self, *, result_tool_return_content: str | None = None) -> bytes: + """Return new messages from [`new_messages`][pydantic_ai.result._BaseRunResult.new_messages] as JSON bytes. - def new_messages_json(self) -> bytes: - """Return new messages from [`new_messages`][pydantic_ai.result._BaseRunResult.new_messages] as JSON bytes.""" - return _messages.ModelMessagesTypeAdapter.dump_json(self.new_messages()) + Args: + result_tool_return_content: The return content of the tool call to set in the last message. + This provides a convenient way to modify the content of the result tool call if you want to continue + the conversation and want to set the response to the result tool call. 
If `None`, the last message will + not be modified. + + Returns: + JSON bytes representing the new messages. + """ + return _messages.ModelMessagesTypeAdapter.dump_json( + self.new_messages(result_tool_return_content=result_tool_return_content) + ) @abstractmethod def usage(self) -> Usage: @@ -134,12 +124,45 @@ class RunResult(_BaseRunResult[ResultData]): data: ResultData """Data from the final response in the run.""" + _result_tool_name: str | None _usage: Usage def usage(self) -> Usage: """Return the usage of the whole run.""" return self._usage + def all_messages(self, *, result_tool_return_content: str | None = None) -> list[_messages.ModelMessage]: + """Return the history of _messages. + + Args: + result_tool_return_content: The return content of the tool call to set in the last message. + This provides a convenient way to modify the content of the result tool call if you want to continue + the conversation and want to set the response to the result tool call. If `None`, the last message will + not be modified. + + Returns: + List of messages. + """ + if result_tool_return_content is not None: + return self._set_result_tool_return(result_tool_return_content) + else: + return self._all_messages + + def _set_result_tool_return(self, return_content: str) -> list[_messages.ModelMessage]: + """Set return content for the result tool. + + Useful if you want to continue the conversation and want to set the response to the result tool call. + """ + if not self._result_tool_name: + raise ValueError('Cannot set result tool return content when the return type is `str`.') + messages = deepcopy(self._all_messages) + last_message = messages[-1] + for part in last_message.parts: + if isinstance(part, _messages.ToolReturnPart) and part.tool_name == self._result_tool_name: + part.content = return_content + return messages + raise LookupError(f'No tool call found with tool name {self._result_tool_name!r}.') + @dataclass class StreamedRunResult(_BaseRunResult[ResultData], Generic[AgentDeps, ResultData]): diff --git a/pydantic_ai_slim/pydantic_ai/settings.py b/pydantic_ai_slim/pydantic_ai/settings.py index 9fcadf5a..d6728948 100644 --- a/pydantic_ai_slim/pydantic_ai/settings.py +++ b/pydantic_ai_slim/pydantic_ai/settings.py @@ -1,15 +1,12 @@ from __future__ import annotations -from dataclasses import dataclass from typing import TYPE_CHECKING from httpx import Timeout from typing_extensions import TypedDict -from .exceptions import UsageLimitExceeded - if TYPE_CHECKING: - from .result import Usage + pass class ModelSettings(TypedDict, total=False): @@ -82,60 +79,3 @@ def merge_model_settings(base: ModelSettings | None, overrides: ModelSettings | return base | overrides else: return base or overrides - - -@dataclass -class UsageLimits: - """Limits on model usage. - - The request count is tracked by pydantic_ai, and the request limit is checked before each request to the model. - Token counts are provided in responses from the model, and the token limits are checked after each response. - - Each of the limits can be set to `None` to disable that limit. 
- """ - - request_limit: int | None = 50 - """The maximum number of requests allowed to the model.""" - request_tokens_limit: int | None = None - """The maximum number of tokens allowed in requests to the model.""" - response_tokens_limit: int | None = None - """The maximum number of tokens allowed in responses from the model.""" - total_tokens_limit: int | None = None - """The maximum number of tokens allowed in requests and responses combined.""" - - def has_token_limits(self) -> bool: - """Returns `True` if this instance places any limits on token counts. - - If this returns `False`, the `check_tokens` method will never raise an error. - - This is useful because if we have token limits, we need to check them after receiving each streamed message. - If there are no limits, we can skip that processing in the streaming response iterator. - """ - return any( - limit is not None - for limit in (self.request_tokens_limit, self.response_tokens_limit, self.total_tokens_limit) - ) - - def check_before_request(self, usage: Usage) -> None: - """Raises a `UsageLimitExceeded` exception if the next request would exceed the request_limit.""" - request_limit = self.request_limit - if request_limit is not None and usage.requests >= request_limit: - raise UsageLimitExceeded(f'The next request would exceed the request_limit of {request_limit}') - - def check_tokens(self, usage: Usage) -> None: - """Raises a `UsageLimitExceeded` exception if the usage exceeds any of the token limits.""" - request_tokens = usage.request_tokens or 0 - if self.request_tokens_limit is not None and request_tokens > self.request_tokens_limit: - raise UsageLimitExceeded( - f'Exceeded the request_tokens_limit of {self.request_tokens_limit} ({request_tokens=})' - ) - - response_tokens = usage.response_tokens or 0 - if self.response_tokens_limit is not None and response_tokens > self.response_tokens_limit: - raise UsageLimitExceeded( - f'Exceeded the response_tokens_limit of {self.response_tokens_limit} ({response_tokens=})' - ) - - total_tokens = usage.total_tokens or 0 - if self.total_tokens_limit is not None and total_tokens > self.total_tokens_limit: - raise UsageLimitExceeded(f'Exceeded the total_tokens_limit of {self.total_tokens_limit} ({total_tokens=})') diff --git a/pydantic_ai_slim/pydantic_ai/usage.py b/pydantic_ai_slim/pydantic_ai/usage.py new file mode 100644 index 00000000..054be4e3 --- /dev/null +++ b/pydantic_ai_slim/pydantic_ai/usage.py @@ -0,0 +1,114 @@ +from __future__ import annotations as _annotations + +from copy import copy +from dataclasses import dataclass + +from .exceptions import UsageLimitExceeded + +__all__ = 'Usage', 'UsageLimits' + + +@dataclass +class Usage: + """LLM usage associated with a request or run. + + Responsibility for calculating usage is on the model; PydanticAI simply sums the usage information across requests. + + You'll need to look up the documentation of the model you're using to convert usage to monetary costs. + """ + + requests: int = 0 + """Number of requests made to the LLM API.""" + request_tokens: int | None = None + """Tokens used in processing requests.""" + response_tokens: int | None = None + """Tokens used in generating responses.""" + total_tokens: int | None = None + """Total tokens used in the whole run, should generally be equal to `request_tokens + response_tokens`.""" + details: dict[str, int] | None = None + """Any extra details returned by the model.""" + + def incr(self, incr_usage: Usage, *, requests: int = 0) -> None: + """Increment the usage in place. 
+ + Args: + incr_usage: The usage to increment by. + requests: The number of requests to increment by in addition to `incr_usage.requests`. + """ + self.requests += requests + for f in 'requests', 'request_tokens', 'response_tokens', 'total_tokens': + self_value = getattr(self, f) + other_value = getattr(incr_usage, f) + if self_value is not None or other_value is not None: + setattr(self, f, (self_value or 0) + (other_value or 0)) + + if incr_usage.details: + self.details = self.details or {} + for key, value in incr_usage.details.items(): + self.details[key] = self.details.get(key, 0) + value + + def __add__(self, other: Usage) -> Usage: + """Add two Usages together. + + This is provided so it's trivial to sum usage information from multiple requests and runs. + """ + new_usage = copy(self) + new_usage.incr(other) + return new_usage + + +@dataclass +class UsageLimits: + """Limits on model usage. + + The request count is tracked by pydantic_ai, and the request limit is checked before each request to the model. + Token counts are provided in responses from the model, and the token limits are checked after each response. + + Each of the limits can be set to `None` to disable that limit. + """ + + request_limit: int | None = 50 + """The maximum number of requests allowed to the model.""" + request_tokens_limit: int | None = None + """The maximum number of tokens allowed in requests to the model.""" + response_tokens_limit: int | None = None + """The maximum number of tokens allowed in responses from the model.""" + total_tokens_limit: int | None = None + """The maximum number of tokens allowed in requests and responses combined.""" + + def has_token_limits(self) -> bool: + """Returns `True` if this instance places any limits on token counts. + + If this returns `False`, the `check_tokens` method will never raise an error. + + This is useful because if we have token limits, we need to check them after receiving each streamed message. + If there are no limits, we can skip that processing in the streaming response iterator. 
+ """ + return any( + limit is not None + for limit in (self.request_tokens_limit, self.response_tokens_limit, self.total_tokens_limit) + ) + + def check_before_request(self, usage: Usage) -> None: + """Raises a `UsageLimitExceeded` exception if the next request would exceed the request_limit.""" + request_limit = self.request_limit + if request_limit is not None and usage.requests >= request_limit: + raise UsageLimitExceeded(f'The next request would exceed the request_limit of {request_limit}') + + def check_tokens(self, usage: Usage) -> None: + """Raises a `UsageLimitExceeded` exception if the usage exceeds any of the token limits.""" + request_tokens = usage.request_tokens or 0 + if self.request_tokens_limit is not None and request_tokens > self.request_tokens_limit: + raise UsageLimitExceeded( + f'Exceeded the request_tokens_limit of {self.request_tokens_limit} ({request_tokens=})' + ) + + response_tokens = usage.response_tokens or 0 + if self.response_tokens_limit is not None and response_tokens > self.response_tokens_limit: + raise UsageLimitExceeded( + f'Exceeded the response_tokens_limit of {self.response_tokens_limit} ({response_tokens=})' + ) + + total_tokens = usage.total_tokens or 0 + if self.total_tokens_limit is not None and total_tokens > self.total_tokens_limit: + raise UsageLimitExceeded(f'Exceeded the total_tokens_limit of {self.total_tokens_limit} ({total_tokens=})') diff --git a/tests/test_agent.py b/tests/test_agent.py index 6b232087..277eeeb5 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -1,4 +1,5 @@ import json +import re import sys from datetime import timezone from typing import Any, Callable, Union @@ -227,7 +228,7 @@ def validate_result(ctx: RunContext[None], r: Foo) -> Foo: ) -def test_plain_response(set_event_loop: None): +def test_plain_response_then_tuple(set_event_loop: None): call_index = 0 def return_tuple(_: list[ModelMessage], info: AgentInfo) -> ModelResponse: @@ -271,6 +272,42 @@ def return_tuple(_: list[ModelMessage], info: AgentInfo) -> ModelResponse: ), ] ) + assert result._result_tool_name == 'final_result' # pyright: ignore[reportPrivateUsage] + assert result.all_messages(result_tool_return_content='foobar')[-1] == snapshot( + ModelRequest( + parts=[ToolReturnPart(tool_name='final_result', content='foobar', timestamp=IsNow(tz=timezone.utc))] + ) + ) + assert result.all_messages()[-1] == snapshot( + ModelRequest( + parts=[ + ToolReturnPart( + tool_name='final_result', content='Final result processed.', timestamp=IsNow(tz=timezone.utc) + ) + ] + ) + ) + + +def test_result_tool_return_content_str_return(set_event_loop: None): + agent = Agent('test') + + result = agent.run_sync('Hello') + assert result.data == 'success (no tool calls)' + + msg = re.escape('Cannot set result tool return content when the return type is `str`.') + with pytest.raises(ValueError, match=msg): + result.all_messages(result_tool_return_content='foobar') + + +def test_result_tool_return_content_no_tool(set_event_loop: None): + agent = Agent('test', result_type=int) + + result = agent.run_sync('Hello') + assert result.data == 0 + result._result_tool_name = 'wrong' # pyright: ignore[reportPrivateUsage] + with pytest.raises(LookupError, match=re.escape("No tool call found with tool name 'wrong'.")): + result.all_messages(result_tool_return_content='foobar') def test_response_tuple(set_event_loop: None): @@ -507,6 +544,7 @@ async def ret_a(x: str) -> str: ], _new_message_index=4, data='{"ret_a":"a-apple"}', + _result_tool_name=None, _usage=Usage(requests=1, 
request_tokens=55, response_tokens=13, total_tokens=68, details=None), ) ) @@ -549,6 +587,7 @@ async def ret_a(x: str) -> str: ], _new_message_index=4, data='{"ret_a":"a-apple"}', + _result_tool_name=None, _usage=Usage(requests=1, request_tokens=55, response_tokens=13, total_tokens=68, details=None), ) ) @@ -648,6 +687,7 @@ async def ret_a(x: str) -> str: ), ], _new_message_index=5, + _result_tool_name='final_result', _usage=Usage(requests=1, request_tokens=59, response_tokens=13, total_tokens=72, details=None), ) ) diff --git a/tests/test_examples.py b/tests/test_examples.py index 6f0e7912..9f296999 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -72,6 +72,7 @@ def test_docs_examples( mocker.patch('httpx.AsyncClient.get', side_effect=async_http_request) mocker.patch('httpx.AsyncClient.post', side_effect=async_http_request) mocker.patch('random.randint', return_value=4) + mocker.patch('rich.prompt.Prompt.ask', side_effect=rich_prompt_ask) env.set('OPENAI_API_KEY', 'testing') env.set('GEMINI_API_KEY', 'testing') @@ -145,6 +146,14 @@ async def async_http_request(url: str, **kwargs: Any) -> httpx.Response: return http_request(url, **kwargs) +def rich_prompt_ask(prompt: str, *_args: Any, **_kwargs: Any) -> str: + if prompt == 'Where would you like to fly from and to?': + return 'SFO to ANC' + else: + assert prompt == 'What seat would you like?', prompt + return 'window seat with leg room' + + text_responses: dict[str, str | ToolCallPart] = { 'What is the weather like in West London and in Wiltshire?': ( 'The weather in West London is raining, while in Wiltshire it is sunny.' @@ -218,21 +227,36 @@ async def async_http_request(url: str, **kwargs: Any) -> httpx.Response: 'Rome is known for its rich history, stunning architecture, and delicious cuisine.' ), 'Begin infinite retry loop!': ToolCallPart(tool_name='infinite_retry_tool', args=ArgsDict({})), + 'Please generate 5 jokes.': ToolCallPart( + tool_name='final_result', + args=ArgsDict({'response': []}), + ), + 'SFO to ANC': ToolCallPart( + tool_name='flight_search', + args=ArgsDict({'origin': 'SFO', 'destination': 'ANC'}), + ), + 'window seat with leg room': ToolCallPart( + tool_name='final_result_SeatPreference', + args=ArgsDict({'row': 1, 'seat': 'A'}), + ), } -async def model_logic(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse: # pragma: no cover +async def model_logic(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse: # pragma: no cover # noqa: C901 m = messages[-1].parts[-1] if isinstance(m, UserPromptPart): - if response := text_responses.get(m.content): + if m.content == 'Tell me a joke.' and any(t.name == 'joke_factory' for t in info.function_tools): + return ModelResponse(parts=[ToolCallPart(tool_name='joke_factory', args=ArgsDict({'count': 5}))]) + elif m.content == 'Please generate 5 jokes.' 
and any(t.name == 'get_jokes' for t in info.function_tools): + return ModelResponse(parts=[ToolCallPart(tool_name='get_jokes', args=ArgsDict({'count': 5}))]) + elif re.fullmatch(r'sql prompt \d+', m.content): + return ModelResponse.from_text(content='SELECT 1') + elif response := text_responses.get(m.content): if isinstance(response, str): return ModelResponse.from_text(content=response) else: return ModelResponse(parts=[response]) - if re.fullmatch(r'sql prompt \d+', m.content): - return ModelResponse.from_text(content='SELECT 1') - elif isinstance(m, ToolReturnPart) and m.tool_name == 'roulette_wheel': win = m.content == 'winner' return ModelResponse(parts=[ToolCallPart(tool_name='final_result', args=ArgsDict({'response': win}))]) @@ -249,7 +273,7 @@ async def model_logic(messages: list[ModelMessage], info: AgentInfo) -> ModelRes elif isinstance(m, RetryPromptPart) and m.tool_name == 'infinite_retry_tool': return ModelResponse(parts=[ToolCallPart(tool_name='infinite_retry_tool', args=ArgsDict({}))]) elif isinstance(m, ToolReturnPart) and m.tool_name == 'get_user_by_name': - args = { + args: dict[str, Any] = { 'message': 'Hello John, would you be free for coffee sometime next week? Let me know what works for you!', 'user_id': 123, } @@ -263,6 +287,14 @@ async def model_logic(messages: list[ModelMessage], info: AgentInfo) -> ModelRes 'risk': 1, } return ModelResponse(parts=[ToolCallPart(tool_name='final_result', args=ArgsDict(args))]) + elif isinstance(m, ToolReturnPart) and m.tool_name == 'joke_factory': + return ModelResponse.from_text(content='Did you hear about the toothpaste scandal? They called it Colgate.') + elif isinstance(m, ToolReturnPart) and m.tool_name == 'get_jokes': + args = {'response': []} + return ModelResponse(parts=[ToolCallPart(tool_name='final_result', args=ArgsDict(args))]) + elif isinstance(m, ToolReturnPart) and m.tool_name == 'flight_search': + args = {'flight_number': m.content.flight_number} # type: ignore + return ModelResponse(parts=[ToolCallPart(tool_name='final_result_FlightDetails', args=ArgsDict(args))]) else: sys.stdout.write(str(debug.format(messages, info))) raise RuntimeError(f'Unexpected message: {m}') diff --git a/tests/test_usage_limits.py b/tests/test_usage_limits.py index 37b45142..cbbfec34 100644 --- a/tests/test_usage_limits.py +++ b/tests/test_usage_limits.py @@ -16,8 +16,7 @@ UserPromptPart, ) from pydantic_ai.models.test import TestModel -from pydantic_ai.result import Usage -from pydantic_ai.settings import UsageLimits +from pydantic_ai.usage import Usage, UsageLimits from .conftest import IsNow
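
A minimal sketch of the relocated API (illustrative only, not part of the diff): `Usage` values can be summed across runs, and `UsageLimits` raises `UsageLimitExceeded` once a limit is exceeded.

```python
from pydantic_ai.exceptions import UsageLimitExceeded
from pydantic_ai.usage import Usage, UsageLimits

# usage from two separate runs, combined with Usage.__add__
total = Usage(requests=1, request_tokens=50, response_tokens=10, total_tokens=60) + Usage(
    requests=2, request_tokens=100, response_tokens=40, total_tokens=140
)
print(total.total_tokens)
#> 200

limits = UsageLimits(total_tokens_limit=150)
try:
    limits.check_tokens(total)
except UsageLimitExceeded as e:
    print(e)
    #> Exceeded the total_tokens_limit of 150 (total_tokens=200)
```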