`pydantic_ai.usage`

Usage `dataclass`

LLM usage associated with a request or run.

Responsibility for calculating usage is on the model; Pydantic AI simply sums the usage information across requests.

You'll need to look up the documentation of the model you're using to convert usage to monetary costs.

Source code in pydantic_ai_slim/pydantic_ai/usage.py

@dataclass(repr=False)
class Usage:
    """LLM usage associated with a request or run.

    Responsibility for calculating usage is on the model; Pydantic AI simply sums the usage information across requests.

    You'll need to look up the documentation of the model you're using to convert usage to monetary costs.
    """

    requests: int = 0
    """Number of requests made to the LLM API."""
    request_tokens: int | None = None
    """Tokens used in processing requests."""
    response_tokens: int | None = None
    """Tokens used in generating responses."""
    total_tokens: int | None = None
    """Total tokens used in the whole run, should generally be equal to `request_tokens + response_tokens`."""
    details: dict[str, int] | None = None
    """Any extra details returned by the model."""

    def incr(self, incr_usage: Usage) -> None:
        """Increment the usage in place.

        A counter that is `None` on both sides stays `None`; otherwise a `None` side counts as zero.
        Entries of `incr_usage.details` are added key by key into `self.details`.

        Args:
            incr_usage: The usage to increment by.
        """
        for f in 'requests', 'request_tokens', 'response_tokens', 'total_tokens':
            self_value = getattr(self, f)
            other_value = getattr(incr_usage, f)
            if self_value is not None or other_value is not None:
                setattr(self, f, (self_value or 0) + (other_value or 0))

        if incr_usage.details:
            self.details = self.details or {}
            for key, value in incr_usage.details.items():
                self.details[key] = self.details.get(key, 0) + value

    def __add__(self, other: Usage) -> Usage:
        """Add two Usages together.

        This is provided so it's trivial to sum usage information from multiple requests and runs.
        Neither operand is modified.

        Args:
            other: The usage to add to this one.

        Returns:
            A new `Usage` holding the combined values.
        """
        new_usage = copy(self)
        # `copy` is shallow, so the new object would share `details` with `self`;
        # detach it before `incr` mutates the dict in place.
        if new_usage.details is not None:
            new_usage.details = dict(new_usage.details)
        new_usage.incr(other)
        return new_usage

    def opentelemetry_attributes(self) -> dict[str, int]:
        """Get the token usage as OpenTelemetry attributes.

        Returns:
            A mapping of `gen_ai.usage.*` attribute names to token counts. Counts that are
            `None` or zero are omitted, including zero-valued entries of `details`.
        """
        result: dict[str, int] = {}
        if self.request_tokens:
            result['gen_ai.usage.input_tokens'] = self.request_tokens
        if self.response_tokens:
            result['gen_ai.usage.output_tokens'] = self.response_tokens
        details = self.details
        if details:
            prefix = 'gen_ai.usage.details.'
            for key, value in details.items():
                # Zero-valued details are left out, matching the treatment of the token counts above.
                if value:
                    result[prefix + key] = value
        return result

    def has_values(self) -> bool:
        """Whether any values are set and non-zero."""
        return bool(
            self.requests or self.request_tokens or self.response_tokens or self.total_tokens or self.details
        )

    __repr__ = _utils.dataclasses_no_defaults_repr

requests `class-attribute` `instance-attribute`

requests: int = 0

Number of requests made to the LLM API.

request_tokens `class-attribute` `instance-attribute`

request_tokens: int | None = None

Tokens used in processing requests.

response_tokens `class-attribute` `instance-attribute`

response_tokens: int | None = None

Tokens used in generating responses.

total_tokens `class-attribute` `instance-attribute`

total_tokens: int | None = None

Total tokens used in the whole run, should generally be equal to request_tokens + response_tokens.

details `class-attribute` `instance-attribute`

details: dict[str, int] | None = None

Any extra details returned by the model.

incr

incr(incr_usage: Usage) -> None

Increment the usage in place.

Parameters:

Name	Type	Description	Default
`incr_usage`	`Usage`	The usage to increment by.	required

Source code in pydantic_ai_slim/pydantic_ai/usage.py

def incr(self, incr_usage: Usage) -> None:
    """Add another usage record onto this one, modifying this one in place.

    A counter that is `None` on both sides is left as `None`; otherwise a `None`
    side is treated as zero. Detail entries are summed key by key.

    Args:
        incr_usage: The usage to increment by.
    """
    for field_name in ('requests', 'request_tokens', 'response_tokens', 'total_tokens'):
        current = getattr(self, field_name)
        delta = getattr(incr_usage, field_name)
        # Nothing has been reported for this counter on either side: keep it unset.
        if current is None and delta is None:
            continue
        setattr(self, field_name, (current or 0) + (delta or 0))

    extra = incr_usage.details
    if not extra:
        return

    merged = self.details or {}
    self.details = merged
    for key, value in extra.items():
        merged[key] = merged.get(key, 0) + value

__add__

__add__(other: Usage) -> Usage

Add two Usages together.

This is provided so it's trivial to sum usage information from multiple requests and runs.

Source code in pydantic_ai_slim/pydantic_ai/usage.py

def __add__(self, other: Usage) -> Usage:
    """Add two Usages together.

    This is provided so it's trivial to sum usage information from multiple requests and runs.
    Neither operand is modified.

    Args:
        other: The usage to add to this one.

    Returns:
        A new `Usage` holding the combined values.
    """
    new_usage = copy(self)
    # `copy` is shallow, so the new object would share `details` with `self`;
    # detach it before `incr` mutates the dict in place.
    if new_usage.details is not None:
        new_usage.details = dict(new_usage.details)
    new_usage.incr(other)
    return new_usage

opentelemetry_attributes

opentelemetry_attributes() -> dict[str, int]

Get the token usage as OpenTelemetry attributes.

Source code in pydantic_ai_slim/pydantic_ai/usage.py

def opentelemetry_attributes(self) -> dict[str, int]:
    """Get the token usage as OpenTelemetry attributes.

    Returns:
        A mapping of `gen_ai.usage.*` attribute names to token counts. Counts that are
        `None` or zero are omitted, including zero-valued entries of `details`.
    """
    attributes: dict[str, int] = {}

    input_tokens = self.request_tokens
    if input_tokens:
        attributes['gen_ai.usage.input_tokens'] = input_tokens

    output_tokens = self.response_tokens
    if output_tokens:
        attributes['gen_ai.usage.output_tokens'] = output_tokens

    prefix = 'gen_ai.usage.details.'
    # Only non-zero detail values are reported, mirroring the token counts above.
    attributes.update({prefix + name: count for name, count in (self.details or {}).items() if count})
    return attributes

has_values

has_values() -> bool

Whether any values are set and non-zero.

Source code in pydantic_ai_slim/pydantic_ai/usage.py

def has_values(self) -> bool:
    """Whether any values are set and non-zero.

    Returns:
        `True` if the request count, any of the token counts (including `total_tokens`),
        or `details` is truthy; `False` otherwise.
    """
    return bool(
        self.requests or self.request_tokens or self.response_tokens or self.total_tokens or self.details
    )

UsageLimits `dataclass`

Limits on model usage.

The request count is tracked by pydantic_ai, and the request limit is checked before each request to the model. Token counts are provided in responses from the model, and the token limits are checked after each response.

Each of the limits can be set to None to disable that limit.

Source code in pydantic_ai_slim/pydantic_ai/usage.py

@dataclass(repr=False)
class UsageLimits:
    """Limits on model usage.

    The request count is tracked by pydantic_ai, and the request limit is checked before each request to the model.
    Token counts are provided in responses from the model, and the token limits are checked after each response.

    Each of the limits can be set to `None` to disable that limit.
    """

    request_limit: int | None = 50
    """The maximum number of requests allowed to the model."""
    request_tokens_limit: int | None = None
    """The maximum number of tokens allowed in requests to the model."""
    response_tokens_limit: int | None = None
    """The maximum number of tokens allowed in responses from the model."""
    total_tokens_limit: int | None = None
    """The maximum number of tokens allowed in requests and responses combined."""
    count_tokens_before_request: bool = False
    """If True, perform a token counting pass before sending the request to the model,
    to enforce `request_tokens_limit` ahead of time. This may incur additional overhead
    (from calling the model's `count_tokens` API before making the actual request) and is disabled by default."""

    def has_token_limits(self) -> bool:
        """Returns `True` if this instance places any limits on token counts.

        If this returns `False`, the `check_tokens` method will never raise an error.

        This is useful because if we have token limits, we need to check them after receiving each streamed message.
        If there are no limits, we can skip that processing in the streaming response iterator.
        """
        for limit in (self.request_tokens_limit, self.response_tokens_limit, self.total_tokens_limit):
            if limit is not None:
                return True
        return False

    def check_before_request(self, usage: Usage) -> None:
        """Raises a `UsageLimitExceeded` exception if the next request would exceed any of the limits.

        The request count, request tokens and total tokens are checked, in that order.
        A token count of `None` is treated as zero.
        """
        request_limit = self.request_limit
        if request_limit is not None:
            # `>=` because one more request is about to be made.
            if usage.requests >= request_limit:
                raise UsageLimitExceeded(f'The next request would exceed the request_limit of {request_limit}')

        request_tokens = usage.request_tokens or 0
        request_tokens_limit = self.request_tokens_limit
        if request_tokens_limit is not None and request_tokens > request_tokens_limit:
            raise UsageLimitExceeded(
                f'The next request would exceed the request_tokens_limit of {self.request_tokens_limit} ({request_tokens=})'
            )

        total_tokens = usage.total_tokens or 0
        total_tokens_limit = self.total_tokens_limit
        if total_tokens_limit is not None and total_tokens > total_tokens_limit:
            raise UsageLimitExceeded(
                f'The next request would exceed the total_tokens_limit of {self.total_tokens_limit} ({total_tokens=})'
            )

    def check_tokens(self, usage: Usage) -> None:
        """Raises a `UsageLimitExceeded` exception if the usage exceeds any of the token limits.

        Request, response and total tokens are checked, in that order.
        A token count of `None` is treated as zero.
        """
        request_tokens = usage.request_tokens or 0
        request_tokens_limit = self.request_tokens_limit
        if request_tokens_limit is not None and request_tokens > request_tokens_limit:
            raise UsageLimitExceeded(
                f'Exceeded the request_tokens_limit of {self.request_tokens_limit} ({request_tokens=})'
            )

        response_tokens = usage.response_tokens or 0
        response_tokens_limit = self.response_tokens_limit
        if response_tokens_limit is not None and response_tokens > response_tokens_limit:
            raise UsageLimitExceeded(
                f'Exceeded the response_tokens_limit of {self.response_tokens_limit} ({response_tokens=})'
            )

        total_tokens = usage.total_tokens or 0
        total_tokens_limit = self.total_tokens_limit
        if total_tokens_limit is not None and total_tokens > total_tokens_limit:
            raise UsageLimitExceeded(f'Exceeded the total_tokens_limit of {self.total_tokens_limit} ({total_tokens=})')

    __repr__ = _utils.dataclasses_no_defaults_repr

request_limit `class-attribute` `instance-attribute`

request_limit: int | None = 50

The maximum number of requests allowed to the model.

request_tokens_limit `class-attribute` `instance-attribute`

request_tokens_limit: int | None = None

The maximum number of tokens allowed in requests to the model.

response_tokens_limit `class-attribute` `instance-attribute`

response_tokens_limit: int | None = None

The maximum number of tokens allowed in responses from the model.

total_tokens_limit `class-attribute` `instance-attribute`

total_tokens_limit: int | None = None

The maximum number of tokens allowed in requests and responses combined.

count_tokens_before_request `class-attribute` `instance-attribute`

count_tokens_before_request: bool = False

If True, perform a token counting pass before sending the request to the model, to enforce request_tokens_limit ahead of time. This may incur additional overhead (from calling the model's count_tokens API before making the actual request) and is disabled by default.

has_token_limits

has_token_limits() -> bool

Returns True if this instance places any limits on token counts.

If this returns False, the check_tokens method will never raise an error.

This is useful because if we have token limits, we need to check them after receiving each streamed message. If there are no limits, we can skip that processing in the streaming response iterator.

Source code in pydantic_ai_slim/pydantic_ai/usage.py

def has_token_limits(self) -> bool:
    """Report whether any token limit is set on this instance.

    Returns `True` as soon as one of `request_tokens_limit`, `response_tokens_limit`
    or `total_tokens_limit` is not `None`, and `False` if all three are `None`.

    If this returns `False`, the `check_tokens` method will never raise an error.

    This is useful because if we have token limits, we need to check them after receiving each streamed message.
    If there are no limits, we can skip that processing in the streaming response iterator.
    """
    for limit in (self.request_tokens_limit, self.response_tokens_limit, self.total_tokens_limit):
        if limit is not None:
            return True
    return False

check_before_request

check_before_request(usage: Usage) -> None

Raises a UsageLimitExceeded exception if the next request would exceed any of the limits.

Source code in pydantic_ai_slim/pydantic_ai/usage.py

def check_before_request(self, usage: Usage) -> None:
    """Raises a `UsageLimitExceeded` exception if the next request would exceed any of the limits.

    The request count, request tokens and total tokens are checked, in that order.
    A token count of `None` is treated as zero.
    """
    request_limit = self.request_limit
    if request_limit is not None:
        # `>=` because one more request is about to be made.
        if usage.requests >= request_limit:
            raise UsageLimitExceeded(f'The next request would exceed the request_limit of {request_limit}')

    request_tokens = usage.request_tokens or 0
    request_tokens_limit = self.request_tokens_limit
    if request_tokens_limit is not None and request_tokens > request_tokens_limit:
        raise UsageLimitExceeded(
            f'The next request would exceed the request_tokens_limit of {self.request_tokens_limit} ({request_tokens=})'
        )

    total_tokens = usage.total_tokens or 0
    total_tokens_limit = self.total_tokens_limit
    if total_tokens_limit is not None and total_tokens > total_tokens_limit:
        raise UsageLimitExceeded(
            f'The next request would exceed the total_tokens_limit of {self.total_tokens_limit} ({total_tokens=})'
        )

check_tokens

check_tokens(usage: Usage) -> None

Raises a UsageLimitExceeded exception if the usage exceeds any of the token limits.

Source code in pydantic_ai_slim/pydantic_ai/usage.py

def check_tokens(self, usage: Usage) -> None:
    """Raises a `UsageLimitExceeded` exception if the usage exceeds any of the token limits.

    Request, response and total tokens are checked, in that order.
    A token count of `None` is treated as zero.
    """
    request_tokens = usage.request_tokens or 0
    request_tokens_limit = self.request_tokens_limit
    if request_tokens_limit is not None and request_tokens > request_tokens_limit:
        raise UsageLimitExceeded(
            f'Exceeded the request_tokens_limit of {self.request_tokens_limit} ({request_tokens=})'
        )

    response_tokens = usage.response_tokens or 0
    response_tokens_limit = self.response_tokens_limit
    if response_tokens_limit is not None and response_tokens > response_tokens_limit:
        raise UsageLimitExceeded(
            f'Exceeded the response_tokens_limit of {self.response_tokens_limit} ({response_tokens=})'
        )

    total_tokens = usage.total_tokens or 0
    total_tokens_limit = self.total_tokens_limit
    if total_tokens_limit is not None and total_tokens > total_tokens_limit:
        raise UsageLimitExceeded(f'Exceeded the total_tokens_limit of {self.total_tokens_limit} ({total_tokens=})')

pydantic_ai.usage

Usage dataclass

requests class-attribute instance-attribute

request_tokens class-attribute instance-attribute

response_tokens class-attribute instance-attribute

total_tokens class-attribute instance-attribute

details class-attribute instance-attribute

incr

__add__

opentelemetry_attributes

has_values

UsageLimits dataclass

request_limit class-attribute instance-attribute

request_tokens_limit class-attribute instance-attribute

response_tokens_limit class-attribute instance-attribute

total_tokens_limit class-attribute instance-attribute

count_tokens_before_request class-attribute instance-attribute

has_token_limits

check_before_request

check_tokens

`pydantic_ai.usage`

Usage `dataclass`

requests `class-attribute` `instance-attribute`

request_tokens `class-attribute` `instance-attribute`

response_tokens `class-attribute` `instance-attribute`

total_tokens `class-attribute` `instance-attribute`

details `class-attribute` `instance-attribute`

__add__

UsageLimits `dataclass`

request_limit `class-attribute` `instance-attribute`

request_tokens_limit `class-attribute` `instance-attribute`

response_tokens_limit `class-attribute` `instance-attribute`

total_tokens_limit `class-attribute` `instance-attribute`

count_tokens_before_request `class-attribute` `instance-attribute`