`pydantic_ai.models.outlines`

Setup

For details on how to set up this model, see model configuration for Outlines.

OutlinesModel `dataclass`

Bases: Model

A model that relies on the Outlines library to run non API-based models.

Source code in pydantic_ai_slim/pydantic_ai/models/outlines.py

@dataclass(init=False)
class OutlinesModel(Model):
    """A model that relies on the Outlines library to run non API-based models."""

    def __init__(
        self,
        model: OutlinesBaseModel | OutlinesAsyncBaseModel,
        *,
        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
        profile: ModelProfileSpec | None = None,
        settings: ModelSettings | None = None,
    ):
        """Wrap an already-constructed Outlines model.

        Args:
            model: The Outlines model (sync or async) that runs the generations.
            provider: Either the string 'outlines', which is resolved through `infer_provider`, or an
                instance of `Provider[OutlinesBaseModel]`. It is only used to supply the default model
                profile when `profile` is not given.
            profile: The model profile to use. Defaults to the provider's `model_profile`.
            settings: Default model settings for this model instance.
        """
        self.model: OutlinesBaseModel | OutlinesAsyncBaseModel = model
        # The reported model name is a fixed placeholder.
        self._model_name: str = 'outlines-model'

        resolved_provider = infer_provider(provider) if isinstance(provider, str) else provider

        super().__init__(settings=settings, profile=profile or resolved_provider.model_profile)

    @classmethod
    def from_transformers(
        cls,
        hf_model: transformers.modeling_utils.PreTrainedModel,
        hf_tokenizer_or_processor: transformers.tokenization_utils.PreTrainedTokenizer
        | transformers.processing_utils.ProcessorMixin,
        *,
        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
        profile: ModelProfileSpec | None = None,
        settings: ModelSettings | None = None,
    ):
        """Build an `OutlinesModel` around a Hugging Face model and tokenizer/processor.

        Args:
            hf_model: The Hugging Face PreTrainedModel or any model that is compatible with the
                `transformers` API.
            hf_tokenizer_or_processor: Either a HuggingFace `PreTrainedTokenizer` or any tokenizer that is compatible
                with the `transformers` API, or a HuggingFace processor inheriting from `ProcessorMixin`. If a
                tokenizer is provided, a regular model will be used, while if you provide a processor, it will be a
                multimodal model.
            provider: Either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. Forwarded
                unchanged to the class constructor.
            profile: The model profile to use. Defaults to a profile picked by the provider.
            settings: Default model settings for this model instance.

        Returns:
            A new instance of `cls` wrapping the Outlines model built by `from_transformers`.
        """
        return cls(
            from_transformers(hf_model, hf_tokenizer_or_processor),
            provider=provider,
            profile=profile,
            settings=settings,
        )

    @classmethod
    def from_llamacpp(
        cls,
        llama_model: llama_cpp.Llama,
        *,
        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
        profile: ModelProfileSpec | None = None,
        settings: ModelSettings | None = None,
    ):
        """Build an `OutlinesModel` around a LlamaCpp model.

        Args:
            llama_model: The `llama_cpp.Llama` model to wrap.
            provider: Either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. Forwarded
                unchanged to the class constructor.
            profile: The model profile to use. Defaults to a profile picked by the provider.
            settings: Default model settings for this model instance.

        Returns:
            A new instance of `cls` wrapping the Outlines model built by `from_llamacpp`.
        """
        return cls(
            from_llamacpp(llama_model),
            provider=provider,
            profile=profile,
            settings=settings,
        )

    @classmethod
    def from_mlxlm(  # pragma: no cover
        cls,
        mlx_model: nn.Module,  # pyright: ignore[reportUnknownParameterType, reportUnknownMemberType]
        mlx_tokenizer: transformers.tokenization_utils.PreTrainedTokenizer,
        *,
        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
        profile: ModelProfileSpec | None = None,
        settings: ModelSettings | None = None,
    ):
        """Build an `OutlinesModel` around an MLXLM model.

        Args:
            mlx_model: The `nn.Module` model to wrap.
            mlx_tokenizer: The `PreTrainedTokenizer` that goes with `mlx_model`.
            provider: Either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. Forwarded
                unchanged to the class constructor.
            profile: The model profile to use. Defaults to a profile picked by the provider.
            settings: Default model settings for this model instance.

        Returns:
            A new instance of `cls` wrapping the Outlines model built by `from_mlxlm`.
        """
        wrapped: OutlinesBaseModel = from_mlxlm(mlx_model, mlx_tokenizer)  # pyright: ignore[reportUnknownArgumentType]
        return cls(
            wrapped,
            provider=provider,
            profile=profile,
            settings=settings,
        )

    @classmethod
    def from_sglang(
        cls,
        base_url: str,
        api_key: str | None = None,
        model_name: str | None = None,
        *,
        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
        profile: ModelProfileSpec | None = None,
        settings: ModelSettings | None = None,
    ):
        """Build an `OutlinesModel` that sends its requests to an SGLang server.

        The server is reached through an `openai.AsyncOpenAI` client created from `base_url` and `api_key`.

        Args:
            base_url: The url of the SGLang server.
            api_key: The API key to use for authenticating requests to the SGLang server. Passed to
                `AsyncOpenAI` as-is, including when it is `None`.
            model_name: The name of the model to use.
            provider: Either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. Forwarded
                unchanged to the class constructor.
            profile: The model profile to use. Defaults to a profile picked by the provider.
            settings: Default model settings for this model instance.

        Returns:
            A new instance of `cls` wrapping the Outlines model built by `from_sglang`.

        Raises:
            ImportError: If the `openai` package is not installed.
        """
        # `openai` is an optional dependency, so it is imported only when this constructor is used.
        try:
            from openai import AsyncOpenAI
        except ImportError as _import_error:
            raise ImportError(
                'Please install `openai` to use the Outlines SGLang model, '
                'you can use the `openai` optional group — `pip install "pydantic-ai-slim[openai]"`'
            ) from _import_error

        client = AsyncOpenAI(base_url=base_url, api_key=api_key)
        return cls(
            from_sglang(client, model_name),
            provider=provider,
            profile=profile,
            settings=settings,
        )

    @classmethod
    def from_vllm_offline(  # pragma: no cover
        cls,
        vllm_model: Any,
        *,
        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
        profile: ModelProfileSpec | None = None,
        settings: ModelSettings | None = None,
    ):
        """Build an `OutlinesModel` around a vLLM offline inference model.

        Args:
            vllm_model: The `vllm.LLM` local model to wrap.
            provider: Either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. Forwarded
                unchanged to the class constructor.
            profile: The model profile to use. Defaults to a profile picked by the provider.
            settings: Default model settings for this model instance.

        Returns:
            A new instance of `cls` wrapping the Outlines model built by `from_vllm_offline`.
        """
        return cls(
            from_vllm_offline(vllm_model),
            provider=provider,
            profile=profile,
            settings=settings,
        )

    @property
    def model_name(self) -> str:
        return self._model_name

    @property
    def system(self) -> str:
        return 'outlines'

    async def request(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> ModelResponse:
        """Make a request to the model.

        Args:
            messages: The conversation to turn into an Outlines prompt.
            model_settings: Settings for this request; they are passed through `prepare_request` and then
                filtered into inference kwargs.
            model_request_parameters: Request parameters; they are passed through `prepare_request` before use.

        Returns:
            The model response built from the generated text.

        Raises:
            UserError: If the request uses tools, or contains message parts that cannot be converted.
        """
        # The docstring must be the first statement of the function, otherwise it is just a discarded
        # string expression and `request.__doc__` stays empty.
        model_settings, model_request_parameters = self.prepare_request(
            model_settings,
            model_request_parameters,
        )
        prompt, output_type, inference_kwargs = await self._build_generation_arguments(
            messages, model_settings, model_request_parameters
        )
        # Async is available for SgLang
        response: str
        if isinstance(self.model, OutlinesAsyncBaseModel):
            response = await self.model(prompt, output_type, None, **inference_kwargs)
        else:
            # Synchronous Outlines models are called inline, without awaiting.
            response = self.model(prompt, output_type, None, **inference_kwargs)
        return self._process_response(response)

    @asynccontextmanager
    async def request_stream(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
        run_context: RunContext[Any] | None = None,
    ) -> AsyncIterator[StreamedResponse]:
        """Make a streamed request to the model.

        Args:
            messages: The conversation to turn into an Outlines prompt.
            model_settings: Settings for this request; they are passed through `prepare_request` and then
                filtered into inference kwargs.
            model_request_parameters: Request parameters; they are passed through `prepare_request` before use.
            run_context: Accepted as part of the signature; this implementation does not read it.

        Yields:
            A single streamed response wrapping the chunks produced by the Outlines model.
        """
        model_settings, model_request_parameters = self.prepare_request(
            model_settings,
            model_request_parameters,
        )

        prompt, output_type, inference_kwargs = await self._build_generation_arguments(
            messages, model_settings, model_request_parameters
        )

        chunks = self.model.stream(prompt, output_type, None, **inference_kwargs)

        # Async is available for SgLang
        if not isinstance(self.model, OutlinesAsyncBaseModel):
            # Synchronous models hand back a plain iterator; re-expose it as an async one.
            sync_chunks = chunks

            async def _iterate_sync_chunks():
                for chunk in sync_chunks:
                    yield chunk

            chunks = _iterate_sync_chunks()

        yield await self._process_streamed_response(chunks, model_request_parameters)

    async def _build_generation_arguments(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> tuple[Chat, JsonSchema | None, dict[str, Any]]:
        """Assemble the prompt, output type and inference kwargs for a generation call.

        Returns:
            A `(prompt, output_type, inference_kwargs)` tuple. `output_type` is a `JsonSchema` built from
            the output object's JSON schema, or `None` when no output object is requested.

        Raises:
            UserError: If any function, builtin or output tools are present in the request parameters.
        """
        params = model_request_parameters

        uses_tools = params.function_tools or params.builtin_tools or params.output_tools
        if uses_tools:
            raise UserError('Outlines does not support function tools and builtin tools yet.')

        output_object = params.output_object
        output_type = JsonSchema(output_object.json_schema) if output_object else None

        prompt = await self._format_prompt(messages, params)

        return prompt, output_type, self.format_inference_kwargs(model_settings)

    def format_inference_kwargs(self, model_settings: ModelSettings | None) -> dict[str, Any]:
        """Turn the model settings into keyword arguments for the Outlines model call.

        The settings are filtered by the formatter matching the wrapped model's type; for any other model
        type they are kept unfiltered. The content of `extra_body`, if present, is then merged into the
        top level of the result.
        """
        kwargs: dict[str, Any] = dict(model_settings) if model_settings else {}
        wrapped = self.model

        if isinstance(wrapped, Transformers):
            kwargs = self._format_transformers_inference_kwargs(kwargs)
        elif isinstance(wrapped, LlamaCpp):
            kwargs = self._format_llama_cpp_inference_kwargs(kwargs)
        elif isinstance(wrapped, MLXLM):  # pragma: no cover
            kwargs = self._format_mlxlm_inference_kwargs(kwargs)
        elif isinstance(wrapped, SGLang | AsyncSGLang):
            kwargs = self._format_sglang_inference_kwargs(kwargs)
        elif isinstance(wrapped, VLLMOffline):  # pragma: no cover
            kwargs = self._format_vllm_offline_inference_kwargs(kwargs)

        # Flatten `extra_body`: its entries become top-level kwargs and override same-named keys.
        kwargs.update(kwargs.pop('extra_body', {}))

        return kwargs

    def _format_transformers_inference_kwargs(self, model_settings: dict[str, Any]) -> dict[str, Any]:
        """Select the model settings supported by the Transformers model."""
        supported_args = [
            'max_tokens',
            'temperature',
            'top_p',
            'logit_bias',
            'extra_body',
        ]
        filtered_settings = {k: model_settings[k] for k in supported_args if k in model_settings}

        return filtered_settings

    def _format_llama_cpp_inference_kwargs(self, model_settings: dict[str, Any]) -> dict[str, Any]:
        """Select the model settings supported by the LlamaCpp model."""
        supported_args = [
            'max_tokens',
            'temperature',
            'top_p',
            'seed',
            'presence_penalty',
            'frequency_penalty',
            'logit_bias',
            'extra_body',
        ]
        filtered_settings = {k: model_settings[k] for k in supported_args if k in model_settings}

        return filtered_settings

    def _format_mlxlm_inference_kwargs(  # pragma: no cover
        self, model_settings: dict[str, Any]
    ) -> dict[str, Any]:
        """Select the model settings supported by the MLXLM model."""
        supported_args = [
            'extra_body',
        ]
        filtered_settings = {k: model_settings[k] for k in supported_args if k in model_settings}

        return filtered_settings

    def _format_sglang_inference_kwargs(self, model_settings: dict[str, Any]) -> dict[str, Any]:
        """Select the model settings supported by the SGLang model."""
        supported_args = [
            'max_tokens',
            'temperature',
            'top_p',
            'presence_penalty',
            'frequency_penalty',
            'extra_body',
        ]
        filtered_settings = {k: model_settings[k] for k in supported_args if k in model_settings}

        return filtered_settings

    def _format_vllm_offline_inference_kwargs(  # pragma: no cover
        self, model_settings: dict[str, Any]
    ) -> dict[str, Any]:
        """Build the inference kwargs for a vLLMOffline model.

        The sampling-related settings are written onto a `SamplingParams` object, which is returned under
        the `sampling_params` key. The remaining `extra_body` entries are returned as top-level kwargs.

        Args:
            model_settings: The model settings as a plain dict. Neither this dict nor its `extra_body`
                dict is modified.

        Returns:
            A dict holding `sampling_params` plus every other entry of `extra_body`.
        """
        from vllm.sampling_params import (  # pyright: ignore[reportMissingImports]
            SamplingParams,  # pyright: ignore[reportUnknownVariableType]
        )

        # `extra_body` is deliberately not listed: it is a container of extra kwargs, not a sampling field.
        sampling_fields = (
            'max_tokens',
            'temperature',
            'top_p',
            'seed',
            'presence_penalty',
            'frequency_penalty',
            'logit_bias',
        )

        # Work on a copy: `model_settings` is only a shallow copy of the caller's settings, so popping from
        # the original `extra_body` would remove `sampling_params` from the caller's settings for good.
        extra_body: dict[str, Any] = dict(model_settings.get('extra_body') or {})

        # The arguments that are part of the fields of `ModelSettings` must be put in a `SamplingParams` object and
        # provided through the `sampling_params` argument to vLLM
        sampling_params = extra_body.pop('sampling_params', None)
        if sampling_params is None:
            sampling_params = SamplingParams()

        # NOTE(review): fields missing from `model_settings` are written as None, which also overrides values
        # set on a user-supplied `SamplingParams` — confirm this is intended.
        for key in sampling_fields:
            setattr(sampling_params, key, model_settings.get(key, None))

        filtered_settings = {
            'sampling_params': sampling_params,
            **extra_body,
        }

        return filtered_settings

    async def _format_prompt(  # noqa: C901
        self, messages: list[ModelMessage], model_request_parameters: ModelRequestParameters
    ) -> Chat:
        """Turn the model messages into an Outlines Chat instance.

        Args:
            messages: The message history to convert, in order.
            model_request_parameters: The request parameters, passed to `_get_instructions`.

        Returns:
            A `Chat` holding system, user and assistant messages built from `messages`.

        Raises:
            UserError: If a message contains tool calls or tool returns, a user content item that is not a
                string or an image, or a file part that is not an image.
        """
        chat = Chat()

        # Instructions, when there are any, go in first as a system message.
        if instructions := self._get_instructions(messages, model_request_parameters):
            chat.add_system_message(instructions)

        for message in messages:
            if isinstance(message, ModelRequest):
                for part in message.parts:
                    if isinstance(part, SystemPromptPart):
                        chat.add_system_message(part.content)
                    elif isinstance(part, UserPromptPart):
                        if isinstance(part.content, str):
                            chat.add_user_message(part.content)
                        elif isinstance(part.content, Sequence):
                            # Multi-item content: strings are kept as-is, images are wrapped in an Outlines
                            # `Image`, and the whole list becomes one user message.
                            outlines_input: list[str | Image] = []
                            for item in part.content:
                                if isinstance(item, str):
                                    outlines_input.append(item)
                                elif isinstance(item, ImageUrl):
                                    # Remote images are downloaded first, then opened with PIL.
                                    image_content: DownloadedItem[bytes] = await download_item(
                                        item, data_format='bytes', type_format='mime'
                                    )
                                    image = self._create_PIL_image(image_content['data'], image_content['data_type'])
                                    outlines_input.append(Image(image))
                                elif isinstance(item, BinaryContent) and item.is_image:
                                    image = self._create_PIL_image(item.data, item.media_type)
                                    outlines_input.append(Image(image))
                                else:
                                    # NOTE(review): the message names `BinaryImage` while the check above is
                                    # on `BinaryContent.is_image` — confirm the wording.
                                    raise UserError(
                                        'Each element of the content sequence must be a string, an `ImageUrl`'
                                        + ' or a `BinaryImage`.'
                                    )
                            chat.add_user_message(outlines_input)
                        else:
                            assert_never(part.content)
                    elif isinstance(part, RetryPromptPart):
                        # Retry prompts are sent back to the model as a user message.
                        chat.add_user_message(part.model_response())
                    elif isinstance(part, ToolReturnPart):
                        raise UserError('Tool calls are not supported for Outlines models yet.')
                    else:
                        assert_never(part)
            elif isinstance(message, ModelResponse):
                # Text and image parts of one response are collected and added as one assistant message.
                text_parts: list[str] = []
                image_parts: list[Image] = []
                for part in message.parts:
                    if isinstance(part, TextPart):
                        text_parts.append(part.content)
                    elif isinstance(part, ThinkingPart):
                        # NOTE: We don't send ThinkingPart to the providers yet.
                        pass
                    elif isinstance(part, ToolCallPart | BuiltinToolCallPart | BuiltinToolReturnPart):
                        raise UserError('Tool calls are not supported for Outlines models yet.')
                    elif isinstance(part, FilePart):
                        if isinstance(part.content, BinaryContent) and part.content.is_image:
                            image = self._create_PIL_image(part.content.data, part.content.media_type)
                            image_parts.append(Image(image))
                        else:
                            raise UserError(
                                'File parts other than `BinaryImage` are not supported for Outlines models yet.'
                            )
                    else:
                        assert_never(part)
                # A single text part is passed as a plain string; anything else is passed as a list with
                # all texts first, then all images.
                # NOTE(review): a response with no text and no image parts (e.g. only thinking parts) adds
                # an assistant message with an empty list — confirm Outlines accepts that.
                if len(text_parts) == 1 and len(image_parts) == 0:
                    chat.add_assistant_message(text_parts[0])
                else:
                    chat.add_assistant_message([*text_parts, *image_parts])
            else:
                assert_never(message)
        return chat

    def _create_PIL_image(self, data: bytes, data_type: str) -> PILImage.Image:
        """Open `data` as a PIL image and set its format from `data_type`.

        The format is the text after the last '/' in `data_type` (the whole string when it has no '/').
        """
        pil_image = PILImage.open(io.BytesIO(data))
        _, _, subtype = data_type.rpartition('/')
        pil_image.format = subtype
        return pil_image

    def _process_response(self, response: str) -> ModelResponse:
        """Build a Pydantic AI model response from the text generated by Outlines.

        The text is split into text and thinking parts using the profile's thinking tags.
        """
        parts = split_content_into_text_and_thinking(response, self.profile.thinking_tags)
        return ModelResponse(parts=cast(list[ModelResponsePart], parts))

    async def _process_streamed_response(
        self, response: AsyncIterable[str], model_request_parameters: ModelRequestParameters
    ) -> StreamedResponse:
        """Build a Pydantic AI streamed response from the text chunks produced by Outlines.

        Raises:
            UnexpectedModelBehavior: If the stream ends before producing a first chunk.
        """
        stream = _utils.PeekableAsyncStream(response)

        # Peek at the first chunk so an empty stream is reported before a response object is returned.
        if isinstance(await stream.peek(), _utils.Unset):  # pragma: no cover
            raise UnexpectedModelBehavior('Streamed response ended without content or tool calls')

        return OutlinesStreamedResponse(
            model_request_parameters=model_request_parameters,
            _model_name=self._model_name,
            _model_profile=self.profile,
            _response=stream,
            _timestamp=datetime.now(tz=timezone.utc),
            _provider_name='outlines',
        )

    def customize_request_parameters(self, model_request_parameters: ModelRequestParameters) -> ModelRequestParameters:
        """Customize the model request parameters for the model."""
        if model_request_parameters.output_mode in ('auto', 'native'):
            # This way the JSON schema will be included in the instructions.
            return replace(model_request_parameters, output_mode='prompted')
        else:
            return model_request_parameters

__init__

__init__(
    model: Model | AsyncModel,
    *,
    provider: (
        Literal["outlines"] | Provider[Model]
    ) = "outlines",
    profile: ModelProfileSpec | None = None,
    settings: ModelSettings | None = None
)

Initialize an Outlines model.

Parameters:

Name	Type	Description	Default
`model`	`Model \| AsyncModel`	The Outlines model used for the model.	required
`provider`	`Literal['outlines'] \| Provider[Model]`	The provider to use for OutlinesModel. Can be either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. It is only used to pick the default model profile when `profile` is not given.	`'outlines'`
`profile`	`ModelProfileSpec \| None`	The model profile to use. Defaults to a profile picked by the provider.	`None`
`settings`	`ModelSettings \| None`	Default model settings for this model instance.	`None`

Source code in pydantic_ai_slim/pydantic_ai/models/outlines.py

def __init__(
    self,
    model: OutlinesBaseModel | OutlinesAsyncBaseModel,
    *,
    provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
    profile: ModelProfileSpec | None = None,
    settings: ModelSettings | None = None,
):
    """Wrap an already-constructed Outlines model.

    Args:
        model: The Outlines model (sync or async) that runs the generations.
        provider: Either the string 'outlines', which is resolved through `infer_provider`, or an
            instance of `Provider[OutlinesBaseModel]`. It is only used to supply the default model
            profile when `profile` is not given.
        profile: The model profile to use. Defaults to the provider's `model_profile`.
        settings: Default model settings for this model instance.
    """
    self.model: OutlinesBaseModel | OutlinesAsyncBaseModel = model
    # The reported model name is a fixed placeholder.
    self._model_name: str = 'outlines-model'

    resolved_provider = infer_provider(provider) if isinstance(provider, str) else provider

    super().__init__(settings=settings, profile=profile or resolved_provider.model_profile)

from_transformers `classmethod`

from_transformers(
    hf_model: PreTrainedModel,
    hf_tokenizer_or_processor: (
        PreTrainedTokenizer | ProcessorMixin
    ),
    *,
    provider: (
        Literal["outlines"] | Provider[Model]
    ) = "outlines",
    profile: ModelProfileSpec | None = None,
    settings: ModelSettings | None = None
)

Create an Outlines model from a Hugging Face model and tokenizer.

Parameters:

Name	Type	Description	Default
`hf_model`	`PreTrainedModel`	The Hugging Face PreTrainedModel or any model that is compatible with the `transformers` API.	required
`hf_tokenizer_or_processor`	`PreTrainedTokenizer \| ProcessorMixin`	Either a HuggingFace `PreTrainedTokenizer` or any tokenizer that is compatible with the `transformers` API, or a HuggingFace processor inheriting from `ProcessorMixin`. If a tokenizer is provided, a regular model will be used, while if you provide a processor, it will be a multimodal model.	required
`provider`	`Literal['outlines'] \| Provider[Model]`	The provider to use for OutlinesModel. Can be either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. It is only used to pick the default model profile when `profile` is not given.	`'outlines'`
`profile`	`ModelProfileSpec \| None`	The model profile to use. Defaults to a profile picked by the provider.	`None`
`settings`	`ModelSettings \| None`	Default model settings for this model instance.	`None`

Source code in pydantic_ai_slim/pydantic_ai/models/outlines.py

@classmethod
def from_transformers(
    cls,
    hf_model: transformers.modeling_utils.PreTrainedModel,
    hf_tokenizer_or_processor: transformers.tokenization_utils.PreTrainedTokenizer
    | transformers.processing_utils.ProcessorMixin,
    *,
    provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
    profile: ModelProfileSpec | None = None,
    settings: ModelSettings | None = None,
):
    """Build an `OutlinesModel` around a Hugging Face model and tokenizer/processor.

    Args:
        hf_model: The Hugging Face PreTrainedModel or any model that is compatible with the
            `transformers` API.
        hf_tokenizer_or_processor: Either a HuggingFace `PreTrainedTokenizer` or any tokenizer that is compatible
            with the `transformers` API, or a HuggingFace processor inheriting from `ProcessorMixin`. If a
            tokenizer is provided, a regular model will be used, while if you provide a processor, it will be a
            multimodal model.
        provider: Either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. Forwarded
            unchanged to the class constructor.
        profile: The model profile to use. Defaults to a profile picked by the provider.
        settings: Default model settings for this model instance.

    Returns:
        A new instance of `cls` wrapping the Outlines model built by `from_transformers`.
    """
    return cls(
        from_transformers(hf_model, hf_tokenizer_or_processor),
        provider=provider,
        profile=profile,
        settings=settings,
    )

from_llamacpp `classmethod`

from_llamacpp(
    llama_model: Llama,
    *,
    provider: (
        Literal["outlines"] | Provider[Model]
    ) = "outlines",
    profile: ModelProfileSpec | None = None,
    settings: ModelSettings | None = None
)

Create an Outlines model from a LlamaCpp model.

Parameters:

Name	Type	Description	Default
`llama_model`	`Llama`	The llama_cpp.Llama model to use.	required
`provider`	`Literal['outlines'] \| Provider[Model]`	The provider to use for OutlinesModel. Can be either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. It is only used to pick the default model profile when `profile` is not given.	`'outlines'`
`profile`	`ModelProfileSpec \| None`	The model profile to use. Defaults to a profile picked by the provider.	`None`
`settings`	`ModelSettings \| None`	Default model settings for this model instance.	`None`

Source code in pydantic_ai_slim/pydantic_ai/models/outlines.py

@classmethod
def from_llamacpp(
    cls,
    llama_model: llama_cpp.Llama,
    *,
    provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
    profile: ModelProfileSpec | None = None,
    settings: ModelSettings | None = None,
):
    """Build an `OutlinesModel` around a LlamaCpp model.

    Args:
        llama_model: The `llama_cpp.Llama` model to wrap.
        provider: Either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. Forwarded
            unchanged to the class constructor.
        profile: The model profile to use. Defaults to a profile picked by the provider.
        settings: Default model settings for this model instance.

    Returns:
        A new instance of `cls` wrapping the Outlines model built by `from_llamacpp`.
    """
    return cls(
        from_llamacpp(llama_model),
        provider=provider,
        profile=profile,
        settings=settings,
    )

from_mlxlm `classmethod`

from_mlxlm(
    mlx_model: Module,
    mlx_tokenizer: PreTrainedTokenizer,
    *,
    provider: (
        Literal["outlines"] | Provider[Model]
    ) = "outlines",
    profile: ModelProfileSpec | None = None,
    settings: ModelSettings | None = None
)

Create an Outlines model from a MLXLM model.

Parameters:

Name	Type	Description	Default
`mlx_model`	`Module`	The nn.Module model to use.	required
`mlx_tokenizer`	`PreTrainedTokenizer`	The PreTrainedTokenizer to use.	required
`provider`	`Literal['outlines'] \| Provider[Model]`	The provider to use for OutlinesModel. Can be either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. It is only used to pick the default model profile when `profile` is not given.	`'outlines'`
`profile`	`ModelProfileSpec \| None`	The model profile to use. Defaults to a profile picked by the provider.	`None`
`settings`	`ModelSettings \| None`	Default model settings for this model instance.	`None`

Source code in pydantic_ai_slim/pydantic_ai/models/outlines.py

@classmethod
def from_mlxlm(  # pragma: no cover
    cls,
    mlx_model: nn.Module,  # pyright: ignore[reportUnknownParameterType, reportUnknownMemberType]
    mlx_tokenizer: transformers.tokenization_utils.PreTrainedTokenizer,
    *,
    provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
    profile: ModelProfileSpec | None = None,
    settings: ModelSettings | None = None,
):
    """Build an `OutlinesModel` around an MLXLM model.

    Args:
        mlx_model: The `nn.Module` model to wrap.
        mlx_tokenizer: The `PreTrainedTokenizer` that goes with `mlx_model`.
        provider: Either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. Forwarded
            unchanged to the class constructor.
        profile: The model profile to use. Defaults to a profile picked by the provider.
        settings: Default model settings for this model instance.

    Returns:
        A new instance of `cls` wrapping the Outlines model built by `from_mlxlm`.
    """
    wrapped: OutlinesBaseModel = from_mlxlm(mlx_model, mlx_tokenizer)  # pyright: ignore[reportUnknownArgumentType]
    return cls(
        wrapped,
        provider=provider,
        profile=profile,
        settings=settings,
    )

from_sglang `classmethod`

from_sglang(
    base_url: str,
    api_key: str | None = None,
    model_name: str | None = None,
    *,
    provider: (
        Literal["outlines"] | Provider[Model]
    ) = "outlines",
    profile: ModelProfileSpec | None = None,
    settings: ModelSettings | None = None
)

Create an Outlines model to send requests to an SGLang server.

Parameters:

Name	Type	Description	Default
`base_url`	`str`	The url of the SGLang server.	required
`api_key`	`str \| None`	The API key to use for authenticating requests to the SGLang server.	`None`
`model_name`	`str \| None`	The name of the model to use.	`None`
`provider`	`Literal['outlines'] \| Provider[Model]`	The provider to use for OutlinesModel. Can be either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. It is only used to pick the default model profile when `profile` is not given.	`'outlines'`
`profile`	`ModelProfileSpec \| None`	The model profile to use. Defaults to a profile picked by the provider.	`None`
`settings`	`ModelSettings \| None`	Default model settings for this model instance.	`None`

Source code in pydantic_ai_slim/pydantic_ai/models/outlines.py

@classmethod
def from_sglang(
    cls,
    base_url: str,
    api_key: str | None = None,
    model_name: str | None = None,
    *,
    provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
    profile: ModelProfileSpec | None = None,
    settings: ModelSettings | None = None,
):
    """Build an `OutlinesModel` that sends its requests to an SGLang server.

    The server is reached through an `openai.AsyncOpenAI` client created from `base_url` and `api_key`.

    Args:
        base_url: The url of the SGLang server.
        api_key: The API key to use for authenticating requests to the SGLang server. Passed to
            `AsyncOpenAI` as-is, including when it is `None`.
        model_name: The name of the model to use.
        provider: Either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. Forwarded
            unchanged to the class constructor.
        profile: The model profile to use. Defaults to a profile picked by the provider.
        settings: Default model settings for this model instance.

    Returns:
        A new instance of `cls` wrapping the Outlines model built by `from_sglang`.

    Raises:
        ImportError: If the `openai` package is not installed.
    """
    # `openai` is an optional dependency, so it is imported only when this constructor is used.
    try:
        from openai import AsyncOpenAI
    except ImportError as _import_error:
        raise ImportError(
            'Please install `openai` to use the Outlines SGLang model, '
            'you can use the `openai` optional group — `pip install "pydantic-ai-slim[openai]"`'
        ) from _import_error

    client = AsyncOpenAI(base_url=base_url, api_key=api_key)
    return cls(
        from_sglang(client, model_name),
        provider=provider,
        profile=profile,
        settings=settings,
    )

from_vllm_offline `classmethod`

from_vllm_offline(
    vllm_model: Any,
    *,
    provider: (
        Literal["outlines"] | Provider[Model]
    ) = "outlines",
    profile: ModelProfileSpec | None = None,
    settings: ModelSettings | None = None
)

Create an Outlines model from a vLLM offline inference model.

Parameters:

Name	Type	Description	Default
`vllm_model`	`Any`	The vllm.LLM local model to use.	required
`provider`	`Literal['outlines'] \| Provider[Model]`	The provider to use for OutlinesModel. Can be either the string 'outlines' or an instance of `Provider[OutlinesBaseModel]`. If not provided, the other parameters will be used.	`'outlines'`
`profile`	`ModelProfileSpec \| None`	The model profile to use. Defaults to a profile picked by the provider.	`None`
`settings`	`ModelSettings \| None`	Default model settings for this model instance.	`None`

Source code in pydantic_ai_slim/pydantic_ai/models/outlines.py

@classmethod
def from_vllm_offline(  # pragma: no cover
    cls,
    vllm_model: Any,
    *,
    provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
    profile: ModelProfileSpec | None = None,
    settings: ModelSettings | None = None,
):
    """Build an Outlines model on top of a vLLM offline inference model.

    Args:
        vllm_model: The vllm.LLM local model to wrap.
        provider: The provider to use for OutlinesModel. Either the string 'outlines' or an
            instance of `Provider[OutlinesBaseModel]`.
        profile: The model profile to use. Defaults to a profile picked by the provider.
        settings: Default model settings for this model instance.
    """
    # Class attributes are not in scope inside a method body, so `from_vllm_offline` below
    # resolves to the module-level name rather than recursing into this classmethod.
    wrapped_model: OutlinesBaseModel | OutlinesAsyncBaseModel = from_vllm_offline(vllm_model)
    return cls(wrapped_model, provider=provider, profile=profile, settings=settings)

format_inference_kwargs

format_inference_kwargs(
    model_settings: ModelSettings | None,
) -> dict[str, Any]

Format the model settings for the inference kwargs.

Source code in pydantic_ai_slim/pydantic_ai/models/outlines.py

def format_inference_kwargs(self, model_settings: ModelSettings | None) -> dict[str, Any]:
    """Format the model settings for the inference kwargs.

    The settings are copied into a plain dict, passed through the formatter that matches
    the type of `self.model` (if any matches), and finally the entries of `extra_body`
    are merged into the top level of the result, overriding keys of the same name.

    Args:
        model_settings: The settings to convert. `None` or empty settings yield an empty dict.

    Returns:
        The keyword arguments to pass to the underlying model at inference time.
    """
    settings_dict: dict[str, Any] = dict(model_settings) if model_settings else {}

    if isinstance(self.model, Transformers):
        settings_dict = self._format_transformers_inference_kwargs(settings_dict)
    elif isinstance(self.model, LlamaCpp):
        settings_dict = self._format_llama_cpp_inference_kwargs(settings_dict)
    elif isinstance(self.model, MLXLM):  # pragma: no cover
        settings_dict = self._format_mlxlm_inference_kwargs(settings_dict)
    elif isinstance(self.model, SGLang | AsyncSGLang):
        settings_dict = self._format_sglang_inference_kwargs(settings_dict)
    elif isinstance(self.model, VLLMOffline):  # pragma: no cover
        settings_dict = self._format_vllm_offline_inference_kwargs(settings_dict)

    # The key may be present with an explicit `None` value; `dict.update(None)` would raise
    # a TypeError, so treat `None` the same as an absent `extra_body`.
    extra_body = settings_dict.pop('extra_body', None)
    if extra_body is not None:
        settings_dict.update(extra_body)

    return settings_dict

customize_request_parameters

customize_request_parameters(
    model_request_parameters: ModelRequestParameters,
) -> ModelRequestParameters

Customize the model request parameters for the model.

Source code in pydantic_ai_slim/pydantic_ai/models/outlines.py

def customize_request_parameters(self, model_request_parameters: ModelRequestParameters) -> ModelRequestParameters:
    """Switch 'auto' and 'native' output modes to 'prompted'; leave other modes untouched.

    Args:
        model_request_parameters: The request parameters to adapt.

    Returns:
        A copy with `output_mode='prompted'` when the incoming mode is 'auto' or 'native',
        otherwise the very same object that was passed in.
    """
    if model_request_parameters.output_mode not in ('auto', 'native'):
        return model_request_parameters

    # Prompted mode is what gets the JSON schema included in the instructions.
    return replace(model_request_parameters, output_mode='prompted')

OutlinesStreamedResponse `dataclass`

Bases: StreamedResponse

Implementation of StreamedResponse for Outlines models.

Source code in pydantic_ai_slim/pydantic_ai/models/outlines.py

@dataclass
class OutlinesStreamedResponse(StreamedResponse):
    """`StreamedResponse` implementation that consumes an async stream of text chunks from an Outlines model."""

    # Value returned by the `model_name` property.
    _model_name: str
    # Supplies `thinking_tags` and `ignore_streamed_leading_whitespace` for text-delta handling.
    _model_profile: ModelProfile
    # The async stream of text chunks to turn into stream events.
    _response: AsyncIterable[str]
    # Value returned by the `timestamp` property.
    _timestamp: datetime
    # Value returned by the `provider_name` property.
    _provider_name: str
    # Value returned by the `provider_url` property; optional.
    _provider_url: str | None = None

    @property
    def model_name(self) -> str:
        """Name of the model that produced this response."""
        return self._model_name

    @property
    def provider_name(self) -> str:
        """Name of the provider."""
        return self._provider_name

    @property
    def provider_url(self) -> str | None:
        """Base URL of the provider, or `None` when not set."""
        return self._provider_url

    @property
    def timestamp(self) -> datetime:
        """Timestamp of the response."""
        return self._timestamp

    async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
        """Yield the events the parts manager produces for each text chunk of the stream."""
        async for chunk in self._response:
            # Every chunk is reported under the same vendor part id, 'content'.
            events = self._parts_manager.handle_text_delta(
                vendor_part_id='content',
                content=chunk,
                thinking_tags=self._model_profile.thinking_tags,
                ignore_leading_whitespace=self._model_profile.ignore_streamed_leading_whitespace,
            )
            for event in events:
                yield event

model_name `property`

model_name: str

Get the model name of the response.

provider_name `property`

provider_name: str

Get the provider name.

provider_url `property`

provider_url: str | None

Get the provider base URL.

timestamp `property`

timestamp: datetime

Get the timestamp of the response.

pydantic_ai.models.outlines

Setup

OutlinesModel dataclass

__init__

from_transformers classmethod

from_llamacpp classmethod

from_mlxlm classmethod

from_sglang classmethod

from_vllm_offline classmethod

format_inference_kwargs

customize_request_parameters

OutlinesStreamedResponse dataclass

model_name property

provider_name property

provider_url property

timestamp property

`pydantic_ai.models.outlines`

OutlinesModel `dataclass`

`__init__`

from_transformers `classmethod`

from_llamacpp `classmethod`

from_mlxlm `classmethod`

from_sglang `classmethod`

from_vllm_offline `classmethod`

OutlinesStreamedResponse `dataclass`

model_name `property`

provider_name `property`

provider_url `property`

timestamp `property`