OpenAICompletionsClient

autogen.llm_clients.openai_completions_client.OpenAICompletionsClient #

OpenAICompletionsClient(api_key=None, base_url=None, timeout=60.0, **kwargs)

Bases: ModelClient

OpenAI Chat Completions API client implementing the ModelClientV2 protocol.

This client works with OpenAI's Chat Completions API (client.chat.completions.create), which returns structured output with reasoning blocks (o1/o3 models), tool calls, and more.

Key Features:

- Preserves reasoning blocks as ReasoningContent (o1/o3 models)
- Handles tool calls and results
- Supports multimodal content
- Provides backward compatibility via create_v1_compatible()

Example

client = OpenAICompletionsClient(api_key="...")

# Get rich response with reasoning
response = client.create({
    "model": "o1-preview",
    "messages": [{"role": "user", "content": "Explain quantum computing"}],
})

# Access reasoning blocks
for reasoning in response.reasoning:
    print(f"Reasoning: {reasoning.reasoning}")

# Get text response
print(f"Answer: {response.text}")

Initialize OpenAI Chat Completions API client.

PARAMETER DESCRIPTION
api_key

OpenAI API key (or set OPENAI_API_KEY env var)

TYPE: str | None DEFAULT: None

base_url

Custom base URL for OpenAI API

TYPE: str | None DEFAULT: None

timeout

Request timeout in seconds

TYPE: float DEFAULT: 60.0

**kwargs

Additional arguments passed to OpenAI client

TYPE: Any DEFAULT: {}
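
For example, a client pointed at a compatible proxy endpoint can be constructed as follows (the base URL here is illustrative, not a real endpoint):

import os

# api_key falls back to the OPENAI_API_KEY environment variable when omitted
client = OpenAICompletionsClient(
    api_key=os.environ.get("OPENAI_API_KEY"),
    base_url="https://my-proxy.example.com/v1",  # hypothetical proxy URL
    timeout=30.0,  # fail faster than the 60.0-second default
)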

Source code in autogen/llm_clients/openai_completions_client.py
def __init__(
    self,
    api_key: str | None = None,
    base_url: str | None = None,
    timeout: float = 60.0,
    **kwargs: Any,
):
    """
    Initialize OpenAI Chat Completions API client.

    Args:
        api_key: OpenAI API key (or set OPENAI_API_KEY env var)
        base_url: Custom base URL for OpenAI API
        timeout: Request timeout in seconds
        **kwargs: Additional arguments passed to OpenAI client
    """
    if openai_import_exception is not None:
        raise openai_import_exception

    self.client = OpenAI(api_key=api_key, base_url=base_url, timeout=timeout, **kwargs)  # type: ignore[misc]
    self._cost_per_token = {
        # GPT-5 series - Latest flagship models (per million tokens)
        "gpt-5": {"prompt": 1.25 / 1_000_000, "completion": 10.00 / 1_000_000},
        "gpt-5-mini": {"prompt": 0.25 / 1_000_000, "completion": 2.00 / 1_000_000},
        "gpt-5-nano": {"prompt": 0.05 / 1_000_000, "completion": 0.40 / 1_000_000},
        # GPT-4o series - Multimodal flagship (per million tokens)
        "gpt-4o": {"prompt": 2.50 / 1_000_000, "completion": 10.00 / 1_000_000},
        "gpt-4o-mini": {"prompt": 0.15 / 1_000_000, "completion": 0.60 / 1_000_000},
        # GPT-4 Turbo (per million tokens)
        "gpt-4-turbo": {"prompt": 10.00 / 1_000_000, "completion": 30.00 / 1_000_000},
        # GPT-4 legacy (per million tokens)
        "gpt-4": {"prompt": 10.00 / 1_000_000, "completion": 30.00 / 1_000_000},
        # GPT-3.5 Turbo (per million tokens)
        "gpt-3.5-turbo": {"prompt": 0.50 / 1_000_000, "completion": 1.50 / 1_000_000},
        # o1 series - Reasoning models (keep existing if still valid)
        "o1-preview": {"prompt": 0.015 / 1000, "completion": 0.060 / 1000},
        "o1-mini": {"prompt": 0.003 / 1000, "completion": 0.012 / 1000},
        "o3-mini": {"prompt": 0.003 / 1000, "completion": 0.012 / 1000},
    }

RESPONSE_USAGE_KEYS class-attribute instance-attribute #

RESPONSE_USAGE_KEYS = ['prompt_tokens', 'completion_tokens', 'total_tokens', 'cost', 'model']

client instance-attribute #

client = OpenAI(api_key=api_key, base_url=base_url, timeout=timeout, **kwargs)

ModelClientResponseProtocol #

Bases: Protocol

choices instance-attribute #

choices

model instance-attribute #

model

Choice #

Bases: Protocol

message instance-attribute #

message

Message #

Bases: Protocol

content instance-attribute #

content
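
Taken together, these nested protocols describe the minimal response shape that legacy AG2 callers expect. A rough sketch of the equivalent declarations, reconstructed from the attribute listing above (the type annotations are assumptions, since the listing names the attributes only):

from typing import Any, Protocol

class Message(Protocol):
    content: Any  # annotation assumed; not specified in the listing

class Choice(Protocol):
    message: Message

class ModelClientResponseProtocol(Protocol):
    choices: list[Choice]  # element type assumed
    model: Any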

create #

create(params)

Create a completion and return UnifiedResponse with all features preserved.

This method implements ModelClient.create() but returns UnifiedResponse instead of ModelClientResponseProtocol. The rich UnifiedResponse structure is compatible via duck typing: it has a .model attribute and works with message_retrieval().

PARAMETER DESCRIPTION
params

Request parameters including:

- model: Model name (e.g., "o1-preview")
- messages: List of message dicts
- temperature: Optional temperature (not supported by o1 models)
- max_tokens: Optional max completion tokens
- tools: Optional tool definitions
- **other OpenAI parameters

TYPE: dict[str, Any]

RETURNS DESCRIPTION
UnifiedResponse

UnifiedResponse with reasoning blocks, citations, and all content preserved

Source code in autogen/llm_clients/openai_completions_client.py
def create(self, params: dict[str, Any]) -> UnifiedResponse:  # type: ignore[override]
    """
    Create a completion and return UnifiedResponse with all features preserved.

    This method implements ModelClient.create() but returns UnifiedResponse instead
    of ModelClientResponseProtocol. The rich UnifiedResponse structure is compatible
    via duck typing - it has .model attribute and works with message_retrieval().

    Args:
        params: Request parameters including:
            - model: Model name (e.g., "o1-preview")
            - messages: List of message dicts
            - temperature: Optional temperature (not supported by o1 models)
            - max_tokens: Optional max completion tokens
            - tools: Optional tool definitions
            - **other OpenAI parameters

    Returns:
        UnifiedResponse with reasoning blocks, citations, and all content preserved
    """
    # Call OpenAI API
    response = self.client.chat.completions.create(**params)

    # Transform to UnifiedResponse
    return self._transform_response(response, params.get("model", "unknown"))
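
Because params is forwarded unchanged to chat.completions.create, standard OpenAI options such as tools pass straight through. A minimal sketch (the get_weather schema is invented for illustration):

response = client.create({
    "model": "gpt-4o",
    "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "get_weather",  # hypothetical tool
                "description": "Look up current weather for a city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
    ],
})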

create_v1_compatible #

create_v1_compatible(params)

Create completion in backward-compatible ChatCompletionExtended format.

This method provides compatibility with existing AG2 code that expects ChatCompletionExtended format. Note that reasoning blocks and citations will be lost in this format.

PARAMETER DESCRIPTION
params

Same parameters as create()

TYPE: dict[str, Any]

RETURNS DESCRIPTION
Any

ChatCompletionExtended-compatible dict (flattened response)

Warning

This method loses information (reasoning blocks, citations) when converting to the legacy format. Prefer create() for new code.

Source code in autogen/llm_clients/openai_completions_client.py
def create_v1_compatible(self, params: dict[str, Any]) -> Any:
    """
    Create completion in backward-compatible ChatCompletionExtended format.

    This method provides compatibility with existing AG2 code that expects
    ChatCompletionExtended format. Note that reasoning blocks and citations
    will be lost in this format.

    Args:
        params: Same parameters as create()

    Returns:
        ChatCompletionExtended-compatible dict (flattened response)

    Warning:
        This method loses information (reasoning blocks, citations) when
        converting to the legacy format. Prefer create() for new code.
    """
    # Get rich response
    unified_response = self.create(params)

    # Convert to legacy format (simplified - would need full ChatCompletionExtended in practice)
    # Extract role and convert UserRoleEnum to string
    role = unified_response.messages[0].role if unified_response.messages else UserRoleEnum.ASSISTANT
    role_str = role.value if isinstance(role, UserRoleEnum) else role

    return {
        "id": unified_response.id,
        "model": unified_response.model,
        "created": unified_response.provider_metadata.get("created"),
        "object": "chat.completion",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": role_str,
                    "content": unified_response.text,
                },
                "finish_reason": unified_response.finish_reason,
            }
        ],
        "usage": unified_response.usage,
        "cost": unified_response.cost,
    }
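
One way to see what the conversion discards is to issue the same request through both entry points (a sketch, assuming a reasoning-capable model):

params = {"model": "o1-preview", "messages": [{"role": "user", "content": "Hi"}]}

rich = client.create(params)
legacy = client.create_v1_compatible(params)

print(rich.reasoning)  # reasoning blocks preserved on the rich path
print(legacy["choices"][0]["message"]["content"])  # legacy path keeps text only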

cost #

cost(response)

Calculate cost from response usage.

Implements ModelClient.cost() but accepts UnifiedResponse via duck typing.

PARAMETER DESCRIPTION
response

UnifiedResponse with usage information

TYPE: UnifiedResponse

RETURNS DESCRIPTION
float

Cost in USD for the API call

Source code in autogen/llm_clients/openai_completions_client.py
def cost(self, response: UnifiedResponse) -> float:  # type: ignore[override]
    """
    Calculate cost from response usage.

    Implements ModelClient.cost() but accepts UnifiedResponse via duck typing.

    Args:
        response: UnifiedResponse with usage information

    Returns:
        Cost in USD for the API call
    """
    if not response.usage:
        return 0.0

    model = response.model
    prompt_tokens = response.usage.get("prompt_tokens", 0)
    completion_tokens = response.usage.get("completion_tokens", 0)

    # Find pricing for model (exact match or prefix)
    pricing = None
    for model_key in self._cost_per_token:
        if model.startswith(model_key):
            pricing = self._cost_per_token[model_key]
            break

    if not pricing:
        # Unknown model - use default pricing (GPT-4 Turbo level, per million tokens)
        pricing = {"prompt": 10.00 / 1_000_000, "completion": 30.00 / 1_000_000}

    return (prompt_tokens * pricing["prompt"]) + (completion_tokens * pricing["completion"])
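
As a worked example of the prefix lookup and arithmetic above, suppose a response reports model "gpt-4o-2024-08-06" with 1,000 prompt tokens and 500 completion tokens. There is no exact table entry, but the prefix scan resolves it to the "gpt-4o" row (note that the hard-coded prices may lag OpenAI's current rates):

# Prefix match: "gpt-4o-2024-08-06".startswith("gpt-4o") -> gpt-4o pricing
#   1_000 * (2.50 / 1_000_000) + 500 * (10.00 / 1_000_000)
# = 0.0025 + 0.0050
# = 0.0075 USD
cost_usd = client.cost(response)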

get_usage staticmethod #

get_usage(response)

Extract usage statistics from response.

Implements ModelClient.get_usage() but accepts UnifiedResponse via duck typing.

PARAMETER DESCRIPTION
response

UnifiedResponse from create()

TYPE: UnifiedResponse

RETURNS DESCRIPTION
dict[str, Any]

Dict with keys from RESPONSE_USAGE_KEYS

Source code in autogen/llm_clients/openai_completions_client.py
@staticmethod
def get_usage(response: UnifiedResponse) -> dict[str, Any]:  # type: ignore[override]
    """
    Extract usage statistics from response.

    Implements ModelClient.get_usage() but accepts UnifiedResponse via duck typing.

    Args:
        response: UnifiedResponse from create()

    Returns:
        Dict with keys from RESPONSE_USAGE_KEYS
    """
    return {
        "prompt_tokens": response.usage.get("prompt_tokens", 0),
        "completion_tokens": response.usage.get("completion_tokens", 0),
        "total_tokens": response.usage.get("total_tokens", 0),
        "cost": response.cost or 0.0,
        "model": response.model,
    }
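
Because the returned keys match RESPONSE_USAGE_KEYS, per-call statistics can be accumulated directly. A small sketch, assuming responses holds UnifiedResponse objects from earlier create() calls:

totals = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "cost": 0.0}
for response in responses:
    usage = OpenAICompletionsClient.get_usage(response)
    for key in totals:
        totals[key] += usage[key]  # "model" is a string and is deliberately left out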

message_retrieval #

message_retrieval(response)

Retrieve text content from response messages.

Implements ModelClient.message_retrieval() but accepts UnifiedResponse via duck typing.

PARAMETER DESCRIPTION
response

UnifiedResponse from create()

TYPE: UnifiedResponse

RETURNS DESCRIPTION
list[str]

List of text strings from message content blocks

Source code in autogen/llm_clients/openai_completions_client.py
def message_retrieval(self, response: UnifiedResponse) -> list[str]:  # type: ignore[override]
    """
    Retrieve text content from response messages.

    Implements ModelClient.message_retrieval() but accepts UnifiedResponse via duck typing.

    Args:
        response: UnifiedResponse from create()

    Returns:
        List of text strings from message content blocks
    """
    return [msg.get_text() for msg in response.messages]
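
For a typical single-message response this returns a one-element list, matching the response.text convenience shown in the class example:

texts = client.message_retrieval(response)
for text in texts:
    print(text)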