OpenAICompletionsClient

autogen.llm_clients.openai_completions_client.OpenAICompletionsClient #

OpenAICompletionsClient(api_key=None, base_url=None, timeout=60.0, response_format=None, **kwargs)

Bases: ModelClient

OpenAI Chat Completions API client implementing the ModelClientV2 protocol.

This client works with OpenAI's Chat Completions API (client.chat.completions.create) and returns structured output that preserves reasoning blocks (o1/o3 models), tool calls, and multimodal content.

Key Features:

- Preserves reasoning blocks as ReasoningContent (o1/o3 models)
- Handles tool calls and results
- Supports multimodal content
- Provides backward compatibility via create_v1_compatible()

Example

client = OpenAICompletionsClient(api_key="...")

# Get rich response with reasoning
response = client.create({
    "model": "o1-preview",
    "messages": [{"role": "user", "content": "Explain quantum computing"}],
})

# Access reasoning blocks
for reasoning in response.reasoning:
    print(f"Reasoning: {reasoning.reasoning}")

# Get text response
print(f"Answer: {response.text}")

Initialize OpenAI Chat Completions API client.

PARAMETER DESCRIPTION
api_key

OpenAI API key (or set OPENAI_API_KEY env var)

TYPE: str | None DEFAULT: None

base_url

Custom base URL for OpenAI API

TYPE: str | None DEFAULT: None

timeout

Request timeout in seconds

TYPE: float DEFAULT: 60.0

response_format

Optional response format (Pydantic model or JSON schema)

TYPE: Any DEFAULT: None

**kwargs

Additional arguments passed to OpenAI client

TYPE: Any DEFAULT: {}

Source code in autogen/llm_clients/openai_completions_client.py
def __init__(
    self,
    api_key: str | None = None,
    base_url: str | None = None,
    timeout: float = 60.0,
    response_format: Any = None,
    **kwargs: Any,
):
    """
    Initialize OpenAI Chat Completions API client.

    Args:
        api_key: OpenAI API key (or set OPENAI_API_KEY env var)
        base_url: Custom base URL for OpenAI API
        timeout: Request timeout in seconds
        response_format: Optional response format (Pydantic model or JSON schema)
        **kwargs: Additional arguments passed to OpenAI client
    """
    if openai_import_exception is not None:
        raise openai_import_exception

    self.client = OpenAI(api_key=api_key, base_url=base_url, timeout=timeout, **kwargs)  # type: ignore[misc]
    self._default_response_format = response_format
    self._cost_per_token = {
        # GPT-5 series - Latest flagship models (per million tokens)
        "gpt-5": {"prompt": 1.25 / 1_000_000, "completion": 10.00 / 1_000_000},
        "gpt-5-mini": {"prompt": 0.25 / 1_000_000, "completion": 2.00 / 1_000_000},
        "gpt-5-nano": {"prompt": 0.05 / 1_000_000, "completion": 0.40 / 1_000_000},
        # GPT-4o series - Multimodal flagship (per million tokens)
        "gpt-4o": {"prompt": 2.50 / 1_000_000, "completion": 10.00 / 1_000_000},
        "gpt-4o-mini": {"prompt": 0.15 / 1_000_000, "completion": 0.60 / 1_000_000},
        # GPT-4 Turbo (per million tokens)
        "gpt-4-turbo": {"prompt": 10.00 / 1_000_000, "completion": 30.00 / 1_000_000},
        # GPT-4 legacy (per million tokens)
        "gpt-4": {"prompt": 10.00 / 1_000_000, "completion": 30.00 / 1_000_000},
        # GPT-3.5 Turbo (per million tokens)
        "gpt-3.5-turbo": {"prompt": 0.50 / 1_000_000, "completion": 1.50 / 1_000_000},
        # o1/o3 series - Reasoning models (per thousand tokens)
        "o1-preview": {"prompt": 0.015 / 1000, "completion": 0.060 / 1000},
        "o1-mini": {"prompt": 0.003 / 1000, "completion": 0.012 / 1000},
        "o3-mini": {"prompt": 0.003 / 1000, "completion": 0.012 / 1000},
    }
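
As a constructor usage sketch (the `Answer` schema and the parameter values are illustrative assumptions, not part of the library), a default `response_format` set here is merged into every `create()` call that does not supply its own:

```python
from pydantic import BaseModel

from autogen.llm_clients.openai_completions_client import OpenAICompletionsClient


class Answer(BaseModel):  # hypothetical schema, for illustration only
    summary: str
    confidence: float


client = OpenAICompletionsClient(
    api_key="...",           # or rely on the OPENAI_API_KEY env var
    timeout=120.0,
    response_format=Answer,  # default for calls without their own response_format
)
```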

RESPONSE_USAGE_KEYS class-attribute instance-attribute #

RESPONSE_USAGE_KEYS = ['prompt_tokens', 'completion_tokens', 'total_tokens', 'cost', 'model']

client instance-attribute #

client = OpenAI(api_key=api_key, base_url=base_url, timeout=timeout, **kwargs)

ModelClientResponseProtocol #

Bases: Protocol

choices instance-attribute #

choices

model instance-attribute #

model

Choice #

Bases: Protocol

message instance-attribute #

message

Message #

Bases: Protocol

content instance-attribute #
content

create #

create(params)

Create a completion and return UnifiedResponse with all features preserved.

This method implements ModelClient.create() but returns UnifiedResponse instead of ModelClientResponseProtocol. The rich UnifiedResponse structure is compatible via duck typing: it exposes a .model attribute and works with message_retrieval().

PARAMETER DESCRIPTION
params

Request parameters including:

- model: Model name (e.g., "o1-preview")
- messages: List of message dicts
- temperature: Optional temperature (not supported by o1 models)
- max_tokens: Optional max completion tokens
- tools: Optional tool definitions
- response_format: Optional Pydantic BaseModel or JSON schema dict
- Any other OpenAI parameters

TYPE: dict[str, Any]

RETURNS DESCRIPTION
UnifiedResponse

UnifiedResponse with reasoning blocks, citations, and all content preserved

Source code in autogen/llm_clients/openai_completions_client.py
def create(self, params: dict[str, Any]) -> UnifiedResponse:  # type: ignore[override]
    """
    Create a completion and return UnifiedResponse with all features preserved.

    This method implements ModelClient.create() but returns UnifiedResponse instead
    of ModelClientResponseProtocol. The rich UnifiedResponse structure is compatible
    via duck typing - it has .model attribute and works with message_retrieval().

    Args:
        params: Request parameters including:
            - model: Model name (e.g., "o1-preview")
            - messages: List of message dicts
            - temperature: Optional temperature (not supported by o1 models)
            - max_tokens: Optional max completion tokens
            - tools: Optional tool definitions
            - response_format: Optional Pydantic BaseModel or JSON schema dict
            - **other OpenAI parameters

    Returns:
        UnifiedResponse with reasoning blocks, citations, and all content preserved
    """
    # Make a copy of params to avoid mutating the original
    params = params.copy()

    # Merge default response_format if not already in params
    if self._default_response_format is not None and "response_format" not in params:
        params["response_format"] = self._default_response_format

    # Process reasoning model parameters (o1/o3 models)
    if self._is_reasoning_model(params.get("model")):
        self._process_reasoning_model_params(params)

    # Check if response_format is a Pydantic BaseModel
    response_format = params.get("response_format")
    use_parse = self._is_pydantic_model(response_format)

    # Call OpenAI API - use parse() for Pydantic models, create() otherwise
    if use_parse:
        # parse() doesn't support stream parameter - remove it if present
        parse_params = params.copy()
        parse_params.pop("stream", None)
        response = self.client.chat.completions.parse(**parse_params)
    else:
        response = self.client.chat.completions.create(**params)

    # Transform to UnifiedResponse
    return self._transform_response(response, params.get("model", "unknown"), use_parse=use_parse)
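
A minimal sketch of the two call paths above (model names, prompts, and the `Verdict` schema are assumptions for illustration): a Pydantic `response_format` routes the request through `chat.completions.parse()`, anything else through `chat.completions.create()`.

```python
from pydantic import BaseModel


class Verdict(BaseModel):  # hypothetical schema
    label: str


# Pydantic response_format -> handled via client.chat.completions.parse()
structured = client.create({
    "model": "gpt-4o",
    "messages": [{"role": "user", "content": "Is the sky blue? One word."}],
    "response_format": Verdict,
})

# No response_format -> handled via client.chat.completions.create()
plain = client.create({
    "model": "o1-preview",
    "messages": [{"role": "user", "content": "Explain quantum computing"}],
})
print(plain.text)
```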

create_v1_compatible #

create_v1_compatible(params)

Create completion in backward-compatible ChatCompletionExtended format.

This method provides compatibility with existing AG2 code that expects ChatCompletionExtended format. Note that reasoning blocks and citations will be lost in this format.

PARAMETER DESCRIPTION
params

Same parameters as create()

TYPE: dict[str, Any]

RETURNS DESCRIPTION
Any

ChatCompletionExtended-compatible dict (flattened response)

Warning

This method loses information (reasoning blocks, citations) when converting to the legacy format. Prefer create() for new code.

Source code in autogen/llm_clients/openai_completions_client.py
def create_v1_compatible(self, params: dict[str, Any]) -> Any:
    """
    Create completion in backward-compatible ChatCompletionExtended format.

    This method provides compatibility with existing AG2 code that expects
    ChatCompletionExtended format. Note that reasoning blocks and citations
    will be lost in this format.

    Args:
        params: Same parameters as create()

    Returns:
        ChatCompletionExtended-compatible dict (flattened response)

    Warning:
        This method loses information (reasoning blocks, citations) when
        converting to the legacy format. Prefer create() for new code.
    """
    # Get rich response
    unified_response = self.create(params)

    # Convert to legacy format (simplified - would need full ChatCompletionExtended in practice)
    # Extract role and convert UserRoleEnum to string
    role = unified_response.messages[0].role if unified_response.messages else UserRoleEnum.ASSISTANT
    role_str = role.value if isinstance(role, UserRoleEnum) else role

    return {
        "id": unified_response.id,
        "model": unified_response.model,
        "created": unified_response.provider_metadata.get("created"),
        "object": "chat.completion",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": role_str,
                    "content": unified_response.text,
                },
                "finish_reason": unified_response.finish_reason,
            }
        ],
        "usage": unified_response.usage,
        "cost": unified_response.cost,
    }
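
A short sketch of the compatibility path (prompt and model are illustrative): field access mirrors the flattened dict built above, and, as the Warning notes, reasoning blocks and citations are absent.

```python
legacy = client.create_v1_compatible({
    "model": "gpt-4o-mini",
    "messages": [{"role": "user", "content": "Say hello"}],
})

# Legacy ChatCompletion-style access on the flattened dict
print(legacy["choices"][0]["message"]["content"])
print(legacy["usage"], legacy["cost"])
```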

cost #

cost(response)

Calculate cost from response usage.

Implements ModelClient.cost() but accepts UnifiedResponse via duck typing.

PARAMETER DESCRIPTION
response

UnifiedResponse with usage information

TYPE: UnifiedResponse

RETURNS DESCRIPTION
float

Cost in USD for the API call

Source code in autogen/llm_clients/openai_completions_client.py
def cost(self, response: UnifiedResponse) -> float:  # type: ignore[override]
    """
    Calculate cost from response usage.

    Implements ModelClient.cost() but accepts UnifiedResponse via duck typing.

    Args:
        response: UnifiedResponse with usage information

    Returns:
        Cost in USD for the API call
    """
    if not response.usage:
        return 0.0

    model = response.model
    prompt_tokens = response.usage.get("prompt_tokens", 0)
    completion_tokens = response.usage.get("completion_tokens", 0)

    # Find pricing for the model: try longest keys first so that e.g.
    # "gpt-5-mini" matches its own entry rather than the shorter "gpt-5" prefix
    pricing = None
    for model_key in sorted(self._cost_per_token, key=len, reverse=True):
        if model.startswith(model_key):
            pricing = self._cost_per_token[model_key]
            break

    if not pricing:
        # Unknown model - use default pricing (GPT-4 Turbo level, per million tokens)
        pricing = {"prompt": 10.00 / 1_000_000, "completion": 30.00 / 1_000_000}

    return (prompt_tokens * pricing["prompt"]) + (completion_tokens * pricing["completion"])
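
To make the pricing arithmetic concrete, a worked sketch that leans on the duck typing noted above; the stand-in response object and token counts are fabricated for illustration:

```python
from types import SimpleNamespace

# Any object exposing .model and .usage satisfies cost() at runtime.
fake_response = SimpleNamespace(
    model="gpt-4o",
    usage={"prompt_tokens": 1_000, "completion_tokens": 500},
)

# 1_000 * (2.50 / 1e6) + 500 * (10.00 / 1e6) = 0.0025 + 0.0050
print(client.cost(fake_response))  # -> 0.0075
```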

get_usage staticmethod #

get_usage(response)

Extract usage statistics from response.

Implements ModelClient.get_usage() but accepts UnifiedResponse via duck typing.

PARAMETER DESCRIPTION
response

UnifiedResponse from create()

TYPE: UnifiedResponse

RETURNS DESCRIPTION
dict[str, Any]

Dict with keys from RESPONSE_USAGE_KEYS

Source code in autogen/llm_clients/openai_completions_client.py
@staticmethod
def get_usage(response: UnifiedResponse) -> dict[str, Any]:  # type: ignore[override]
    """
    Extract usage statistics from response.

    Implements ModelClient.get_usage() but accepts UnifiedResponse via duck typing.

    Args:
        response: UnifiedResponse from create()

    Returns:
        Dict with keys from RESPONSE_USAGE_KEYS
    """
    return {
        "prompt_tokens": response.usage.get("prompt_tokens", 0),
        "completion_tokens": response.usage.get("completion_tokens", 0),
        "total_tokens": response.usage.get("total_tokens", 0),
        "cost": response.cost or 0.0,
        "model": response.model,
    }
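
A small sketch of reading the extracted stats (prompt and model are illustrative); the returned dict carries exactly the RESPONSE_USAGE_KEYS fields:

```python
response = client.create({
    "model": "gpt-4o-mini",
    "messages": [{"role": "user", "content": "Ping"}],
})

usage = OpenAICompletionsClient.get_usage(response)
# Keys: prompt_tokens, completion_tokens, total_tokens, cost, model
print(usage["total_tokens"], usage["cost"], usage["model"])
```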

message_retrieval #

message_retrieval(response)

Retrieve messages from response in OpenAI-compatible format.

Returns list of strings for text-only messages, or list of dicts when tool calls, function calls, or complex content is present.

This matches the behavior of the legacy OpenAIClient, which returns:

- Strings for simple text responses
- ChatCompletionMessage objects (as dicts) when tool_calls/function_call are present

The returned dicts follow OpenAI's ChatCompletion message format:

{
    "role": "assistant",
    "content": "text content or None",
    "tool_calls": [{"id": "...", "type": "function", "function": {"name": "...", "arguments": "..."}}],
    "name": "agent_name" (optional)
}

PARAMETER DESCRIPTION
response

UnifiedResponse from create()

TYPE: UnifiedResponse

RETURNS DESCRIPTION
list[str] | list[dict[str, Any]]

List of strings (for text-only) OR list of message dicts (for tool calls/complex content)

Source code in autogen/llm_clients/openai_completions_client.py
def message_retrieval(self, response: UnifiedResponse) -> list[str] | list[dict[str, Any]]:  # type: ignore[override]
    """
    Retrieve messages from response in OpenAI-compatible format.

    Returns list of strings for text-only messages, or list of dicts when
    tool calls, function calls, or complex content is present.

    This matches the behavior of the legacy OpenAIClient which returns:
    - Strings for simple text responses
    - ChatCompletionMessage objects (as dicts) when tool_calls/function_call present

    The returned dicts follow OpenAI's ChatCompletion message format:
    {
        "role": "assistant",
        "content": "text content or None",
        "tool_calls": [{"id": "...", "type": "function", "function": {"name": "...", "arguments": "..."}}],
        "name": "agent_name" (optional)
    }

    Args:
        response: UnifiedResponse from create()

    Returns:
        List of strings (for text-only) OR list of message dicts (for tool calls/complex content)
    """
    result: list[str] | list[dict[str, Any]] = []

    for msg in response.messages:
        # Check for tool calls
        tool_calls = msg.get_tool_calls()

        # Check for complex/multimodal content that needs dict format
        has_complex_content = any(
            isinstance(block, (ImageContent, AudioContent, VideoContent)) for block in msg.content
        )

        if tool_calls or has_complex_content:
            # Return OpenAI-compatible dict format
            message_dict: dict[str, Any] = {
                "role": msg.role.value if hasattr(msg.role, "value") else msg.role,
                "content": msg.get_text() or None,
            }

            # Add optional fields
            if msg.name:
                message_dict["name"] = msg.name

            # Add tool calls in OpenAI format
            if tool_calls:
                message_dict["tool_calls"] = [
                    {"id": tc.id, "type": "function", "function": {"name": tc.name, "arguments": tc.arguments}}
                    for tc in tool_calls
                ]

            # Handle multimodal content - convert to OpenAI content array format
            if has_complex_content:
                message_dict["content"] = self._convert_to_openai_content_array(msg)

            result.append(message_dict)
        else:
            # Simple text content - return string
            result.append(msg.get_text())

    return result
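
Because the return value mixes strings (text-only) and dicts (tool calls or multimodal content), callers typically branch on the element type; a consuming sketch:

```python
for item in client.message_retrieval(response):
    if isinstance(item, str):
        # Simple text reply
        print(item)
    else:
        # Dict form: inspect tool calls if any were returned
        for tc in item.get("tool_calls", []):
            print(tc["function"]["name"], tc["function"]["arguments"])
```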