RealTimeConfig(model, *, output=None, input=None, max_output_tokens=None, tool_choice=None, tracing=None, session=None, client=None)
Bases: RealtimeConfig
Realtime speech-to-text (STT) config backed by OpenAI's bidirectional realtime API.
Implements the RealtimeConfig protocol — call session(...) to open a connection that pumps captured audio into the API and emits transcription events on the supplied context.
Source code in autogen/beta/live/openai.py
def __init__(
    self,
    model: "ModelName | str",
    *,
    output: AudioOutput | TextOutput | None = None,
    input: InputConfig | None = None,
    max_output_tokens: int | Literal["inf"] | None = None,
    tool_choice: RealtimeToolChoiceConfigParam | None = None,
    tracing: RealtimeTracingConfigParam | None = None,
    session: RealtimeSessionCreateRequestParam | None = None,
    client: AsyncOpenAI | None = None,
) -> None:
    """Build the realtime session request for *model*.

    Assembles a ``RealtimeSessionCreateRequestParam`` from the keyword
    options, defaulting to audio output and a default input config when
    none are given. ``session`` acts as a raw override dict merged in
    later; ``client`` defaults to a fresh ``AsyncOpenAI()``.
    """
    self.model = model

    # Fall back to default audio output / input capture configuration.
    resolved_output = AudioOutput() if output is None else output
    resolved_input = InputConfig() if input is None else input

    # Start from the mandatory discriminator, then fold in only the
    # options the caller actually supplied.
    optional_fields = {
        "max_output_tokens": max_output_tokens,
        "tool_choice": tool_choice,
        "tracing": tracing,
    }
    self._session: RealtimeSessionCreateRequestParam = {
        "type": "realtime",
        **{key: value for key, value in optional_fields.items() if value is not None},
    }

    audio_config: RealtimeAudioConfigParam = {}
    modality: Literal["text", "audio"]
    if isinstance(resolved_output, AudioOutput):
        modality = "audio"
        audio_config["output"] = RealtimeAudioConfigOutputParam(
            voice=resolved_output.voice,
            format=resolved_output.format,
            speed=resolved_output.speed,
        )
    else:
        # TextOutput: no audio output block, text-only modality.
        modality = "text"

    input_param: RealtimeAudioConfigInputParam = {
        "format": resolved_input.format,
        "turn_detection": resolved_input.turn_detection,
    }
    if resolved_input.transcription is not None:
        input_param["transcription"] = resolved_input.transcription
    if resolved_input.noise_reduction is not None:
        input_param["noise_reduction"] = resolved_input.noise_reduction
    audio_config["input"] = input_param

    self._session["audio"] = audio_config
    self._session["output_modalities"] = [modality]

    # Caller-supplied raw session dict wins over nothing; keep the bare
    # discriminator when no overrides were given (falsy check matches
    # the original `or` semantics).
    self._session_overrides: RealtimeSessionCreateRequestParam = session or {"type": "realtime"}
    self.client = client or AsyncOpenAI()
client instance-attribute
client = client or AsyncOpenAI()
session async
session(context, *, instructions=(), tools=(), serializer)
Source code in autogen/beta/live/openai.py
@asynccontextmanager
async def session(
    self,
    context: ConversationContext,
    *,
    instructions: Iterable[str] = (),
    tools: Iterable[ToolSchema] = (),
    serializer: SerializerProto,
) -> AsyncIterator[None]:
    """Open a realtime connection wired to *context* for the duration of the block.

    While the context manager is entered, recorded audio events from the
    context stream are pumped into the API's input audio buffer, tool
    results are forwarded back over the connection, and server events are
    received by a background task. The receiver task is cancelled and
    awaited on exit.
    """
    session_config = self._build_session(instructions=instructions, tools=tools)
    async with self.client.realtime.connect(model=self.model) as conn:
        await conn.session.update(session=session_config)

        async def _on_audio(event: RecordedAudioEvent) -> None:
            # The realtime API expects base64-encoded audio payloads.
            encoded = base64.b64encode(event.content).decode()
            await conn.input_audio_buffer.append(audio=encoded)

        async def _on_tool_result(event: ToolResultEvent) -> None:
            await _send_tool_result(conn, event, serializer)

        audio_scope = context.stream.where(RecordedAudioEvent).sub_scope(_on_audio)
        tool_scope = context.stream.where(ToolResultEvent).sub_scope(_on_tool_result)
        with audio_scope, tool_scope:
            receiver = asyncio.create_task(_pump_events(conn, context))
            try:
                yield
            finally:
                # Tear down the background receiver; a CancelledError here
                # is the expected outcome of cancel(), not a failure.
                receiver.cancel()
                with suppress(asyncio.CancelledError):
                    await receiver