Skip to content

DocumentInput

autogen.beta.events.input_events.DocumentInput

DocumentInput(url: str) -> DocumentUrlInput
DocumentInput(*, file_id: str, filename: str | None = None) -> FileIdInput
DocumentInput(*, data: bytes, media_type: DocumentMediaType) -> BinaryInput
DocumentInput(*, path: str | PathLike[str], media_type: DocumentMediaType | None = None) -> BinaryInput
DocumentInput(url=None, *, file_id=None, filename=None, data=None, media_type=None, path=None)

Factory for creating document input events.

Usage

DocumentInput("https://example.com/doc.pdf")                 # URL
DocumentInput(file_id="file-abc123")                         # pre-uploaded file
DocumentInput(data=raw_bytes, media_type="application/pdf")  # raw binary
DocumentInput(path="report.pdf")                             # local file

Source code in autogen/beta/events/input_events.py
def DocumentInput(  # noqa: N802
    url: str | None = None,
    *,
    file_id: str | None = None,
    filename: str | None = None,
    data: bytes | None = None,
    media_type: DocumentMediaType | None = None,
    path: str | PathLike[str] | None = None,
) -> DocumentUrlInput | FileIdInput | BinaryInput:
    """Build a document input event from a URL, a file id, a local file, or raw bytes.

    The sources are checked in a fixed order -- ``url``, then ``file_id``,
    then ``path``, then ``data`` -- and the first one that is not ``None``
    decides which event is built; any other source passed alongside it is
    not used.

    Usage:
        DocumentInput("https://example.com/doc.pdf")                 # URL
        DocumentInput(file_id="file-abc123")                         # pre-uploaded file
        DocumentInput(data=raw_bytes, media_type="application/pdf")  # raw binary
        DocumentInput(path="report.pdf")                             # local file

    Args:
        url: Document URL; produces a ``DocumentUrlInput``.
        file_id: Identifier forwarded to ``FileIdInput``.
        filename: Forwarded to ``FileIdInput`` together with ``file_id``;
            not used by the other branches.
        data: Raw document bytes; ``media_type`` must accompany it.
        media_type: Media type of the document. Required with ``data``.
            With ``path`` it is only a fallback for extensions missing from
            ``_DOC_EXTENSION_TO_MEDIA_TYPE``; a recognized extension takes
            precedence over this argument.
        path: Local file whose bytes are read immediately. The media type
            is looked up from the lower-cased file extension.

    Returns:
        ``DocumentUrlInput`` for ``url``, ``FileIdInput`` for ``file_id``,
        and a ``BinaryInput`` of kind ``BinaryType.DOCUMENT`` for ``path``
        or ``data``. For ``path`` the file's base name is attached as
        ``vendor_metadata["filename"]``.

    Raises:
        ValueError: If no source is given, if ``data`` is passed without
            ``media_type``, or if the extension of ``path`` is not
            recognized and no ``media_type`` is supplied.
    """
    if url is not None:
        return DocumentUrlInput(url)

    if file_id is not None:
        return FileIdInput(file_id, filename=filename)

    if path is None:
        # Raw bytes are the only remaining source; without them nothing was given.
        if data is None:
            raise ValueError("DocumentInput() requires one of: 'url', 'file_id', 'data' + 'media_type', or 'path'")
        if media_type is None:
            raise ValueError("'media_type' is required when using 'data'")
        return BinaryInput(data, media_type=media_type, kind=BinaryType.DOCUMENT)

    file_path = Path(path)
    extension = file_path.suffix.lower()

    # The extension lookup wins; the caller's media_type is consulted only
    # when the extension is not in the table.
    inferred_type = _DOC_EXTENSION_TO_MEDIA_TYPE.get(extension)
    effective_type = media_type if inferred_type is None else inferred_type
    if effective_type is None:
        # Fail before touching the file system: nothing usable to label the bytes with.
        raise ValueError(
            f"Cannot infer document media type from extension '{extension}'. Provide 'media_type' explicitly."
        )

    return BinaryInput(
        file_path.read_bytes(),
        media_type=effective_type,
        vendor_metadata={"filename": file_path.name},
        kind=BinaryType.DOCUMENT,
    )