Skip to content

threshold

autogen.beta.eval.scorers.threshold.threshold #

threshold(scorer, *, at_least=None, at_most=None, key=None)

Wrap a numeric scorer into a Pass/Fail gate.

PARAMETER DESCRIPTION
scorer

The scorer to gate. Its numeric feedback is converted to a boolean pass/fail; already-boolean, categorical, and no-signal feedback passes through unchanged.

TYPE: Scorer

at_least

Inclusive lower bound — pass requires score >= at_least.

TYPE: float | None DEFAULT: None

at_most

Inclusive upper bound — pass requires score <= at_most.

TYPE: float | None DEFAULT: None

key

Feedback key for the gate. Defaults to the source feedback's key (the column simply becomes pass/fail).

TYPE: str | None DEFAULT: None

At least one of at_least / at_most must be set; otherwise a ValueError is raised. A numeric feedback becomes Feedback(score=<pass bool>, detail={"score": <n>, ...}) (the raw number is kept in detail); a None score (ungradeable) becomes False (fail).

Source code in autogen/beta/eval/scorers/threshold.py
def threshold(
    scorer: Scorer,
    *,
    at_least: float | None = None,
    at_most: float | None = None,
    key: str | None = None,
) -> Scorer:
    """Build a Pass/Fail gate on top of a numeric ``scorer``.

    The returned scorer awaits the wrapped ``scorer`` with the same keyword
    arguments it received and hands every resulting feedback to ``_gate``
    together with the configured bounds and key.

    Documented gate contract: a numeric feedback becomes
    ``Feedback(score=<pass bool>, detail={"score": <n>, ...})`` so the raw number
    stays available in ``detail``; a ``None`` score (ungradeable) becomes ``False``
    (fail); already-boolean, categorical, and no-signal feedback passes through
    unchanged.

    Args:
        scorer: The scorer whose feedback is gated.
        at_least: Inclusive lower bound — pass requires ``score >= at_least``.
        at_most: Inclusive upper bound — pass requires ``score <= at_most``.
        key: Feedback key for the gate. When omitted (or otherwise falsy) the
            wrapping scorer is registered under ``scorer.key``, i.e. the column
            simply becomes pass/fail.

    Returns:
        A new ``Scorer`` wrapping the gating coroutine.

    Raises:
        ValueError: If neither ``at_least`` nor ``at_most`` is given.
    """
    # A gate without any bound has nothing to compare against.
    if all(bound is None for bound in (at_least, at_most)):
        raise ValueError("threshold(): set at_least and/or at_most")

    async def _threshold(
        inputs: dict[str, Any],
        outputs: dict[str, Any],
        reference_outputs: dict[str, Any] | None,
        trace: Trace,
        task: Task,
    ) -> list[Feedback]:
        # Forward the call untouched so the wrapped scorer sees exactly what
        # this gate was given.
        call_kwargs = {
            "inputs": inputs,
            "outputs": outputs,
            "reference_outputs": reference_outputs,
            "trace": trace,
            "task": task,
        }
        produced = await scorer(**call_kwargs)
        # Gate each feedback independently; result order follows the input order.
        return [_gate(item, at_least, at_most, key) for item in produced]

    # An explicit key wins; otherwise reuse the wrapped scorer's key.
    gate_key = key if key else scorer.key
    return Scorer(_threshold, key=gate_key)