export_pairwise_cases(source_a, source_b, *, criteria, out, suite=None, seed=None)
Write a blinded JSONL labeling manifest for the paired traces.
One line per (task, criterion): `{case_id, task_id, criterion, task_input, response_1, response_2, first_variant}`. `first_variant` records which variant is shown as Response 1; it is the de-blinding key, so a labeling UI must not show it. A labeler adds a `preferred` field ("1", "2", or "tie") to each line; feed the resulting file to `human_labels`.
Source code in autogen/beta/eval/scorers/human_pairwise.py
async def export_pairwise_cases(
    source_a: TraceSource,
    source_b: TraceSource,
    *,
    criteria: Iterable[str],
    out: str,
    suite: Suite | None = None,
    seed: int | None = None,
) -> Path:
    """Write a blinded JSONL labeling manifest for the paired traces.

    Traces from the two sources are paired by ``task_id``. Refs without a
    ``task_id`` and tasks that appear in only one source are skipped. When
    ``source_b`` lists the same ``task_id`` more than once, the last ref wins.

    One line per (task, criterion): ``{case_id, task_id, criterion, task_input,
    response_1, response_2, first_variant}``. ``first_variant`` records which
    variant is Response 1 (de-blinding key — a labeling UI must not show it).
    A labeler adds ``preferred`` ("1"/"2"/"tie") per line; feed the result to
    :func:`human_labels`.

    Args:
        source_a: Trace source for variant "a".
        source_b: Trace source for variant "b".
        criteria: Criterion names to label. A bare string is treated as a
            single criterion, and repeated names are collapsed so that every
            ``case_id`` is unique.
        out: Destination file path; missing parent directories are created.
        suite: Optional suite used to look up each task's ``inputs["input"]``.
            Without it (or when a task is not in it) ``task_input`` is null.
        seed: Seed for the random response-order assignment.

    Returns:
        The path of the manifest that was written.
    """
    # A str is itself an iterable of one-character strings, so list("clarity")
    # would silently produce one criterion per letter. Treat it as one name.
    if isinstance(criteria, str):
        criteria = [criteria]
    # dict.fromkeys drops repeats while keeping first-seen order; a repeated
    # criterion would otherwise emit two lines with the same case_id.
    criteria = list(dict.fromkeys(criteria))

    rng = random.Random(seed)
    tasks_by_id = {task.task_id: task for task in suite} if suite is not None else {}

    refs_a = [ref async for ref in source_a.list()]
    # Later refs overwrite earlier ones for the same task_id.
    b_by_task: dict[str, TraceRef] = {
        ref.task_id: ref async for ref in source_b.list() if ref.task_id is not None
    }

    # NOTE(review): if source_a lists the same task_id more than once, each
    # occurrence still produces its own lines (with identical case_ids) —
    # confirm whether source_a should be de-duplicated the way source_b is.
    lines: list[dict[str, Any]] = []
    for ref_a in refs_a:
        task_id = ref_a.task_id
        if task_id is None or task_id not in b_by_task:
            continue

        answer_a = _final_text(await source_a.load(ref_a))
        answer_b = _final_text(await source_b.load(b_by_task[task_id]))
        task = tasks_by_id.get(task_id) or Task(task_id=task_id, inputs={})

        # One coin flip per task: every criterion of a task shares the same
        # response order, and the order is reproducible for a given seed.
        first_variant = rng.choice(["a", "b"])
        if first_variant == "a":
            response_1, response_2 = answer_a, answer_b
        else:
            response_1, response_2 = answer_b, answer_a

        lines.extend(
            {
                "case_id": f"{task_id}::{criterion}",
                "task_id": task_id,
                "criterion": criterion,
                "task_input": task.inputs.get("input"),
                "response_1": response_1,
                "response_2": response_2,
                "first_variant": first_variant,
            }
            for criterion in criteria
        )

    path = Path(out)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("".join(json.dumps(line) + "\n" for line in lines), encoding="utf-8")
    return path