
Using ReliableTool to Generate Sub-Questions (Synchronous Version)#


This notebook demonstrates how to use the ReliableTool synchronously: one tool generates exactly three sub-questions for a main question, and a second tool grades each sub-question for relevance.

import logging
from typing import Annotated

from autogen import LLMConfig, config_list_from_json
from autogen.tools.experimental.reliable import ReliableTool

# Configure logging
# Set level to DEBUG to see more internal details if needed
# logging.basicConfig(level=logging.DEBUG, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
logger = logging.getLogger(__name__)

print("Successfully imported components from autogen.")

1. Define the Core Functions#

def generate_sub_questions_list(
    sub_questions: Annotated[list[str], "A list of sub-questions related to the main question."],
) -> list[str]:
    """
    Receives and returns a list of generated sub-questions.
    """
    logger.info(f"Core function received sub_questions: {sub_questions}")
    return sub_questions

def grade_sub_question(
    grade: Annotated[int, "A grade for the sub_question."],
    grade_justification: Annotated[str, "A justification for the grade for the sub_question."],
) -> tuple[int, str]:
    """
    Grades a sub-question, returning the numeric grade and a formatted justification string.
    """
    logger.info(f"Core function received grade: {grade}")
    # Note: the first element of the returned tuple is the grade itself.
    return grade, f"Grade: {grade} \n Grade Justification: \n {grade_justification}"

2. Configure LLM and ReliableTool#

llm_config: LLMConfig | None = None
try:
    config_list = config_list_from_json(
        "OAI_CONFIG_LIST",
    )
    llm_config = LLMConfig(
        config_list=config_list,
        temperature=0.7,
    )
    print(f"Using LLM config: {config_list[0].get('base_url', 'Default Endpoint')}, Model: {config_list[0]['model']}")
except Exception as e_config:
    print(f"Error creating LLM config: {e_config}. Tools cannot be initialized.")
    llm_config = None
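
config_list_from_json reads a JSON list of model configurations from a file or environment variable named OAI_CONFIG_LIST. A minimal example of its contents looks like the following (placeholder values only; adjust the model name and key for your provider):

[
    {
        "model": "gpt-4o",
        "api_key": "YOUR_API_KEY"
    }
]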

sub_question_system_message_addition_for_tool_calling = "You are an assistant that helps break down questions. Your goal is to generate exactly 3 relevant sub-questions based on the main task. Use the provided `generate_sub_questions_list` tool to output the list you generate. Provide the generated list as the 'sub_questions' argument."
sub_question_system_message_addition_for_result_validation = """You are a quality control assistant. Your task is to validate the output received, which should be a list of sub-questions generated by another assistant.

**Validation Criteria:**
1.  **Correct Format:** The output MUST be a list of strings.
2.  **Correct Quantity:** The list MUST contain exactly 3 sub-questions.
3.  **Relevance:** Each sub-question MUST be clearly relevant to the original main question described in the initial task.
"""

sub_question_tool: ReliableTool | None = None
if llm_config:
    try:
        sub_question_tool = ReliableTool(
            name="SubQuestionGenerator",
            func_or_tool=generate_sub_questions_list,
            description="Reliably generates exactly 3 relevant sub-questions for a given main question.",
            runner_llm_config=llm_config,
            validator_llm_config=llm_config,
            system_message_addition_for_tool_calling=sub_question_system_message_addition_for_tool_calling,
            system_message_addition_for_result_validation=sub_question_system_message_addition_for_result_validation,
            max_tool_invocations=5,
        )
        print("Sub Question ReliableTool instance created successfully.")
    except Exception as e:
        print(f"Error creating Sub Question ReliableTool: {e}")
        logger.error("Failed to instantiate Sub Question ReliableTool", exc_info=True)
else:
    print("LLM Configuration not loaded. Cannot create Sub Question ReliableTool.")

grader_system_message_addition_for_tool_calling = """You are an AI Evaluator. Assess the provided Sub Question based on its relevance and clarity in the context of the main Question.

Assign an integer score (0-100 inclusive) reflecting how well the Sub Question relates to and logically follows from the main Question.
- 0 = Completely irrelevant or nonsensical Sub Question.
- 100 = Perfectly relevant, clear, and logical Sub Question.

Provide a BRIEF justification explaining your score.

**Input & Expected Output Examples:**

1.  **Input:**
    Question: Explain the process of photosynthesis in plants.
    Sub Question: What are the primary reactants required for photosynthesis?
    **Example Output:**
    Score: 100, Justification: "Directly relevant, clear, and asks about a key component of the main question."

2.  **Input:**
    Question: Discuss the major causes leading to the outbreak of World War II in Europe.
    Sub Question: How did the Treaty of Versailles contribute to the tensions?
    **Example Output:**
    Score: 95, Justification: "Highly relevant sub-topic that directly addresses a major cause mentioned in the main question."

3.  **Input:**
    Question: Analyze the main themes in George Orwell's "1984".
    Sub Question: What is the population of London?
    **Example Output:**
    Score: 10, Justification: "Sub Question is fact-based but almost entirely irrelevant to analyzing themes in the novel."

4.  **Input:**
    Question: Solve the system: 2x+y=5, x-y=1.
    Sub Question: Who was the first US president?
    **Example Output:**
    Score: 0, Justification: "Completely irrelevant to the mathematical problem posed in the main question."

5.  **Input:**
    Question: Describe the geography and climate of Brazil.
    Sub Question: Talk about cities.
    **Example Output:**
    Score: 50, Justification: "Relevant topic (cities are part of geography) but the sub-question is very vague and lacks focus."

Now, evaluate the provided Sub Question based on the main Question, following this format and reasoning."""

grader_system_message_addition_for_result_validation = (
    """Validate that the justification for the grade is clear and relevant."""
)

grade_tool: ReliableTool | None = None
if llm_config:
    try:
        grade_tool = ReliableTool(
            name="SubQuestionGrader",
            func_or_tool=grade_sub_question,
            description="Grades a sub question in reference to a task.",
            runner_llm_config=llm_config,
            validator_llm_config=llm_config,
            system_message_addition_for_tool_calling=grader_system_message_addition_for_tool_calling,
            system_message_addition_for_result_validation=grader_system_message_addition_for_result_validation,
            max_tool_invocations=5,
        )
        print("Grade ReliableTool instance created successfully.")
    except Exception as e:
        print(f"Error creating Grade ReliableTool: {e}")
        logger.error("Failed to instantiate Grade ReliableTool", exc_info=True)
else:
    print("LLM Configuration not loaded. Cannot create Grade ReliableTool.")

3. Define the Main Question and Run the Tools Synchronously#

"""Gets user input and runs the ReliableTool synchronously."""

main_question = "How does photosynthesis work in plants?"  # Example question

# Both tools must have been created successfully above; otherwise stop early.
if sub_question_tool is None or grade_tool is None:
    raise RuntimeError("ReliableTool instances were not created; check the LLM configuration above.")

sub_questions: list[str] | None = sub_question_tool.run(
    task=f"Generate exactly 3 relevant sub-questions for the main question: '{main_question}'"
)

graded_sub_questions = []
for sub_question in sub_questions or []:
    grade = grade_tool.run(task=f"Question: {main_question}\n Sub Question: {sub_question}")
    graded_sub_questions.append((sub_question, grade))

logger.info("\n✅ Successfully generated sub-questions:")
for i, (sub_question, grade) in enumerate(graded_sub_questions):
    if grade > 50:
        logger.info(f"   {i + 1}. ✅ Grade: {grade} - {sub_question}")
    else:
        logger.info(f"   {i + 1}. ❌ Grade: {grade} - {sub_question}")