SafeguardEnforcer

autogen.agentchat.group.safeguards.enforcer.SafeguardEnforcer #

SafeguardEnforcer(policy, safeguard_llm_config=None, mask_llm_config=None, groupchat_manager=None, agents=None)

Main safeguard enforcer that executes safeguard policies.

Initialize the safeguard enforcer.

PARAMETER DESCRIPTION
policy

Safeguard policy dict or path to JSON file

TYPE: dict[str, Any] | str

safeguard_llm_config

LLM configuration for safeguard checks

TYPE: LLMConfig | dict[str, Any] | None DEFAULT: None

mask_llm_config

LLM configuration for masking

TYPE: LLMConfig | dict[str, Any] | None DEFAULT: None

groupchat_manager

GroupChat manager instance for group chat scenarios

TYPE: GroupChatManager | None DEFAULT: None

agents

List of conversable agents to apply safeguards to

TYPE: list[ConversableAgent] | None DEFAULT: None
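
Example usage (a minimal sketch). The policy schema shown is inferred from the rule fields this class reads ("type", "source", "target", "message_source", "message_destination"); the top-level keys and rule contents are assumptions, and real rules carry the additional fields that _validate_policy expects.

from autogen import ConversableAgent
from autogen.agentchat.group.safeguards.enforcer import SafeguardEnforcer

# Hypothetical policy: the top-level keys and rule details are illustrative only.
policy = {
    "inter_agent_safeguards": [
        # Inter-agent rules match on "source"/"target"; "*" acts as a wildcard.
        {"source": "researcher", "target": "*"},
    ],
    "agent_environment_safeguards": [
        # Environment rules are keyed by "type": "tool_interaction",
        # "llm_interaction", or "user_interaction".
        {"type": "llm_interaction", "message_destination": "writer"},
    ],
}

agents = [
    ConversableAgent(name="researcher", llm_config=False, human_input_mode="NEVER"),
    ConversableAgent(name="writer", llm_config=False, human_input_mode="NEVER"),
]

enforcer = SafeguardEnforcer(
    policy=policy,  # or a path to a JSON policy file
    safeguard_llm_config={"config_list": [{"model": "gpt-4o-mini", "api_type": "openai"}]},
    agents=agents,
)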

Source code in autogen/agentchat/group/safeguards/enforcer.py
def __init__(
    self,
    policy: dict[str, Any] | str,
    safeguard_llm_config: LLMConfig | dict[str, Any] | None = None,
    mask_llm_config: LLMConfig | dict[str, Any] | None = None,
    groupchat_manager: GroupChatManager | None = None,
    agents: list[ConversableAgent] | None = None,
):
    """Initialize the safeguard enforcer.

    Args:
        policy: Safeguard policy dict or path to JSON file
        safeguard_llm_config: LLM configuration for safeguard checks
        mask_llm_config: LLM configuration for masking
        groupchat_manager: GroupChat manager instance for group chat scenarios
        agents: List of conversable agents to apply safeguards to
    """
    self.policy = self._load_policy(policy)
    self.safeguard_llm_config = safeguard_llm_config
    self.mask_llm_config = mask_llm_config
    self.groupchat_manager = groupchat_manager
    self.agents = agents
    self.group_tool_executor = None
    if self.groupchat_manager:
        for agent in self.groupchat_manager.groupchat.agents:
            if agent.name == "_Group_Tool_Executor":
                self.group_tool_executor = agent  # type: ignore[assignment]
                break

    # Validate policy format before proceeding
    self._validate_policy()

    # Create mask agent for content masking
    if self.mask_llm_config:
        from ...conversable_agent import ConversableAgent

        self.mask_agent = ConversableAgent(
            name="mask_agent",
            system_message="You are a agent responsible for masking sensitive information.",
            llm_config=self.mask_llm_config,
            human_input_mode="NEVER",
            max_consecutive_auto_reply=1,
        )

    # Parse safeguard rules
    self.inter_agent_rules = self._parse_inter_agent_rules()
    self.environment_rules = self._parse_environment_rules()

    # Send load event
    self._send_safeguard_event(
        event_type="load",
        message=f"Loaded {len(self.inter_agent_rules)} inter-agent and {len(self.environment_rules)} environment safeguard rules",
    )

policy instance-attribute #

policy = _load_policy(policy)

safeguard_llm_config instance-attribute #

safeguard_llm_config = safeguard_llm_config

mask_llm_config instance-attribute #

mask_llm_config = mask_llm_config

groupchat_manager instance-attribute #

groupchat_manager = groupchat_manager

agents instance-attribute #

agents = agents

group_tool_executor instance-attribute #

group_tool_executor = None

mask_agent instance-attribute #

mask_agent = ConversableAgent(name='mask_agent', system_message='You are an agent responsible for masking sensitive information.', llm_config=mask_llm_config, human_input_mode='NEVER', max_consecutive_auto_reply=1)

inter_agent_rules instance-attribute #

inter_agent_rules = _parse_inter_agent_rules()

environment_rules instance-attribute #

environment_rules = _parse_environment_rules()

create_agent_hooks #

create_agent_hooks(agent_name)

Create hook functions for a specific agent, only for rule types that exist.

Source code in autogen/agentchat/group/safeguards/enforcer.py
def create_agent_hooks(self, agent_name: str) -> dict[str, Callable[..., Any]]:
    """Create hook functions for a specific agent, only for rule types that exist."""
    hooks: dict[str, Callable[..., Any]] = {}

    # Check if we have any tool interaction rules that apply to this agent
    if agent_name == "_Group_Tool_Executor":
        # The group tool executor runs every tool, so all tool-interaction rules must be checked
        agent_tool_rules = [rule for rule in self.environment_rules if rule["type"] == "tool_interaction"]
    else:
        agent_tool_rules = [
            rule
            for rule in self.environment_rules
            if rule["type"] == "tool_interaction"
            and (
                rule.get("message_destination") == agent_name
                or rule.get("message_source") == agent_name
                or rule.get("agent_name") == agent_name
                or "message_destination" not in rule
            )
        ]
    if agent_tool_rules:

        def tool_input_hook(tool_input: dict[str, Any]) -> dict[str, Any] | None:
            result = self._check_tool_interaction(agent_name, tool_input, "input")
            return result if result is not None else tool_input

        def tool_output_hook(tool_input: dict[str, Any]) -> dict[str, Any] | None:
            result = self._check_tool_interaction(agent_name, tool_input, "output")
            return result if result is not None else tool_input

        hooks["safeguard_tool_inputs"] = tool_input_hook
        hooks["safeguard_tool_outputs"] = tool_output_hook

    # Check if we have any LLM interaction rules that apply to this agent
    agent_llm_rules = [
        rule
        for rule in self.environment_rules
        if rule["type"] == "llm_interaction"
        and (
            rule.get("message_destination") == agent_name
            or rule.get("message_source") == agent_name
            or rule.get("agent_name") == agent_name
            or "message_destination" not in rule
        )
    ]  # Simple pattern rules apply to all

    if agent_llm_rules:

        def llm_input_hook(tool_input: dict[str, Any]) -> dict[str, Any] | None:
            # Extract messages from the data structure if needed
            messages = tool_input if isinstance(tool_input, list) else tool_input.get("messages", tool_input)
            result = self._check_llm_interaction(agent_name, messages, "input")
            if isinstance(result, list) and isinstance(tool_input, dict) and "messages" in tool_input:
                return {**tool_input, "messages": result}
            elif isinstance(result, dict):
                return result
            elif result is not None:
                # Convert string or other types to dict format
                return {"content": str(result), "role": "function"}
            return tool_input

        def llm_output_hook(tool_input: dict[str, Any]) -> dict[str, Any] | None:
            result = self._check_llm_interaction(agent_name, tool_input, "output")
            if isinstance(result, dict):
                return result
            elif result is not None:
                # Convert string or other types to dict format
                return {"content": str(result), "role": "function"}
            return tool_input

        hooks["safeguard_llm_inputs"] = llm_input_hook
        hooks["safeguard_llm_outputs"] = llm_output_hook

    # Check if we have any user interaction rules that apply to this agent
    agent_user_rules = [
        rule
        for rule in self.environment_rules
        if rule["type"] == "user_interaction" and rule.get("message_destination") == agent_name
    ]

    if agent_user_rules:

        def human_input_hook(tool_input: dict[str, Any]) -> dict[str, Any] | None:
            # Extract human input from data structure
            human_input = tool_input.get("content", str(tool_input))
            result = self._check_user_interaction(agent_name, human_input)
            if result != human_input:
                # Preserve any other keys while swapping in the checked content
                return {**tool_input, "content": result}
            return tool_input

        hooks["safeguard_human_inputs"] = human_input_hook

    # Check if we have any inter-agent rules that apply to this agent
    # Note: For group chats, inter-agent communication is handled by GroupChat._run_inter_agent_guardrails()
    # But for direct agent-to-agent communication, we need the process_message_before_send hook
    agent_inter_rules = [
        rule
        for rule in self.inter_agent_rules
        if (
            rule.get("source") == agent_name
            or rule.get("target") == agent_name
            or rule.get("source") == "*"
            or rule.get("target") == "*"
        )
    ]

    if agent_inter_rules:

        def message_before_send_hook(
            sender: Any, message: dict[str, Any] | str, recipient: Any, silent: Any = None
        ) -> dict[str, Any] | str:
            _ = silent  # Unused parameter
            result = self._check_inter_agent_communication(sender.name, recipient.name, message)
            return result

        hooks["process_message_before_send"] = message_before_send_hook  # type: ignore[assignment]

    return hooks
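
Usage sketch: attach the generated hooks to each agent. This assumes ConversableAgent.register_hook accepts the hook names returned here ("safeguard_tool_inputs", "safeguard_llm_inputs", "process_message_before_send", ...); enforcer and agents continue the constructor sketch above.

for agent in agents:
    hooks = enforcer.create_agent_hooks(agent.name)
    for hookable_method, hook in hooks.items():
        # Register each safeguard hook under its hookable-method name.
        agent.register_hook(hookable_method=hookable_method, hook=hook)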

check_and_act #

check_and_act(src_agent_name, dst_agent_name, message_content)

Check and act on inter-agent communication for GroupChat integration.

This method is called by GroupChat._run_inter_agent_guardrails to check messages between agents and potentially modify or block them.

PARAMETER DESCRIPTION
src_agent_name

Name of the source agent

TYPE: str

dst_agent_name

Name of the destination agent

TYPE: str

message_content

The message content to check

TYPE: str | dict[str, Any]

RETURNS DESCRIPTION
str | dict[str, Any] | None

Optional replacement message if a safeguard triggers, None otherwise

Source code in autogen/agentchat/group/safeguards/enforcer.py
def check_and_act(
    self, src_agent_name: str, dst_agent_name: str, message_content: str | dict[str, Any]
) -> str | dict[str, Any] | None:
    """Check and act on inter-agent communication for GroupChat integration.

    This method is called by GroupChat._run_inter_agent_guardrails to check
    messages between agents and potentially modify or block them.

    Args:
        src_agent_name: Name of the source agent
        dst_agent_name: Name of the destination agent
        message_content: The message content to check

    Returns:
        Optional replacement message if a safeguard triggers, None otherwise
    """
    # Handle GroupToolExecutor transparency for safeguards
    if src_agent_name == "_Group_Tool_Executor":
        actual_src_agent_name = self._resolve_tool_executor_source(src_agent_name, self.group_tool_executor)
    else:
        actual_src_agent_name = src_agent_name

    # Store original message for comparison
    original_message = message_content

    result = self._check_inter_agent_communication(actual_src_agent_name, dst_agent_name, message_content)

    # Check if the result is different from the original
    if result != original_message:
        return result

    return None
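
For illustration, a manual invocation (GroupChat._run_inter_agent_guardrails normally drives this); the agent names and message below are hypothetical.

replacement = enforcer.check_and_act(
    src_agent_name="researcher",
    dst_agent_name="writer",
    message_content="Draft report for review.",
)
if replacement is not None:
    # A safeguard triggered: deliver the replacement (e.g. masked or blocked) message.
    message = replacement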