SafeguardEnforcer

autogen.agentchat.group.safeguards.enforcer.SafeguardEnforcer

SafeguardEnforcer(policy, safeguard_llm_config=None, mask_llm_config=None)

Main safeguard enforcer that executes safeguard policies.

Initialize the safeguard enforcer.

PARAMETER DESCRIPTION
policy

Safeguard policy dict or path to JSON file

TYPE: dict[str, Any] | str

safeguard_llm_config

LLM configuration for safeguard checks

TYPE: LLMConfig | dict[str, Any] | None DEFAULT: None

mask_llm_config

LLM configuration for masking

TYPE: LLMConfig | dict[str, Any] | None DEFAULT: None

Source code in autogen/agentchat/group/safeguards/enforcer.py
def __init__(
    self,
    policy: dict[str, Any] | str,
    safeguard_llm_config: LLMConfig | dict[str, Any] | None = None,
    mask_llm_config: LLMConfig | dict[str, Any] | None = None,
):
    """Initialize the safeguard enforcer.

    Args:
        policy: Safeguard policy dict or path to JSON file
        safeguard_llm_config: LLM configuration for safeguard checks
        mask_llm_config: LLM configuration for masking
    """
    self.policy = self._load_policy(policy)
    self.safeguard_llm_config = safeguard_llm_config
    self.mask_llm_config = mask_llm_config

    # Validate policy format before proceeding
    self._validate_policy()

    # Create mask agent for content masking
    if self.mask_llm_config:
        from ...conversable_agent import ConversableAgent

        self.mask_agent = ConversableAgent(
            name="mask_agent",
            system_message="You are an agent responsible for masking sensitive information.",
            llm_config=self.mask_llm_config,
            human_input_mode="NEVER",
            max_consecutive_auto_reply=1,
        )

    # Parse safeguard rules
    self.inter_agent_rules = self._parse_inter_agent_rules()
    self.environment_rules = self._parse_environment_rules()

    # Send load event
    self._send_safeguard_event(
        event_type="load",
        message=f"Loaded {len(self.inter_agent_rules)} inter-agent and {len(self.environment_rules)} environment safeguard rules",
    )
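
A minimal construction sketch, not canonical usage: the policy schema and the exact requirements of _load_policy/_validate_policy are not shown in this excerpt, so the file name and config values below are assumptions.

from autogen.agentchat.group.safeguards.enforcer import SafeguardEnforcer

# Hypothetical values: "safeguard_policy.json" must contain a policy in
# whatever format _validate_policy accepts; the model name is illustrative.
enforcer = SafeguardEnforcer(
    policy="safeguard_policy.json",                 # dict or path to a JSON policy file
    safeguard_llm_config={"model": "gpt-4o-mini"},  # used for LLM-based safeguard checks
    mask_llm_config={"model": "gpt-4o-mini"},       # enables the internal mask_agent
)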

policy instance-attribute

policy = _load_policy(policy)

safeguard_llm_config instance-attribute

safeguard_llm_config = safeguard_llm_config

mask_llm_config instance-attribute

mask_llm_config = mask_llm_config

mask_agent instance-attribute

mask_agent = ConversableAgent(name='mask_agent', system_message='You are an agent responsible for masking sensitive information.', llm_config=mask_llm_config, human_input_mode='NEVER', max_consecutive_auto_reply=1)

inter_agent_rules instance-attribute

inter_agent_rules = _parse_inter_agent_rules()

environment_rules instance-attribute

environment_rules = _parse_environment_rules()

create_agent_hooks

create_agent_hooks(agent_name)

Create hook functions for a specific agent, only for rule types that exist.

Source code in autogen/agentchat/group/safeguards/enforcer.py
def create_agent_hooks(self, agent_name: str) -> dict[str, Callable[..., Any]]:
    """Create hook functions for a specific agent, only for rule types that exist."""
    hooks = {}

    # Check if we have any tool interaction rules that apply to this agent
    agent_tool_rules = [
        rule
        for rule in self.environment_rules
        if rule["type"] == "tool_interaction"
        and (
            rule.get("message_destination") == agent_name
            or rule.get("message_source") == agent_name
            or rule.get("agent_name") == agent_name
            or "message_destination" not in rule
        )
    ]  # Simple pattern rules apply to all

    if agent_tool_rules:

        def tool_input_hook(tool_input: dict[str, Any]) -> dict[str, Any] | None:
            result = self._check_tool_interaction(agent_name, tool_input, "input")
            return result if result is not None else tool_input

        def tool_output_hook(tool_input: dict[str, Any]) -> dict[str, Any] | None:
            # Despite the parameter name, this receives the tool's output payload.
            result = self._check_tool_interaction(agent_name, tool_input, "output")
            return result if result is not None else tool_input

        hooks["safeguard_tool_inputs"] = tool_input_hook
        hooks["safeguard_tool_outputs"] = tool_output_hook

    # Check if we have any LLM interaction rules that apply to this agent
    agent_llm_rules = [
        rule
        for rule in self.environment_rules
        if rule["type"] == "llm_interaction"
        and (
            rule.get("message_destination") == agent_name
            or rule.get("message_source") == agent_name
            or rule.get("agent_name") == agent_name
            or "message_destination" not in rule
        )
    ]  # Simple pattern rules apply to all

    if agent_llm_rules:

        def llm_input_hook(tool_input: dict[str, Any]) -> dict[str, Any] | None:
            # Extract messages from the data structure if needed
            messages = tool_input if isinstance(tool_input, list) else tool_input.get("messages", tool_input)
            result = self._check_llm_interaction(agent_name, messages, "input")
            if isinstance(result, list) and isinstance(tool_input, dict) and "messages" in tool_input:
                return {**tool_input, "messages": result}
            elif isinstance(result, dict):
                return result
            elif result is not None:
                # Convert string or other types to dict format
                return {"content": str(result), "role": "function"}
            return tool_input

        def llm_output_hook(tool_input: dict[str, Any]) -> dict[str, Any] | None:
            result = self._check_llm_interaction(agent_name, tool_input, "output")
            if isinstance(result, dict):
                return result
            elif result is not None:
                # Convert string or other types to dict format
                return {"content": str(result), "role": "function"}
            return tool_input

        hooks["safeguard_llm_inputs"] = llm_input_hook
        hooks["safeguard_llm_outputs"] = llm_output_hook

    # Check if we have any user interaction rules that apply to this agent
    agent_user_rules = [
        rule
        for rule in self.environment_rules
        if rule["type"] == "user_interaction" and rule.get("message_destination") == agent_name
    ]

    if agent_user_rules:

        def human_input_hook(tool_input: dict[str, Any]) -> dict[str, Any] | None:
            # Extract human input from data structure
            human_input = tool_input.get("content", str(tool_input))
            result = self._check_user_interaction(agent_name, human_input)
            if result != human_input and isinstance(tool_input, dict):
                return {**tool_input, "content": result}
            return tool_input if result == human_input else {"content": result}

        hooks["safeguard_human_inputs"] = human_input_hook

    # Check if we have any inter-agent rules that apply to this agent
    # Note: For group chats, inter-agent communication is handled by GroupChat._run_inter_agent_guardrails()
    # But for direct agent-to-agent communication, we need the process_message_before_send hook
    agent_inter_rules = [
        rule
        for rule in self.inter_agent_rules
        if (
            rule.get("source") == agent_name
            or rule.get("target") == agent_name
            or rule.get("source") == "*"
            or rule.get("target") == "*"
        )
    ]

    if agent_inter_rules:

        def message_before_send_hook(
            sender: Any, message: dict[str, Any] | str, recipient: Any, silent: Any = None
        ) -> dict[str, Any] | str:
            _ = silent  # Unused parameter
            result = self._check_inter_agent_communication(sender.name, recipient.name, message)
            return result

        hooks["process_message_before_send"] = message_before_send_hook  # type: ignore[assignment]

    return hooks
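
A hedged wiring sketch: ConversableAgent.register_hook(hookable_method, hook) is the standard registration mechanism in autogen, but whether every key returned here (e.g. "safeguard_tool_inputs") is a recognized hookable method on your build of ConversableAgent is an assumption based on this excerpt.

from autogen import ConversableAgent

agent = ConversableAgent(name="worker", llm_config={"model": "gpt-4o-mini"})  # hypothetical agent

# Register each returned hook under its key, assuming the key names a
# hookable method understood by register_hook.
for hookable_method, hook in enforcer.create_agent_hooks(agent.name).items():
    agent.register_hook(hookable_method=hookable_method, hook=hook)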

check_and_act

check_and_act(src_agent_name, dst_agent_name, message_content)

Check and act on inter-agent communication for GroupChat integration.

This method is called by GroupChat._run_inter_agent_guardrails to check messages between agents and potentially modify or block them.

PARAMETER DESCRIPTION
src_agent_name

Name of the source agent

TYPE: str

dst_agent_name

Name of the destination agent

TYPE: str

message_content

The message content to check

TYPE: str | dict[str, Any]

RETURNS DESCRIPTION
str | dict[str, Any] | None

Optional replacement message if a safeguard triggers, None otherwise

Source code in autogen/agentchat/group/safeguards/enforcer.py
def check_and_act(
    self, src_agent_name: str, dst_agent_name: str, message_content: str | dict[str, Any]
) -> str | dict[str, Any] | None:
    """Check and act on inter-agent communication for GroupChat integration.

    This method is called by GroupChat._run_inter_agent_guardrails to check
    messages between agents and potentially modify or block them.

    Args:
        src_agent_name: Name of the source agent
        dst_agent_name: Name of the destination agent
        message_content: The message content to check

    Returns:
        Optional replacement message if a safeguard triggers, None otherwise
    """
    # Store original content for comparison
    original_content = (
        message_content.get("content", "") if isinstance(message_content, dict) else str(message_content)
    )

    result = self._check_inter_agent_communication(src_agent_name, dst_agent_name, message_content)

    if result != original_content:
        # Return the complete modified message structure to preserve tool_calls/tool_responses pairing
        return result

    return None
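
A brief usage sketch (the agent names and message content are illustrative): callers such as GroupChat._run_inter_agent_guardrails treat None as "leave the message unchanged" and any non-None return as the replacement message.

# Illustrative call; the agents and message are made up.
message = {"role": "assistant", "content": "Forwarding the raw API key: sk-..."}

replacement = enforcer.check_and_act(
    src_agent_name="researcher",
    dst_agent_name="writer",
    message_content=message,
)
if replacement is not None:
    message = replacement  # a safeguard modified or blocked the original content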