# fast-next-template/backend/app/services/context/adapters/claude.py

"""
Claude Model Adapter.

Provides Claude-specific context formatting using XML tags
which Claude models understand natively.
"""

from typing import Any, ClassVar

from ..types import BaseContext, ContextType
from .base import ModelAdapter


class ClaudeAdapter(ModelAdapter):
    """
    Claude-specific context formatting adapter.

    Claude models have native understanding of XML structure,
    so we use XML tags for clear delineation of context types.

    Features:
    - XML tags for each context type
    - Document structure for knowledge contexts
    - Role-based message formatting for conversations
    - Tool result wrapping with tool names

    All context content and every metadata value placed in an attribute
    is XML-escaped before being embedded in the output.
    """

    MODEL_PATTERNS: ClassVar[list[str]] = ["claude", "anthropic"]

    # Translation tables used by the escape helpers. A single
    # ``str.translate`` pass produces the same output as chaining
    # ``.replace()`` calls with "&" handled first.
    _CONTENT_ESCAPES: ClassVar[dict[int, str]] = str.maketrans(
        {"&": "&amp;", "<": "&lt;", ">": "&gt;"}
    )
    _ATTRIBUTE_ESCAPES: ClassVar[dict[int, str]] = str.maketrans(
        {
            "&": "&amp;",
            "<": "&lt;",
            ">": "&gt;",
            '"': "&quot;",
            "'": "&apos;",
        }
    )

    def format(
        self,
        contexts: list[BaseContext],
        **kwargs: Any,
    ) -> str:
        """
        Format contexts for Claude models.

        Contexts are grouped by type, each group is rendered with
        ``format_type`` in the order given by ``get_type_order()``, and
        the non-empty results are joined with ``get_separator()``.

        Args:
            contexts: List of contexts to format
            **kwargs: Additional formatting options, forwarded to
                ``format_type``

        Returns:
            XML-structured context string, or an empty string when
            ``contexts`` is empty
        """
        if not contexts:
            return ""

        by_type = self.group_by_type(contexts)
        parts: list[str] = []

        for ct in self.get_type_order():
            if ct not in by_type:
                continue
            formatted = self.format_type(by_type[ct], ct, **kwargs)
            # Skip groups that rendered to nothing so the separator is
            # never emitted around an empty section.
            if formatted:
                parts.append(formatted)

        return self.get_separator().join(parts)

    def format_type(
        self,
        contexts: list[BaseContext],
        context_type: ContextType,
        **kwargs: Any,
    ) -> str:
        """
        Format contexts of a specific type for Claude.

        Args:
            contexts: List of contexts of the same type
            context_type: The type of contexts
            **kwargs: Additional formatting options (currently unused by
                the per-type formatters)

        Returns:
            XML-formatted string for this context type, or an empty
            string when ``contexts`` is empty
        """
        if not contexts:
            return ""

        if context_type == ContextType.SYSTEM:
            return self._format_system(contexts)
        elif context_type == ContextType.TASK:
            return self._format_task(contexts)
        elif context_type == ContextType.KNOWLEDGE:
            return self._format_knowledge(contexts)
        elif context_type == ContextType.CONVERSATION:
            return self._format_conversation(contexts)
        elif context_type == ContextType.TOOL:
            return self._format_tool(contexts)

        # Fallback for any unhandled context types - still escape content
        # to prevent XML injection if new types are added without updating adapter
        return "\n".join(self._escape_xml_content(c.content) for c in contexts)

    def _format_system(self, contexts: list[BaseContext]) -> str:
        """Format system contexts inside a ``<system_instructions>`` element."""
        # System prompts are typically admin-controlled, but escape for safety
        content = "\n\n".join(self._escape_xml_content(c.content) for c in contexts)
        return f"<system_instructions>\n{content}\n</system_instructions>"

    def _format_task(self, contexts: list[BaseContext]) -> str:
        """Format task contexts inside a ``<current_task>`` element."""
        content = "\n\n".join(self._escape_xml_content(c.content) for c in contexts)
        return f"<current_task>\n{content}\n</current_task>"

    def _format_knowledge(self, contexts: list[BaseContext]) -> str:
        """
        Format knowledge contexts as structured documents.

        Each knowledge context becomes a ``<document>`` element with a
        ``source`` attribute and, when a score is present in the metadata
        (``score``, falling back to ``relevance_score``), a ``relevance``
        attribute. All content is XML-escaped to prevent injection attacks.
        """
        parts = ["<reference_documents>"]

        for ctx in contexts:
            source = self._escape_xml(ctx.source)
            # Escape content to prevent XML injection
            content = self._escape_xml_content(ctx.content)
            score = ctx.metadata.get("score", ctx.metadata.get("relevance_score", ""))

            # Only None and "" mean "no score"; a numeric score of 0 is a
            # real value and must still be reported.
            if score is not None and score != "":
                # Escape score to prevent XML injection via metadata
                escaped_score = self._escape_xml(str(score))
                parts.append(
                    f'<document source="{source}" relevance="{escaped_score}">'
                )
            else:
                parts.append(f'<document source="{source}">')

            parts.append(content)
            parts.append("</document>")

        parts.append("</reference_documents>")
        return "\n".join(parts)

    def _format_conversation(self, contexts: list[BaseContext]) -> str:
        """
        Format conversation contexts as message history.

        Uses role-based message tags for clear turn delineation; the role
        is read from ``metadata["role"]`` and defaults to ``"user"``.
        All content is XML-escaped to prevent prompt injection.
        """
        parts = ["<conversation_history>"]

        for ctx in contexts:
            raw_role = ctx.metadata.get("role")
            # A key that is present but set to None is treated as missing.
            role = self._escape_xml("user" if raw_role is None else raw_role)
            # Escape content to prevent prompt injection via fake XML tags
            content = self._escape_xml_content(ctx.content)
            parts.append(f'<message role="{role}">')
            parts.append(content)
            parts.append("</message>")

        parts.append("</conversation_history>")
        return "\n".join(parts)

    def _format_tool(self, contexts: list[BaseContext]) -> str:
        """
        Format tool contexts as tool results.

        Each tool result is wrapped with the tool name taken from
        ``metadata["tool_name"]`` (default ``"unknown"``) and, when
        ``metadata["status"]`` is truthy, a ``status`` attribute.
        All content is XML-escaped to prevent injection.
        """
        parts = ["<tool_results>"]

        for ctx in contexts:
            raw_name = ctx.metadata.get("tool_name")
            # A key that is present but set to None is treated as missing.
            tool_name = self._escape_xml("unknown" if raw_name is None else raw_name)
            status = ctx.metadata.get("status", "")

            if status:
                # _escape_xml coerces non-string statuses (e.g. numeric
                # codes), matching how the knowledge score is handled.
                escaped_status = self._escape_xml(status)
                parts.append(
                    f'<tool_result name="{tool_name}" status="{escaped_status}">'
                )
            else:
                parts.append(f'<tool_result name="{tool_name}">')

            # Escape content to prevent injection
            parts.append(self._escape_xml_content(ctx.content))
            parts.append("</tool_result>")

        parts.append("</tool_results>")
        return "\n".join(parts)

    @staticmethod
    def _escape_xml(text: Any) -> str:
        """
        Escape XML special characters in attribute values.

        Escapes ``&``, ``<``, ``>``, ``"`` and ``'``.

        Args:
            text: Attribute value to escape. Values that are not ``str``
                instances are converted with ``str()`` first, so metadata
                such as numeric codes does not raise.

        Returns:
            String that is safe to place inside a quoted XML attribute
        """
        if not isinstance(text, str):
            text = str(text)
        return text.translate(ClaudeAdapter._ATTRIBUTE_ESCAPES)

    @staticmethod
    def _escape_xml_content(text: Any) -> str:
        """
        Escape XML special characters in element content.

        This prevents XML injection attacks where malicious content
        could break out of XML tags or inject fake tags for prompt injection.

        Only escapes &, <, > since quotes don't need escaping in content.

        Args:
            text: Content text to escape. Values that are not ``str``
                instances are converted with ``str()`` first.

        Returns:
            XML-safe content string
        """
        if not isinstance(text, str):
            text = str(text)
        return text.translate(ClaudeAdapter._CONTENT_ESCAPES)