feat(memory): integrate memory system with context engine (#97)

## Changes

### New Context Type
- Add MEMORY to ContextType enum for agent memory context
- Create MemoryContext class with subtypes (working, episodic, semantic, procedural)
- Factory methods: from_working_memory, from_episodic_memory, from_semantic_memory, from_procedural_memory

### Memory Context Source
- MemoryContextSource service fetches relevant memories for context assembly
- Configurable fetch limits per memory type
- Parallel fetching from all memory types

### Agent Lifecycle Hooks
- AgentLifecycleManager handles spawn, pause, resume, terminate events
- spawn: Initialize working memory with optional initial state
- pause: Create checkpoint of working memory
- resume: Restore from checkpoint
- terminate: Consolidate working memory to episodic memory
- LifecycleHooks for custom extension points

### Context Engine Integration
- Add memory_query parameter to assemble_context()
- Add session_id and agent_type_id for memory scoping
- Memory budget allocation (15% by default)
- set_memory_source() for runtime configuration

### Tests
- 48 new tests for MemoryContext, MemoryContextSource, and lifecycle hooks
- All 108 memory-related tests passing
- mypy and ruff checks passing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-05 03:49:22 +01:00
parent 0b24d4c6cc
commit 30e5c68304
13 changed files with 2509 additions and 6 deletions
--- a/backend/app/services/context/__init__.py
+++ b/backend/app/services/context/__init__.py
@@ -114,6 +114,8 @@ from .types import (
    ContextType,
    ConversationContext,
    KnowledgeContext,
+    MemoryContext,
+    MemorySubtype,
    MessageRole,
    SystemContext,
    TaskComplexity,
@@ -149,6 +151,8 @@ __all__ = [
    "FormattingError",
    "InvalidContextError",
    "KnowledgeContext",
+    "MemoryContext",
+    "MemorySubtype",
    "MessageRole",
    "ModelAdapter",
    "OpenAIAdapter",
--- a/backend/app/services/context/budget/allocator.py
+++ b/backend/app/services/context/budget/allocator.py
@@ -30,6 +30,7 @@ class TokenBudget:
    knowledge: int = 0
    conversation: int = 0
    tools: int = 0
+    memory: int = 0  # Agent memory (working, episodic, semantic, procedural)
    response_reserve: int = 0
    buffer: int = 0

@@ -60,6 +61,7 @@ class TokenBudget:
            "knowledge": self.knowledge,
            "conversation": self.conversation,
            "tool": self.tools,
+            "memory": self.memory,
        }
        return allocation_map.get(context_type, 0)

@@ -211,6 +213,7 @@ class TokenBudget:
                "knowledge": self.knowledge,
                "conversation": self.conversation,
                "tools": self.tools,
+                "memory": self.memory,
                "response_reserve": self.response_reserve,
                "buffer": self.buffer,
            },
@@ -264,9 +267,10 @@ class BudgetAllocator:
            total=total_tokens,
            system=int(total_tokens * alloc.get("system", 0.05)),
            task=int(total_tokens * alloc.get("task", 0.10)),
-            knowledge=int(total_tokens * alloc.get("knowledge", 0.40)),
-            conversation=int(total_tokens * alloc.get("conversation", 0.20)),
+            knowledge=int(total_tokens * alloc.get("knowledge", 0.30)),
+            conversation=int(total_tokens * alloc.get("conversation", 0.15)),
            tools=int(total_tokens * alloc.get("tools", 0.05)),
+            memory=int(total_tokens * alloc.get("memory", 0.15)),
            response_reserve=int(total_tokens * alloc.get("response", 0.15)),
            buffer=int(total_tokens * alloc.get("buffer", 0.05)),
        )
@@ -317,6 +321,8 @@ class BudgetAllocator:
            budget.conversation = max(0, budget.conversation + actual_adjustment)
        elif context_type == "tool":
            budget.tools = max(0, budget.tools + actual_adjustment)
+        elif context_type == "memory":
+            budget.memory = max(0, budget.memory + actual_adjustment)

        return budget

@@ -338,7 +344,7 @@ class BudgetAllocator:
            Rebalanced budget
        """
        if prioritize is None:
-            prioritize = [ContextType.KNOWLEDGE, ContextType.TASK, ContextType.SYSTEM]
+            prioritize = [ContextType.KNOWLEDGE, ContextType.MEMORY, ContextType.TASK, ContextType.SYSTEM]

        # Calculate unused tokens per type
        unused: dict[str, int] = {}
--- a/backend/app/services/context/engine.py
+++ b/backend/app/services/context/engine.py
@@ -7,6 +7,7 @@ Provides a high-level API for assembling optimized context for LLM requests.

 import logging
 from typing import TYPE_CHECKING, Any
+from uuid import UUID

 from .assembly import ContextPipeline
 from .budget import BudgetAllocator, TokenBudget, TokenCalculator
@@ -20,6 +21,7 @@ from .types import (
    BaseContext,
    ConversationContext,
    KnowledgeContext,
+    MemoryContext,
    MessageRole,
    SystemContext,
    TaskContext,
@@ -30,6 +32,7 @@ if TYPE_CHECKING:
    from redis.asyncio import Redis

    from app.services.mcp.client_manager import MCPClientManager
+    from app.services.memory.integration import MemoryContextSource

 logger = logging.getLogger(__name__)

@@ -64,6 +67,7 @@ class ContextEngine:
        mcp_manager: "MCPClientManager | None" = None,
        redis: "Redis | None" = None,
        settings: ContextSettings | None = None,
+        memory_source: "MemoryContextSource | None" = None,
    ) -> None:
        """
        Initialize the context engine.
@@ -72,9 +76,11 @@ class ContextEngine:
            mcp_manager: MCP client manager for LLM Gateway/Knowledge Base
            redis: Redis connection for caching
            settings: Context settings
+            memory_source: Optional memory context source for agent memory
        """
        self._mcp = mcp_manager
        self._settings = settings or get_context_settings()
+        self._memory_source = memory_source

        # Initialize components
        self._calculator = TokenCalculator(mcp_manager=mcp_manager)
@@ -115,6 +121,15 @@ class ContextEngine:
        """
        self._cache.set_redis(redis)

+    def set_memory_source(self, memory_source: "MemoryContextSource") -> None:
+        """
+        Set memory context source for agent memory integration.
+
+        Replaces whatever source was passed to the constructor (if any).
+        assemble_context() only fetches memory when a source is set AND a
+        non-empty memory_query is supplied.
+
+        Args:
+            memory_source: Memory context source
+        """
+        self._memory_source = memory_source
+
    async def assemble_context(
        self,
        project_id: str,
@@ -126,6 +141,10 @@ class ContextEngine:
        task_description: str | None = None,
        knowledge_query: str | None = None,
        knowledge_limit: int = 10,
+        memory_query: str | None = None,
+        memory_limit: int = 20,
+        session_id: str | None = None,
+        agent_type_id: str | None = None,
        conversation_history: list[dict[str, str]] | None = None,
        tool_results: list[dict[str, Any]] | None = None,
        custom_contexts: list[BaseContext] | None = None,
@@ -151,6 +170,10 @@ class ContextEngine:
            task_description: Current task description
            knowledge_query: Query for knowledge base search
            knowledge_limit: Max number of knowledge results
+            memory_query: Query for agent memory search
+            memory_limit: Max number of memory results
+            session_id: Session ID for working memory access
+            agent_type_id: Agent type ID for procedural memory
            conversation_history: List of {"role": str, "content": str}
            tool_results: List of tool results to include
            custom_contexts: Additional custom contexts
@@ -197,15 +220,27 @@ class ContextEngine:
            )
            contexts.extend(knowledge_contexts)

-        # 4. Conversation history
+        # 4. Memory context from Agent Memory System
+        if memory_query and self._memory_source:
+            memory_contexts = await self._fetch_memory(
+                project_id=project_id,
+                agent_id=agent_id,
+                query=memory_query,
+                limit=memory_limit,
+                session_id=session_id,
+                agent_type_id=agent_type_id,
+            )
+            contexts.extend(memory_contexts)
+
+        # 5. Conversation history
        if conversation_history:
            contexts.extend(self._convert_conversation(conversation_history))

-        # 5. Tool results
+        # 6. Tool results
        if tool_results:
            contexts.extend(self._convert_tool_results(tool_results))

-        # 6. Custom contexts
+        # 7. Custom contexts
        if custom_contexts:
            contexts.extend(custom_contexts)

@@ -308,6 +343,65 @@ class ContextEngine:
            logger.warning(f"Failed to fetch knowledge: {e}")
            return []

+    async def _fetch_memory(
+        self,
+        project_id: str,
+        agent_id: str,
+        query: str,
+        limit: int = 20,
+        session_id: str | None = None,
+        agent_type_id: str | None = None,
+    ) -> list[MemoryContext]:
+        """
+        Fetch relevant memories from Agent Memory System.
+
+        This is a best-effort call: any exception raised while building the
+        fetch config, converting IDs or querying the memory source is logged
+        as a warning and an empty list is returned instead.
+
+        Args:
+            project_id: Project identifier (string form of a UUID)
+            agent_id: Agent identifier (string form of a UUID; an empty
+                value is passed on as None)
+            query: Search query
+            limit: Maximum results
+            session_id: Session ID for working memory; working memory is
+                only included when this is not None
+            agent_type_id: Agent type ID for procedural memory (string form
+                of a UUID, or None)
+
+        Returns:
+            List of MemoryContext instances, at most ``limit`` long. Empty
+            when no memory source is configured or when the fetch fails.
+        """
+        if not self._memory_source:
+            return []
+
+        try:
+            # Import here (not at module top) to avoid circular imports
+
+            # MemoryFetchConfig carries the per-memory-type fetch limits
+            from app.services.memory.integration.context_source import MemoryFetchConfig
+
+            # Per-type caps derived from the overall limit: a quarter (max 5)
+            # for working/procedural, a half (max 10) for episodic/semantic.
+            # NOTE(review): integer division yields 0 for working/procedural
+            # when limit < 4, and 0 for every type when limit < 2 - confirm
+            # how MemoryFetchConfig treats a zero limit.
+            config = MemoryFetchConfig(
+                working_limit=min(limit // 4, 5),
+                episodic_limit=min(limit // 2, 10),
+                semantic_limit=min(limit // 2, 10),
+                procedural_limit=min(limit // 4, 5),
+                include_working=session_id is not None,
+            )
+
+            # UUID(...) raises ValueError on a malformed ID string; that is
+            # caught by the handler below like any other failure.
+            result = await self._memory_source.fetch_context(
+                query=query,
+                project_id=UUID(project_id),
+                agent_instance_id=UUID(agent_id) if agent_id else None,
+                agent_type_id=UUID(agent_type_id) if agent_type_id else None,
+                session_id=session_id,
+                config=config,
+            )
+
+            logger.debug(
+                f"Fetched {len(result.contexts)} memory contexts for query: {query}, "
+                f"by_type: {result.by_type}"
+            )
+            # The per-type caps can add up to more than `limit`
+            # (e.g. 5 + 10 + 10 + 5 for limit=20), so trim the combined list.
+            return result.contexts[:limit]
+
+        except Exception as e:
+            # Memory is optional context: degrade to "no memories" rather
+            # than failing the whole context assembly.
+            logger.warning(f"Failed to fetch memory: {e}")
+            return []
+
    def _convert_conversation(
        self,
        history: list[dict[str, str]],
@@ -466,6 +560,7 @@ def create_context_engine(
    mcp_manager: "MCPClientManager | None" = None,
    redis: "Redis | None" = None,
    settings: ContextSettings | None = None,
+    memory_source: "MemoryContextSource | None" = None,
 ) -> ContextEngine:
    """
    Create a context engine instance.
@@ -474,6 +569,7 @@ def create_context_engine(
        mcp_manager: MCP client manager
        redis: Redis connection
        settings: Context settings
+        memory_source: Optional memory context source

    Returns:
        Configured ContextEngine instance
@@ -482,4 +578,5 @@ def create_context_engine(
        mcp_manager=mcp_manager,
        redis=redis,
        settings=settings,
+        memory_source=memory_source,
    )
--- a/backend/app/services/context/types/__init__.py
+++ b/backend/app/services/context/types/__init__.py
@@ -15,6 +15,10 @@ from .conversation import (
    MessageRole,
 )
 from .knowledge import KnowledgeContext
+from .memory import (
+    MemoryContext,
+    MemorySubtype,
+)
 from .system import SystemContext
 from .task import (
    TaskComplexity,
@@ -33,6 +37,8 @@ __all__ = [
    "ContextType",
    "ConversationContext",
    "KnowledgeContext",
+    "MemoryContext",
+    "MemorySubtype",
    "MessageRole",
    "SystemContext",
    "TaskComplexity",
--- a/backend/app/services/context/types/base.py
+++ b/backend/app/services/context/types/base.py
@@ -26,6 +26,7 @@ class ContextType(str, Enum):
    KNOWLEDGE = "knowledge"
    CONVERSATION = "conversation"
    TOOL = "tool"
+    MEMORY = "memory"  # Agent memory (working, episodic, semantic, procedural)

    @classmethod
    def from_string(cls, value: str) -> "ContextType":
--- a/backend/app/services/context/types/memory.py
+++ b/backend/app/services/context/types/memory.py
@@ -0,0 +1,282 @@
+"""
+Memory Context Type.
+
+Represents agent memory as context for LLM requests.
+Includes working, episodic, semantic, and procedural memories.
+"""
+
+from dataclasses import dataclass, field
+from datetime import UTC, datetime
+from enum import Enum
+from typing import Any
+
+from .base import BaseContext, ContextPriority, ContextType
+
+
+class MemorySubtype(str, Enum):
+    """
+    Types of agent memory.
+
+    Mixes in str, so every member is also a string equal to its value
+    (for example MemorySubtype.WORKING == "working").
+    """
+
+    WORKING = "working"  # Session-scoped temporary data
+    EPISODIC = "episodic"  # Task history and outcomes
+    SEMANTIC = "semantic"  # Facts and knowledge
+    PROCEDURAL = "procedural"  # Learned procedures
+
+
+@dataclass(eq=False)
+class MemoryContext(BaseContext):
+    """
+    Context from agent memory system.
+
+    Memory context represents data retrieved from the agent
+    memory system, including:
+    - Working memory: Current session state
+    - Episodic memory: Past task experiences
+    - Semantic memory: Learned facts and knowledge
+    - Procedural memory: Known procedures and workflows
+
+    Each memory item carries a relevance_score and an importance value.
+    Instances are normally built through the from_*_memory factory
+    classmethods, which fill in the subtype-specific fields; fields that do
+    not apply to a given subtype stay at their None default.
+
+    Declared with eq=False, so the dataclass decorator does not generate
+    an __eq__ for this class.
+    """
+
+    # Memory-specific fields
+    memory_subtype: MemorySubtype = field(default=MemorySubtype.EPISODIC)
+    memory_id: str | None = field(default=None)  # Not set for working memory
+    relevance_score: float = field(default=0.0)
+    importance: float = field(default=0.5)
+    search_query: str = field(default="")  # Query that produced this item
+
+    # Type-specific fields (populated based on memory_subtype)
+    key: str | None = field(default=None)  # For working memory
+    task_type: str | None = field(default=None)  # For episodic
+    outcome: str | None = field(default=None)  # For episodic
+    subject: str | None = field(default=None)  # For semantic
+    predicate: str | None = field(default=None)  # For semantic
+    object_value: str | None = field(default=None)  # For semantic
+    trigger: str | None = field(default=None)  # For procedural
+    success_rate: float | None = field(default=None)  # For procedural
+
+    def get_type(self) -> ContextType:
+        """Return MEMORY context type."""
+        return ContextType.MEMORY
+
+    def to_dict(self) -> dict[str, Any]:
+        """
+        Convert to dictionary with memory-specific fields.
+
+        Starts from the base class dictionary and adds every memory field;
+        memory_subtype is stored as its string value.
+        """
+        base = super().to_dict()
+        base.update(
+            {
+                "memory_subtype": self.memory_subtype.value,
+                "memory_id": self.memory_id,
+                "relevance_score": self.relevance_score,
+                "importance": self.importance,
+                "search_query": self.search_query,
+                "key": self.key,
+                "task_type": self.task_type,
+                "outcome": self.outcome,
+                "subject": self.subject,
+                "predicate": self.predicate,
+                "object_value": self.object_value,
+                "trigger": self.trigger,
+                "success_rate": self.success_rate,
+            }
+        )
+        return base
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "MemoryContext":
+        """
+        Create MemoryContext from dictionary.
+
+        Args:
+            data: Mapping with required keys "content" and "source"; all
+                other keys are optional and fall back to the defaults
+                visible below.
+
+        Returns:
+            MemoryContext instance
+
+        Raises:
+            KeyError: If "content" or "source" is missing
+            ValueError: If "memory_subtype" is not a valid MemorySubtype
+                value, or a string "timestamp" is not valid ISO format
+        """
+        return cls(
+            id=data.get("id", ""),
+            content=data["content"],
+            source=data["source"],
+            # String timestamps are parsed as ISO 8601; any other present
+            # value is passed through unchanged (including an explicit
+            # None); a missing key defaults to the current UTC time.
+            timestamp=datetime.fromisoformat(data["timestamp"])
+            if isinstance(data.get("timestamp"), str)
+            else data.get("timestamp", datetime.now(UTC)),
+            priority=data.get("priority", ContextPriority.NORMAL.value),
+            metadata=data.get("metadata", {}),
+            memory_subtype=MemorySubtype(data.get("memory_subtype", "episodic")),
+            memory_id=data.get("memory_id"),
+            relevance_score=data.get("relevance_score", 0.0),
+            importance=data.get("importance", 0.5),
+            search_query=data.get("search_query", ""),
+            key=data.get("key"),
+            task_type=data.get("task_type"),
+            outcome=data.get("outcome"),
+            subject=data.get("subject"),
+            predicate=data.get("predicate"),
+            object_value=data.get("object_value"),
+            trigger=data.get("trigger"),
+            success_rate=data.get("success_rate"),
+        )
+
+    @classmethod
+    def from_working_memory(
+        cls,
+        key: str,
+        value: Any,
+        source: str = "working_memory",
+        query: str = "",
+    ) -> "MemoryContext":
+        """
+        Create MemoryContext from working memory entry.
+
+        The value is stringified with str() to form the content. Relevance,
+        importance and priority are fixed constants rather than derived
+        from the query.
+
+        Args:
+            key: Working memory key
+            value: Value stored at key
+            source: Source identifier
+            query: Search query used
+
+        Returns:
+            MemoryContext instance
+        """
+        return cls(
+            content=str(value),
+            source=source,
+            memory_subtype=MemorySubtype.WORKING,
+            key=key,
+            relevance_score=1.0,  # Working memory is always relevant
+            importance=0.8,  # Higher importance for current session state
+            search_query=query,
+            priority=ContextPriority.HIGH.value,
+        )
+
+    @classmethod
+    def from_episodic_memory(
+        cls,
+        episode: Any,
+        query: str = "",
+    ) -> "MemoryContext":
+        """
+        Create MemoryContext from episodic memory episode.
+
+        The episode must expose ``task_description`` and ``id``. The
+        attributes ``outcome``, ``importance_score``, ``task_type``,
+        ``session_id``, ``occurred_at`` and ``lessons_learned`` are optional
+        and read defensively.
+
+        Args:
+            episode: Episode object from episodic memory
+            query: Search query used
+
+        Returns:
+            MemoryContext instance
+        """
+        # Normalise outcome to a string: use .value when the outcome has one
+        # (e.g. an Enum member), otherwise str(); a missing or falsy outcome
+        # stays None.
+        outcome_val = None
+        if hasattr(episode, "outcome") and episode.outcome:
+            outcome_val = (
+                episode.outcome.value
+                if hasattr(episode.outcome, "value")
+                else str(episode.outcome)
+            )
+
+        return cls(
+            content=episode.task_description,
+            source=f"episodic:{episode.id}",
+            memory_subtype=MemorySubtype.EPISODIC,
+            memory_id=str(episode.id),
+            # NOTE(review): relevance is filled from importance_score, not
+            # from a search similarity score - confirm this is intended.
+            relevance_score=getattr(episode, "importance_score", 0.5),
+            importance=getattr(episode, "importance_score", 0.5),
+            search_query=query,
+            task_type=getattr(episode, "task_type", None),
+            outcome=outcome_val,
+            metadata={
+                "session_id": getattr(episode, "session_id", None),
+                "occurred_at": episode.occurred_at.isoformat()
+                if hasattr(episode, "occurred_at") and episode.occurred_at
+                else None,
+                "lessons_learned": getattr(episode, "lessons_learned", []),
+            },
+        )
+
+    @classmethod
+    def from_semantic_memory(
+        cls,
+        fact: Any,
+        query: str = "",
+    ) -> "MemoryContext":
+        """
+        Create MemoryContext from semantic memory fact.
+
+        The fact must expose ``subject``, ``predicate``, ``object`` and
+        ``id``; ``confidence`` is optional (default 0.5). The content is the
+        space-joined "subject predicate object" triple.
+
+        Args:
+            fact: Fact object from semantic memory
+            query: Search query used
+
+        Returns:
+            MemoryContext instance
+        """
+        triple = f"{fact.subject} {fact.predicate} {fact.object}"
+        return cls(
+            content=triple,
+            source=f"semantic:{fact.id}",
+            memory_subtype=MemorySubtype.SEMANTIC,
+            memory_id=str(fact.id),
+            # The fact's confidence is used for both relevance and importance
+            relevance_score=getattr(fact, "confidence", 0.5),
+            importance=getattr(fact, "confidence", 0.5),
+            search_query=query,
+            subject=fact.subject,
+            predicate=fact.predicate,
+            object_value=fact.object,
+            priority=ContextPriority.NORMAL.value,
+        )
+
+    @classmethod
+    def from_procedural_memory(
+        cls,
+        procedure: Any,
+        query: str = "",
+    ) -> "MemoryContext":
+        """
+        Create MemoryContext from procedural memory procedure.
+
+        The procedure must expose ``name``, ``trigger_pattern`` and ``id``.
+        ``steps``, ``success_rate``, ``success_count`` and ``failure_count``
+        are optional. The content is a text block with the procedure name,
+        its trigger and a numbered list of steps.
+
+        Args:
+            procedure: Procedure object from procedural memory
+            query: Search query used
+
+        Returns:
+            MemoryContext instance
+        """
+        # Format steps as content: dict steps contribute their 'action'
+        # entry (or the whole dict when that key is absent); any other step
+        # is rendered as is.
+        steps = getattr(procedure, "steps", [])
+        steps_content = "\n".join(
+            f"  {i + 1}. {step.get('action', step) if isinstance(step, dict) else step}"
+            for i, step in enumerate(steps)
+        )
+        content = f"Procedure: {procedure.name}\nTrigger: {procedure.trigger_pattern}\nSteps:\n{steps_content}"
+
+        return cls(
+            content=content,
+            source=f"procedural:{procedure.id}",
+            memory_subtype=MemorySubtype.PROCEDURAL,
+            memory_id=str(procedure.id),
+            # Relevance falls back to 0.5 when success_rate is absent, while
+            # the success_rate field itself falls back to None.
+            relevance_score=getattr(procedure, "success_rate", 0.5),
+            importance=0.7,  # Procedures are moderately important
+            search_query=query,
+            trigger=procedure.trigger_pattern,
+            success_rate=getattr(procedure, "success_rate", None),
+            metadata={
+                "steps_count": len(steps),
+                # Total runs = successes + failures (each defaults to 0)
+                "execution_count": getattr(procedure, "success_count", 0)
+                + getattr(procedure, "failure_count", 0),
+            },
+        )
+
+    def is_working_memory(self) -> bool:
+        """Check if this is working memory."""
+        return self.memory_subtype == MemorySubtype.WORKING
+
+    def is_episodic_memory(self) -> bool:
+        """Check if this is episodic memory."""
+        return self.memory_subtype == MemorySubtype.EPISODIC
+
+    def is_semantic_memory(self) -> bool:
+        """Check if this is semantic memory."""
+        return self.memory_subtype == MemorySubtype.SEMANTIC
+
+    def is_procedural_memory(self) -> bool:
+        """Check if this is procedural memory."""
+        return self.memory_subtype == MemorySubtype.PROCEDURAL
+
+    def get_formatted_source(self) -> str:
+        """
+        Get a formatted source string for display.
+
+        The result has the form "[subtype] source", followed by
+        "(xxxxxxxx...)" with the first 8 characters of memory_id when a
+        memory_id is set.
+
+        Returns:
+            Formatted source string
+        """
+        parts = [f"[{self.memory_subtype.value}]", self.source]
+        if self.memory_id:
+            parts.append(f"({self.memory_id[:8]}...)")
+        return " ".join(parts)