feat(memory): add database schema and storage layer (Issue #88)

Add SQLAlchemy models for the Agent Memory System:
- WorkingMemory: Key-value storage with TTL for active sessions
- Episode: Experiential memories from task executions
- Fact: Semantic knowledge triples with confidence scores
- Procedure: Learned skills and procedures with success tracking
- MemoryConsolidationLog: Tracks consolidation jobs between memory tiers

Create enums for memory system:
- ScopeType: global, project, agent_type, agent_instance, session
- EpisodeOutcome: success, failure, partial
- ConsolidationType: working_to_episodic, episodic_to_semantic, episodic_to_procedural, pruning
- ConsolidationStatus: pending, running, completed, failed

Add Alembic migration (0005) for all memory tables with:
- Foreign key relationships to projects, agent_instances, agent_types
- Comprehensive indexes for query patterns
- Unique constraints for key lookups and triple uniqueness
- Vector embedding column placeholders (Text fallback until pgvector is enabled)

Fix timezone-naive datetime.now() in types.py TaskState (review feedback)

Includes 30 unit tests for models and enums.

Closes #88

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-05 01:37:58 +01:00
parent 085a748929
commit c9d8c0835c
14 changed files with 1383 additions and 7 deletions


@@ -0,0 +1,32 @@
# app/models/memory/__init__.py
"""
Memory System Database Models.
Provides SQLAlchemy models for the Agent Memory System:
- WorkingMemory: Key-value storage with TTL
- Episode: Experiential memories
- Fact: Semantic knowledge triples
- Procedure: Learned skills
- MemoryConsolidationLog: Consolidation job tracking
"""
from .consolidation import MemoryConsolidationLog
from .enums import ConsolidationStatus, ConsolidationType, EpisodeOutcome, ScopeType
from .episode import Episode
from .fact import Fact
from .procedure import Procedure
from .working_memory import WorkingMemory
__all__ = [
    # Enums
    "ConsolidationStatus",
    "ConsolidationType",
    "EpisodeOutcome",
    "ScopeType",
    # Models
    "Episode",
    "Fact",
    "MemoryConsolidationLog",
    "Procedure",
    "WorkingMemory",
]


@@ -0,0 +1,72 @@
# app/models/memory/consolidation.py
"""
Memory Consolidation Log database model.
Tracks memory consolidation jobs that transfer knowledge
between memory tiers.
"""
from sqlalchemy import Column, DateTime, Enum, Index, Integer, Text
from app.models.base import Base, TimestampMixin, UUIDMixin
from .enums import ConsolidationStatus, ConsolidationType
class MemoryConsolidationLog(Base, UUIDMixin, TimestampMixin):
"""
Memory consolidation job log.
Tracks consolidation operations:
- Working -> Episodic (session end)
- Episodic -> Semantic (fact extraction)
- Episodic -> Procedural (procedure learning)
- Pruning (removing low-value memories)
"""
__tablename__ = "memory_consolidation_log"
# Consolidation type
consolidation_type: Column[ConsolidationType] = Column(
Enum(ConsolidationType),
nullable=False,
index=True,
)
# Counts
source_count = Column(Integer, nullable=False, default=0)
result_count = Column(Integer, nullable=False, default=0)
# Timing
started_at = Column(DateTime(timezone=True), nullable=False)
completed_at = Column(DateTime(timezone=True), nullable=True)
# Status
status: Column[ConsolidationStatus] = Column(
Enum(ConsolidationStatus),
nullable=False,
default=ConsolidationStatus.PENDING,
index=True,
)
# Error details if failed
error = Column(Text, nullable=True)
__table_args__ = (
# Query patterns
Index("ix_consolidation_type_status", "consolidation_type", "status"),
Index("ix_consolidation_started", "started_at"),
)
@property
def duration_seconds(self) -> float | None:
"""Calculate duration of the consolidation job."""
if self.completed_at is None or self.started_at is None:
return None
return (self.completed_at - self.started_at).total_seconds()
def __repr__(self) -> str:
return (
f"<MemoryConsolidationLog {self.id} "
f"type={self.consolidation_type.value} status={self.status.value}>"
)
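
A minimal usage sketch (not part of this commit), assuming a configured SQLAlchemy Session bound to these models is available as `session`:

# Hypothetical example: record a consolidation run and read back its duration.
from datetime import datetime, timezone

from app.models.memory import ConsolidationStatus, ConsolidationType, MemoryConsolidationLog

log = MemoryConsolidationLog(
    consolidation_type=ConsolidationType.EPISODIC_TO_SEMANTIC,
    status=ConsolidationStatus.RUNNING,
    started_at=datetime.now(timezone.utc),
)
session.add(log)
session.flush()

# ... run the fact-extraction job, then close out the log entry ...
log.completed_at = datetime.now(timezone.utc)
log.status = ConsolidationStatus.COMPLETED
log.result_count = 12  # illustrative count of facts produced
session.commit()

print(log.duration_seconds)  # seconds between started_at and completed_at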


@@ -0,0 +1,73 @@
# app/models/memory/enums.py
"""
Enums for Memory System database models.
These enums define the database-level constraints for memory types
and scoping levels.
"""
from enum import Enum as PyEnum
class ScopeType(str, PyEnum):
"""
Memory scope levels matching the memory service types.
GLOBAL: System-wide memories accessible by all
PROJECT: Project-scoped memories
AGENT_TYPE: Type-specific memories (shared by instances of same type)
AGENT_INSTANCE: Instance-specific memories
SESSION: Session-scoped ephemeral memories
"""
GLOBAL = "global"
PROJECT = "project"
AGENT_TYPE = "agent_type"
AGENT_INSTANCE = "agent_instance"
SESSION = "session"
class EpisodeOutcome(str, PyEnum):
"""
Outcome of an episode (task execution).
SUCCESS: Task completed successfully
FAILURE: Task failed
PARTIAL: Task partially completed
"""
SUCCESS = "success"
FAILURE = "failure"
PARTIAL = "partial"
class ConsolidationType(str, PyEnum):
"""
Types of memory consolidation operations.
WORKING_TO_EPISODIC: Transfer session state to episodic
EPISODIC_TO_SEMANTIC: Extract facts from episodes
EPISODIC_TO_PROCEDURAL: Extract procedures from episodes
PRUNING: Remove low-value memories
"""
WORKING_TO_EPISODIC = "working_to_episodic"
EPISODIC_TO_SEMANTIC = "episodic_to_semantic"
EPISODIC_TO_PROCEDURAL = "episodic_to_procedural"
PRUNING = "pruning"
class ConsolidationStatus(str, PyEnum):
"""
Status of a consolidation job.
PENDING: Job is queued
RUNNING: Job is currently executing
COMPLETED: Job finished successfully
FAILED: Job failed with errors
"""
PENDING = "pending"
RUNNING = "running"
COMPLETED = "completed"
FAILED = "failed"


@@ -0,0 +1,125 @@
# app/models/memory/episode.py
"""
Episode database model.
Stores experiential memories - records of past task executions
with context, actions, outcomes, and lessons learned.
"""
from sqlalchemy import (
BigInteger,
Column,
DateTime,
Enum,
Float,
ForeignKey,
Index,
String,
Text,
)
from sqlalchemy.dialects.postgresql import (
JSONB,
UUID as PGUUID,
)
from sqlalchemy.orm import relationship
from app.models.base import Base, TimestampMixin, UUIDMixin
from .enums import EpisodeOutcome
# Import pgvector type - will be available after migration enables extension
try:
from pgvector.sqlalchemy import Vector # type: ignore[import-not-found]
except ImportError:
# Fallback for environments without pgvector
Vector = None
class Episode(Base, UUIDMixin, TimestampMixin):
"""
Episodic memory model.
Records experiential memories from agent task execution:
- What task was performed
- What actions were taken
- What was the outcome
- What lessons were learned
"""
__tablename__ = "episodes"
# Foreign keys
project_id = Column(
PGUUID(as_uuid=True),
ForeignKey("projects.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
agent_instance_id = Column(
PGUUID(as_uuid=True),
ForeignKey("agent_instances.id", ondelete="SET NULL"),
nullable=True,
index=True,
)
agent_type_id = Column(
PGUUID(as_uuid=True),
ForeignKey("agent_types.id", ondelete="SET NULL"),
nullable=True,
index=True,
)
# Session reference
session_id = Column(String(255), nullable=False, index=True)
# Task information
task_type = Column(String(100), nullable=False, index=True)
task_description = Column(Text, nullable=False)
# Actions taken (list of action dictionaries)
actions = Column(JSONB, default=list, nullable=False)
# Context summary
context_summary = Column(Text, nullable=False)
# Outcome
outcome: Column[EpisodeOutcome] = Column(
Enum(EpisodeOutcome),
nullable=False,
index=True,
)
outcome_details = Column(Text, nullable=True)
# Metrics
duration_seconds = Column(Float, nullable=False, default=0.0)
tokens_used = Column(BigInteger, nullable=False, default=0)
# Learning
lessons_learned = Column(JSONB, default=list, nullable=False)
importance_score = Column(Float, nullable=False, default=0.5, index=True)
# Vector embedding for semantic search
# Using 1536 dimensions for OpenAI text-embedding-3-small
embedding = Column(Vector(1536) if Vector else Text, nullable=True)
# When the episode occurred
occurred_at = Column(DateTime(timezone=True), nullable=False, index=True)
# Relationships
project = relationship("Project", foreign_keys=[project_id])
agent_instance = relationship("AgentInstance", foreign_keys=[agent_instance_id])
agent_type = relationship("AgentType", foreign_keys=[agent_type_id])
__table_args__ = (
# Primary query patterns
Index("ix_episodes_project_task", "project_id", "task_type"),
Index("ix_episodes_project_outcome", "project_id", "outcome"),
Index("ix_episodes_agent_task", "agent_instance_id", "task_type"),
Index("ix_episodes_project_time", "project_id", "occurred_at"),
# For importance-based pruning
Index("ix_episodes_importance_time", "importance_score", "occurred_at"),
)
def __repr__(self) -> str:
return f"<Episode {self.id} task={self.task_type} outcome={self.outcome.value}>"
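
A query sketch (not in the diff), assuming `session` is a configured Session and `project_id` is an existing project UUID; the filters line up with the ix_episodes_project_task and ix_episodes_project_time indexes:

# Hypothetical example: fetch the ten most recent successful episodes
# for a given project and task type.
from sqlalchemy import select

from app.models.memory import Episode, EpisodeOutcome

stmt = (
    select(Episode)
    .where(
        Episode.project_id == project_id,
        Episode.task_type == "code_review",  # illustrative task type
        Episode.outcome == EpisodeOutcome.SUCCESS,
    )
    .order_by(Episode.occurred_at.desc())
    .limit(10)
)
recent_successes = session.scalars(stmt).all()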


@@ -0,0 +1,103 @@
# app/models/memory/fact.py
"""
Fact database model.
Stores semantic memories - learned facts in subject-predicate-object
triple format with confidence scores and source tracking.
"""
from sqlalchemy import (
Column,
DateTime,
Float,
ForeignKey,
Index,
Integer,
String,
Text,
)
from sqlalchemy.dialects.postgresql import (
ARRAY,
UUID as PGUUID,
)
from sqlalchemy.orm import relationship
from app.models.base import Base, TimestampMixin, UUIDMixin
# Import pgvector type
try:
from pgvector.sqlalchemy import Vector # type: ignore[import-not-found]
except ImportError:
Vector = None
class Fact(Base, UUIDMixin, TimestampMixin):
"""
Semantic memory model.
Stores learned facts as subject-predicate-object triples:
- "FastAPI" - "uses" - "Starlette framework"
- "Project Alpha" - "requires" - "OAuth authentication"
Facts have confidence scores that decay over time and can be
reinforced when the same fact is learned again.
"""
__tablename__ = "facts"
# Scoping: project_id is NULL for global facts
project_id = Column(
PGUUID(as_uuid=True),
ForeignKey("projects.id", ondelete="CASCADE"),
nullable=True,
index=True,
)
# Triple format
subject = Column(String(500), nullable=False, index=True)
predicate = Column(String(255), nullable=False, index=True)
object = Column(Text, nullable=False)
# Confidence score (0.0 to 1.0)
confidence = Column(Float, nullable=False, default=0.8, index=True)
# Source tracking: which episodes contributed to this fact
source_episode_ids: Column[list] = Column(
ARRAY(PGUUID(as_uuid=True)), default=list, nullable=False
)
# Learning history
first_learned = Column(DateTime(timezone=True), nullable=False)
last_reinforced = Column(DateTime(timezone=True), nullable=False)
reinforcement_count = Column(Integer, nullable=False, default=1)
# Vector embedding for semantic search
embedding = Column(Vector(1536) if Vector else Text, nullable=True)
# Relationships
project = relationship("Project", foreign_keys=[project_id])
__table_args__ = (
# Unique constraint on triple within project scope
Index(
"ix_facts_unique_triple",
"project_id",
"subject",
"predicate",
"object",
unique=True,
postgresql_where="project_id IS NOT NULL",
),
# Query patterns
Index("ix_facts_subject_predicate", "subject", "predicate"),
Index("ix_facts_project_subject", "project_id", "subject"),
Index("ix_facts_confidence_time", "confidence", "last_reinforced"),
# For entity lookups by subject
Index("ix_facts_subject", "subject"),
)
def __repr__(self) -> str:
return (
f"<Fact {self.id} '{self.subject}' - '{self.predicate}' - "
f"'{self.object[:50]}...' conf={self.confidence:.2f}>"
)
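
A sketch of the reinforcement pattern the docstring describes (assumptions: `session` is a configured Session, `project_id` is an existing project UUID, and the triple values are illustrative):

# Hypothetical example: reinforce a known triple or store it on first sighting.
from datetime import datetime, timezone

from sqlalchemy import select

from app.models.memory import Fact

now = datetime.now(timezone.utc)
existing = session.scalars(
    select(Fact).where(
        Fact.project_id == project_id,
        Fact.subject == "FastAPI",
        Fact.predicate == "uses",
        Fact.object == "Starlette framework",
    )
).first()

if existing is None:
    session.add(
        Fact(
            project_id=project_id,
            subject="FastAPI",
            predicate="uses",
            object="Starlette framework",
            first_learned=now,
            last_reinforced=now,
        )
    )
else:
    existing.confidence = min(1.0, existing.confidence + 0.05)  # illustrative boost
    existing.reinforcement_count += 1
    existing.last_reinforced = now
session.commit()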


@@ -0,0 +1,115 @@
# app/models/memory/procedure.py
"""
Procedure database model.
Stores procedural memories - learned skills and procedures
derived from successful task execution patterns.
"""
from sqlalchemy import (
Column,
DateTime,
ForeignKey,
Index,
Integer,
String,
Text,
)
from sqlalchemy.dialects.postgresql import (
JSONB,
UUID as PGUUID,
)
from sqlalchemy.orm import relationship
from app.models.base import Base, TimestampMixin, UUIDMixin
# Import pgvector type
try:
from pgvector.sqlalchemy import Vector # type: ignore[import-not-found]
except ImportError:
Vector = None
class Procedure(Base, UUIDMixin, TimestampMixin):
"""
Procedural memory model.
Stores learned procedures (skills) extracted from successful
task execution patterns:
- Name and trigger pattern for matching
- Step-by-step actions
- Success/failure tracking
"""
__tablename__ = "procedures"
# Scoping
project_id = Column(
PGUUID(as_uuid=True),
ForeignKey("projects.id", ondelete="CASCADE"),
nullable=True,
index=True,
)
agent_type_id = Column(
PGUUID(as_uuid=True),
ForeignKey("agent_types.id", ondelete="SET NULL"),
nullable=True,
index=True,
)
# Procedure identification
name = Column(String(255), nullable=False, index=True)
trigger_pattern = Column(Text, nullable=False)
# Steps as JSON array of step objects
# Each step: {order, action, parameters, expected_outcome, fallback_action}
steps = Column(JSONB, default=list, nullable=False)
# Success tracking
success_count = Column(Integer, nullable=False, default=0)
failure_count = Column(Integer, nullable=False, default=0)
# Usage tracking
last_used = Column(DateTime(timezone=True), nullable=True, index=True)
# Vector embedding for semantic matching
embedding = Column(Vector(1536) if Vector else Text, nullable=True)
# Relationships
project = relationship("Project", foreign_keys=[project_id])
agent_type = relationship("AgentType", foreign_keys=[agent_type_id])
__table_args__ = (
# Unique procedure name within scope
Index(
"ix_procedures_unique_name",
"project_id",
"agent_type_id",
"name",
unique=True,
),
# Query patterns
Index("ix_procedures_project_name", "project_id", "name"),
Index("ix_procedures_agent_type", "agent_type_id"),
# For finding best procedures
Index("ix_procedures_success_rate", "success_count", "failure_count"),
)
@property
def success_rate(self) -> float:
"""Calculate the success rate of this procedure."""
total = self.success_count + self.failure_count
if total == 0:
return 0.0
return self.success_count / total
@property
def total_uses(self) -> int:
"""Get total number of times this procedure was used."""
return self.success_count + self.failure_count
def __repr__(self) -> str:
return (
f"<Procedure {self.name} ({self.id}) success_rate={self.success_rate:.2%}>"
)
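
A small usage sketch (not part of this commit), assuming `session` is a configured Session and `procedure_id` is the UUID of an existing row:

# Hypothetical example: record the outcome of running a learned procedure.
from datetime import datetime, timezone

from app.models.memory import Procedure

proc = session.get(Procedure, procedure_id)
if proc is not None:
    proc.success_count += 1  # or failure_count += 1 on failure
    proc.last_used = datetime.now(timezone.utc)
    session.commit()
    print(f"{proc.name}: {proc.success_rate:.0%} over {proc.total_uses} uses")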


@@ -0,0 +1,58 @@
# app/models/memory/working_memory.py
"""
Working Memory database model.
Stores ephemeral key-value data for active sessions with TTL support.
Used as a database-backed fallback when Redis is unavailable.
"""
from sqlalchemy import Column, DateTime, Enum, Index, String
from sqlalchemy.dialects.postgresql import JSONB
from app.models.base import Base, TimestampMixin, UUIDMixin
from .enums import ScopeType
class WorkingMemory(Base, UUIDMixin, TimestampMixin):
"""
Working memory storage table.
Provides database-backed working memory as fallback when
Redis is unavailable. Supports TTL-based expiration.
"""
__tablename__ = "working_memory"
# Scoping
scope_type: Column[ScopeType] = Column(
Enum(ScopeType),
nullable=False,
index=True,
)
scope_id = Column(String(255), nullable=False, index=True)
# Key-value storage
key = Column(String(255), nullable=False)
value = Column(JSONB, nullable=False)
# TTL support
expires_at = Column(DateTime(timezone=True), nullable=True, index=True)
__table_args__ = (
# Primary lookup: scope + key
Index(
"ix_working_memory_scope_key",
"scope_type",
"scope_id",
"key",
unique=True,
),
# For cleanup of expired entries
Index("ix_working_memory_expires", "expires_at"),
# For listing all keys in a scope
Index("ix_working_memory_scope_list", "scope_type", "scope_id"),
)
def __repr__(self) -> str:
return f"<WorkingMemory {self.scope_type.value}:{self.scope_id}:{self.key}>"
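
A cleanup sketch using the expires_at index (assumption: `session` is a configured Session; this is how a periodic TTL sweep might look, not code from this commit):

# Hypothetical example: purge expired working-memory entries.
from datetime import datetime, timezone

from sqlalchemy import delete

from app.models.memory import WorkingMemory

session.execute(
    delete(WorkingMemory).where(
        WorkingMemory.expires_at.is_not(None),
        WorkingMemory.expires_at < datetime.now(timezone.utc),
    )
)
session.commit()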