feat(memory): add database schema and storage layer (Issue #88)

Add SQLAlchemy models for the Agent Memory System:
- WorkingMemory: Key-value storage with TTL for active sessions
- Episode: Experiential memories from task executions
- Fact: Semantic knowledge triples with confidence scores
- Procedure: Learned skills and procedures with success tracking
- MemoryConsolidationLog: Tracks consolidation jobs between memory tiers
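
For reference, a minimal sketch of how one of these models (Fact) might be
declared in SQLAlchemy 2.0 style, based on the columns in migration 0005
below; the base class and Python-side defaults are assumptions:

    import uuid

    from sqlalchemy import Float, ForeignKey, Integer, String, Text
    from sqlalchemy.dialects.postgresql import UUID
    from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


    class Base(DeclarativeBase):  # assumed shared declarative base
        pass


    class Fact(Base):
        """Semantic knowledge triple (subject, predicate, object)."""

        __tablename__ = "facts"

        id: Mapped[uuid.UUID] = mapped_column(
            UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
        )
        # NULL project_id marks a global fact.
        project_id: Mapped[uuid.UUID | None] = mapped_column(
            ForeignKey("projects.id", ondelete="CASCADE")
        )
        subject: Mapped[str] = mapped_column(String(500))
        predicate: Mapped[str] = mapped_column(String(255))
        object: Mapped[str] = mapped_column(Text())
        confidence: Mapped[float] = mapped_column(Float(), default=0.8)
        reinforcement_count: Mapped[int] = mapped_column(Integer(), default=1)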

Create enums for memory system:
- ScopeType: global, project, agent_type, agent_instance, session
- EpisodeOutcome: success, failure, partial
- ConsolidationType: working_to_episodic, episodic_to_semantic, episodic_to_procedural, pruning
- ConsolidationStatus: pending, running, completed, failed
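
A minimal sketch of two of these enums, assuming str-valued Enum classes
(module layout is an assumption; the values match the migration below):

    import enum


    class ScopeType(str, enum.Enum):
        """Visibility scope for a memory entry."""

        GLOBAL = "global"
        PROJECT = "project"
        AGENT_TYPE = "agent_type"
        AGENT_INSTANCE = "agent_instance"
        SESSION = "session"


    class EpisodeOutcome(str, enum.Enum):
        """Result of the task execution an episode records."""

        SUCCESS = "success"
        FAILURE = "failure"
        PARTIAL = "partial"

ConsolidationType and ConsolidationStatus follow the same pattern.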

Add Alembic migration (0005) for all memory tables with:
- Foreign key relationships to projects, agent_instances, agent_types
- Comprehensive indexes for query patterns
- Unique constraints for key lookups and triple uniqueness
- Vector embedding column placeholders (Text fallback until pgvector is enabled)

Fix timezone-naive datetime.now() in types.py TaskState (review feedback)
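
The datetime fix, sketched (the exact field on TaskState is not shown here):

    from datetime import datetime, timezone

    # Before: naive timestamp, implicitly local time
    timestamp = datetime.now()

    # After: timezone-aware UTC timestamp, matching the
    # DateTime(timezone=True) columns used throughout the schema
    timestamp = datetime.now(timezone.utc)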

Includes 30 unit tests for models and enums.

Closes #88

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
"""Add Agent Memory System tables

Revision ID: 0005
Revises: 0004
Create Date: 2025-01-05

This migration creates the Agent Memory System tables:

- working_memory: Key-value storage with TTL for active sessions
- episodes: Experiential memories from task executions
- facts: Semantic knowledge triples with confidence scores
- procedures: Learned skills and procedures
- memory_consolidation_log: Tracks consolidation jobs

See Issue #88: Database Schema & Storage Layer
"""

from collections.abc import Sequence

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision: str = "0005"
down_revision: str | None = "0004"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
"""Create Agent Memory System tables."""
# =========================================================================
# Create ENUM types for memory system
# =========================================================================
# Scope type enum
scope_type_enum = postgresql.ENUM(
"global",
"project",
"agent_type",
"agent_instance",
"session",
name="scope_type",
create_type=False,
)
scope_type_enum.create(op.get_bind(), checkfirst=True)
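# create_type=False keeps create_table() below from emitting CREATE TYPE again
# when these enums are used in columns; the explicit .create(..., checkfirst=True)
# calls make the type creation idempotent if the migration is re-run.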
# Episode outcome enum
episode_outcome_enum = postgresql.ENUM(
"success",
"failure",
"partial",
name="episode_outcome",
create_type=False,
)
episode_outcome_enum.create(op.get_bind(), checkfirst=True)
# Consolidation type enum
consolidation_type_enum = postgresql.ENUM(
"working_to_episodic",
"episodic_to_semantic",
"episodic_to_procedural",
"pruning",
name="consolidation_type",
create_type=False,
)
consolidation_type_enum.create(op.get_bind(), checkfirst=True)
# Consolidation status enum
consolidation_status_enum = postgresql.ENUM(
"pending",
"running",
"completed",
"failed",
name="consolidation_status",
create_type=False,
)
consolidation_status_enum.create(op.get_bind(), checkfirst=True)
# =========================================================================
# Create working_memory table
# Key-value storage with TTL for active sessions
# =========================================================================
op.create_table(
"working_memory",
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column(
"scope_type",
scope_type_enum,
nullable=False,
),
sa.Column("scope_id", sa.String(255), nullable=False),
sa.Column("key", sa.String(255), nullable=False),
sa.Column("value", postgresql.JSONB(astext_type=sa.Text()), nullable=False),
sa.Column("expires_at", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.PrimaryKeyConstraint("id"),
)
# Working memory indexes
op.create_index(
"ix_working_memory_scope_type",
"working_memory",
["scope_type"],
)
op.create_index(
"ix_working_memory_scope_id",
"working_memory",
["scope_id"],
)
op.create_index(
"ix_working_memory_scope_key",
"working_memory",
["scope_type", "scope_id", "key"],
unique=True,
)
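# The expires_at index below supports TTL sweeps
# (DELETE FROM working_memory WHERE expires_at < now()) without a full scan.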
op.create_index(
"ix_working_memory_expires",
"working_memory",
["expires_at"],
)
op.create_index(
"ix_working_memory_scope_list",
"working_memory",
["scope_type", "scope_id"],
)
# =========================================================================
# Create episodes table
# Experiential memories from task executions
# =========================================================================
op.create_table(
"episodes",
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("agent_instance_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("agent_type_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("session_id", sa.String(255), nullable=False),
sa.Column("task_type", sa.String(100), nullable=False),
sa.Column("task_description", sa.Text(), nullable=False),
sa.Column(
"actions",
postgresql.JSONB(astext_type=sa.Text()),
nullable=False,
server_default="[]",
),
sa.Column("context_summary", sa.Text(), nullable=False),
sa.Column(
"outcome",
episode_outcome_enum,
nullable=False,
),
sa.Column("outcome_details", sa.Text(), nullable=True),
sa.Column("duration_seconds", sa.Float(), nullable=False, server_default="0.0"),
sa.Column("tokens_used", sa.BigInteger(), nullable=False, server_default="0"),
sa.Column(
"lessons_learned",
postgresql.JSONB(astext_type=sa.Text()),
nullable=False,
server_default="[]",
),
sa.Column("importance_score", sa.Float(), nullable=False, server_default="0.5"),
# Vector embedding - using TEXT as fallback, will be VECTOR(1536) when pgvector is available
sa.Column("embedding", sa.Text(), nullable=True),
sa.Column("occurred_at", sa.DateTime(timezone=True), nullable=False),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.PrimaryKeyConstraint("id"),
sa.ForeignKeyConstraint(
["project_id"],
["projects.id"],
name="fk_episodes_project",
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["agent_instance_id"],
["agent_instances.id"],
name="fk_episodes_agent_instance",
ondelete="SET NULL",
),
sa.ForeignKeyConstraint(
["agent_type_id"],
["agent_types.id"],
name="fk_episodes_agent_type",
ondelete="SET NULL",
),
)
# Episode indexes
op.create_index("ix_episodes_project_id", "episodes", ["project_id"])
op.create_index("ix_episodes_agent_instance_id", "episodes", ["agent_instance_id"])
op.create_index("ix_episodes_agent_type_id", "episodes", ["agent_type_id"])
op.create_index("ix_episodes_session_id", "episodes", ["session_id"])
op.create_index("ix_episodes_task_type", "episodes", ["task_type"])
op.create_index("ix_episodes_outcome", "episodes", ["outcome"])
op.create_index("ix_episodes_importance_score", "episodes", ["importance_score"])
op.create_index("ix_episodes_occurred_at", "episodes", ["occurred_at"])
op.create_index("ix_episodes_project_task", "episodes", ["project_id", "task_type"])
op.create_index(
"ix_episodes_project_outcome", "episodes", ["project_id", "outcome"]
)
op.create_index(
"ix_episodes_agent_task", "episodes", ["agent_instance_id", "task_type"]
)
op.create_index(
"ix_episodes_project_time", "episodes", ["project_id", "occurred_at"]
)
op.create_index(
"ix_episodes_importance_time",
"episodes",
["importance_score", "occurred_at"],
)
# =========================================================================
# Create facts table
# Semantic knowledge triples with confidence scores
# =========================================================================
op.create_table(
"facts",
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column(
"project_id", postgresql.UUID(as_uuid=True), nullable=True
), # NULL for global facts
sa.Column("subject", sa.String(500), nullable=False),
sa.Column("predicate", sa.String(255), nullable=False),
sa.Column("object", sa.Text(), nullable=False),
sa.Column("confidence", sa.Float(), nullable=False, server_default="0.8"),
sa.Column(
"source_episode_ids",
postgresql.ARRAY(postgresql.UUID(as_uuid=True)),
nullable=False,
server_default="{}",
),
sa.Column("first_learned", sa.DateTime(timezone=True), nullable=False),
sa.Column("last_reinforced", sa.DateTime(timezone=True), nullable=False),
sa.Column(
"reinforcement_count", sa.Integer(), nullable=False, server_default="1"
),
# Vector embedding
sa.Column("embedding", sa.Text(), nullable=True),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.PrimaryKeyConstraint("id"),
sa.ForeignKeyConstraint(
["project_id"],
["projects.id"],
name="fk_facts_project",
ondelete="CASCADE",
),
)
# Fact indexes
op.create_index("ix_facts_project_id", "facts", ["project_id"])
op.create_index("ix_facts_subject", "facts", ["subject"])
op.create_index("ix_facts_predicate", "facts", ["predicate"])
op.create_index("ix_facts_confidence", "facts", ["confidence"])
op.create_index("ix_facts_subject_predicate", "facts", ["subject", "predicate"])
op.create_index("ix_facts_project_subject", "facts", ["project_id", "subject"])
op.create_index(
"ix_facts_confidence_time", "facts", ["confidence", "last_reinforced"]
)
# Unique constraint for triples within project scope
op.create_index(
"ix_facts_unique_triple",
"facts",
["project_id", "subject", "predicate", "object"],
unique=True,
postgresql_where=sa.text("project_id IS NOT NULL"),
)
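# Note: the partial unique index above only deduplicates project-scoped facts;
# global facts (project_id IS NULL) are not covered by it.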
# =========================================================================
# Create procedures table
# Learned skills and procedures
# =========================================================================
op.create_table(
"procedures",
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("agent_type_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("name", sa.String(255), nullable=False),
sa.Column("trigger_pattern", sa.Text(), nullable=False),
sa.Column(
"steps",
postgresql.JSONB(astext_type=sa.Text()),
nullable=False,
server_default="[]",
),
sa.Column("success_count", sa.Integer(), nullable=False, server_default="0"),
sa.Column("failure_count", sa.Integer(), nullable=False, server_default="0"),
sa.Column("last_used", sa.DateTime(timezone=True), nullable=True),
# Vector embedding
sa.Column("embedding", sa.Text(), nullable=True),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.PrimaryKeyConstraint("id"),
sa.ForeignKeyConstraint(
["project_id"],
["projects.id"],
name="fk_procedures_project",
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["agent_type_id"],
["agent_types.id"],
name="fk_procedures_agent_type",
ondelete="SET NULL",
),
)
# Procedure indexes
op.create_index("ix_procedures_project_id", "procedures", ["project_id"])
op.create_index("ix_procedures_agent_type_id", "procedures", ["agent_type_id"])
op.create_index("ix_procedures_name", "procedures", ["name"])
op.create_index("ix_procedures_last_used", "procedures", ["last_used"])
op.create_index(
"ix_procedures_unique_name",
"procedures",
["project_id", "agent_type_id", "name"],
unique=True,
)
op.create_index("ix_procedures_project_name", "procedures", ["project_id", "name"])
op.create_index(
"ix_procedures_success_rate",
"procedures",
["success_count", "failure_count"],
)
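# The raw-count index above supports filtering and sorting by counts; a derived
# success rate (success_count / (success_count + failure_count)) would need an
# expression index, so it is presumably computed in application code.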
# =========================================================================
# Create memory_consolidation_log table
# Tracks consolidation jobs
# =========================================================================
op.create_table(
"memory_consolidation_log",
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column(
"consolidation_type",
consolidation_type_enum,
nullable=False,
),
sa.Column("source_count", sa.Integer(), nullable=False, server_default="0"),
sa.Column("result_count", sa.Integer(), nullable=False, server_default="0"),
sa.Column("started_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"status",
consolidation_status_enum,
nullable=False,
server_default="pending",
),
sa.Column("error", sa.Text(), nullable=True),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.PrimaryKeyConstraint("id"),
)
# Consolidation log indexes
op.create_index(
"ix_consolidation_type",
"memory_consolidation_log",
["consolidation_type"],
)
op.create_index(
"ix_consolidation_status",
"memory_consolidation_log",
["status"],
)
op.create_index(
"ix_consolidation_type_status",
"memory_consolidation_log",
["consolidation_type", "status"],
)
op.create_index(
"ix_consolidation_started",
"memory_consolidation_log",
["started_at"],
)


def downgrade() -> None:
"""Drop Agent Memory System tables."""
# Drop tables in reverse order (dependencies first)
op.drop_table("memory_consolidation_log")
op.drop_table("procedures")
op.drop_table("facts")
op.drop_table("episodes")
op.drop_table("working_memory")
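# Indexes and constraints are dropped implicitly along with their tables.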
# Drop ENUM types
op.execute("DROP TYPE IF EXISTS consolidation_status")
op.execute("DROP TYPE IF EXISTS consolidation_type")
op.execute("DROP TYPE IF EXISTS episode_outcome")
op.execute("DROP TYPE IF EXISTS scope_type")