From c9d8c0835c95caa36b28ba531410d7b37870692e Mon Sep 17 00:00:00 2001 From: Felipe Cardoso Date: Mon, 5 Jan 2026 01:37:58 +0100 Subject: [PATCH] feat(memory): add database schema and storage layer (Issue #88) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SQLAlchemy models for the Agent Memory System: - WorkingMemory: Key-value storage with TTL for active sessions - Episode: Experiential memories from task executions - Fact: Semantic knowledge triples with confidence scores - Procedure: Learned skills and procedures with success tracking - MemoryConsolidationLog: Tracks consolidation jobs between memory tiers Create enums for memory system: - ScopeType: global, project, agent_type, agent_instance, session - EpisodeOutcome: success, failure, partial - ConsolidationType: working_to_episodic, episodic_to_semantic, etc. - ConsolidationStatus: pending, running, completed, failed Add Alembic migration (0005) for all memory tables with: - Foreign key relationships to projects, agent_instances, agent_types - Comprehensive indexes for query patterns - Unique constraints for key lookups and triple uniqueness - Vector embedding column placeholders (Text fallback until pgvector enabled) Fix timezone-naive datetime.now() in types.py TaskState (review feedback) Includes 30 unit tests for models and enums. Closes #88 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../versions/0005_add_memory_system_tables.py | 446 ++++++++++++++++++ backend/app/models/__init__.py | 23 + backend/app/models/memory/__init__.py | 32 ++ backend/app/models/memory/consolidation.py | 72 +++ backend/app/models/memory/enums.py | 73 +++ backend/app/models/memory/episode.py | 125 +++++ backend/app/models/memory/fact.py | 103 ++++ backend/app/models/memory/procedure.py | 115 +++++ backend/app/models/memory/working_memory.py | 58 +++ backend/app/services/memory/types.py | 19 +- backend/tests/unit/models/__init__.py | 2 + backend/tests/unit/models/memory/__init__.py | 2 + .../tests/unit/models/memory/test_enums.py | 71 +++ .../tests/unit/models/memory/test_models.py | 249 ++++++++++ 14 files changed, 1383 insertions(+), 7 deletions(-) create mode 100644 backend/app/alembic/versions/0005_add_memory_system_tables.py create mode 100644 backend/app/models/memory/__init__.py create mode 100644 backend/app/models/memory/consolidation.py create mode 100644 backend/app/models/memory/enums.py create mode 100644 backend/app/models/memory/episode.py create mode 100644 backend/app/models/memory/fact.py create mode 100644 backend/app/models/memory/procedure.py create mode 100644 backend/app/models/memory/working_memory.py create mode 100644 backend/tests/unit/models/__init__.py create mode 100644 backend/tests/unit/models/memory/__init__.py create mode 100644 backend/tests/unit/models/memory/test_enums.py create mode 100644 backend/tests/unit/models/memory/test_models.py diff --git a/backend/app/alembic/versions/0005_add_memory_system_tables.py b/backend/app/alembic/versions/0005_add_memory_system_tables.py new file mode 100644 index 0000000..d1e64f7 --- /dev/null +++ b/backend/app/alembic/versions/0005_add_memory_system_tables.py @@ -0,0 +1,446 @@ +"""Add Agent Memory System tables + +Revision ID: 0005 +Revises: 0004 +Create Date: 2025-01-05 + +This migration creates the Agent Memory System tables: +- working_memory: Key-value storage with TTL for active sessions +- episodes: Experiential memories from task executions +- facts: Semantic knowledge 
triples with confidence scores +- procedures: Learned skills and procedures +- memory_consolidation_log: Tracks consolidation jobs + +See Issue #88: Database Schema & Storage Layer +""" + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = "0005" +down_revision: str | None = "0004" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Create Agent Memory System tables.""" + + # ========================================================================= + # Create ENUM types for memory system + # ========================================================================= + + # Scope type enum + scope_type_enum = postgresql.ENUM( + "global", + "project", + "agent_type", + "agent_instance", + "session", + name="scope_type", + create_type=False, + ) + scope_type_enum.create(op.get_bind(), checkfirst=True) + + # Episode outcome enum + episode_outcome_enum = postgresql.ENUM( + "success", + "failure", + "partial", + name="episode_outcome", + create_type=False, + ) + episode_outcome_enum.create(op.get_bind(), checkfirst=True) + + # Consolidation type enum + consolidation_type_enum = postgresql.ENUM( + "working_to_episodic", + "episodic_to_semantic", + "episodic_to_procedural", + "pruning", + name="consolidation_type", + create_type=False, + ) + consolidation_type_enum.create(op.get_bind(), checkfirst=True) + + # Consolidation status enum + consolidation_status_enum = postgresql.ENUM( + "pending", + "running", + "completed", + "failed", + name="consolidation_status", + create_type=False, + ) + consolidation_status_enum.create(op.get_bind(), checkfirst=True) + + # ========================================================================= + # Create working_memory table + # Key-value storage with TTL for active sessions + # ========================================================================= + op.create_table( + "working_memory", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column( + "scope_type", + scope_type_enum, + nullable=False, + ), + sa.Column("scope_id", sa.String(255), nullable=False), + sa.Column("key", sa.String(255), nullable=False), + sa.Column("value", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column("expires_at", sa.DateTime(timezone=True), nullable=True), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.PrimaryKeyConstraint("id"), + ) + + # Working memory indexes + op.create_index( + "ix_working_memory_scope_type", + "working_memory", + ["scope_type"], + ) + op.create_index( + "ix_working_memory_scope_id", + "working_memory", + ["scope_id"], + ) + op.create_index( + "ix_working_memory_scope_key", + "working_memory", + ["scope_type", "scope_id", "key"], + unique=True, + ) + op.create_index( + "ix_working_memory_expires", + "working_memory", + ["expires_at"], + ) + op.create_index( + "ix_working_memory_scope_list", + "working_memory", + ["scope_type", "scope_id"], + ) + + # ========================================================================= + # Create episodes table + # Experiential memories from task executions + # ========================================================================= + op.create_table( + "episodes", + 
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("agent_instance_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("agent_type_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("session_id", sa.String(255), nullable=False), + sa.Column("task_type", sa.String(100), nullable=False), + sa.Column("task_description", sa.Text(), nullable=False), + sa.Column( + "actions", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default="[]", + ), + sa.Column("context_summary", sa.Text(), nullable=False), + sa.Column( + "outcome", + episode_outcome_enum, + nullable=False, + ), + sa.Column("outcome_details", sa.Text(), nullable=True), + sa.Column("duration_seconds", sa.Float(), nullable=False, server_default="0.0"), + sa.Column("tokens_used", sa.BigInteger(), nullable=False, server_default="0"), + sa.Column( + "lessons_learned", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default="[]", + ), + sa.Column("importance_score", sa.Float(), nullable=False, server_default="0.5"), + # Vector embedding - using TEXT as fallback, will be VECTOR(1536) when pgvector is available + sa.Column("embedding", sa.Text(), nullable=True), + sa.Column("occurred_at", sa.DateTime(timezone=True), nullable=False), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.PrimaryKeyConstraint("id"), + sa.ForeignKeyConstraint( + ["project_id"], + ["projects.id"], + name="fk_episodes_project", + ondelete="CASCADE", + ), + sa.ForeignKeyConstraint( + ["agent_instance_id"], + ["agent_instances.id"], + name="fk_episodes_agent_instance", + ondelete="SET NULL", + ), + sa.ForeignKeyConstraint( + ["agent_type_id"], + ["agent_types.id"], + name="fk_episodes_agent_type", + ondelete="SET NULL", + ), + ) + + # Episode indexes + op.create_index("ix_episodes_project_id", "episodes", ["project_id"]) + op.create_index("ix_episodes_agent_instance_id", "episodes", ["agent_instance_id"]) + op.create_index("ix_episodes_agent_type_id", "episodes", ["agent_type_id"]) + op.create_index("ix_episodes_session_id", "episodes", ["session_id"]) + op.create_index("ix_episodes_task_type", "episodes", ["task_type"]) + op.create_index("ix_episodes_outcome", "episodes", ["outcome"]) + op.create_index("ix_episodes_importance_score", "episodes", ["importance_score"]) + op.create_index("ix_episodes_occurred_at", "episodes", ["occurred_at"]) + op.create_index("ix_episodes_project_task", "episodes", ["project_id", "task_type"]) + op.create_index( + "ix_episodes_project_outcome", "episodes", ["project_id", "outcome"] + ) + op.create_index( + "ix_episodes_agent_task", "episodes", ["agent_instance_id", "task_type"] + ) + op.create_index( + "ix_episodes_project_time", "episodes", ["project_id", "occurred_at"] + ) + op.create_index( + "ix_episodes_importance_time", + "episodes", + ["importance_score", "occurred_at"], + ) + + # ========================================================================= + # Create facts table + # Semantic knowledge triples with confidence scores + # ========================================================================= + op.create_table( + "facts", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column( + "project_id", postgresql.UUID(as_uuid=True), nullable=True + ), # NULL 
for global facts + sa.Column("subject", sa.String(500), nullable=False), + sa.Column("predicate", sa.String(255), nullable=False), + sa.Column("object", sa.Text(), nullable=False), + sa.Column("confidence", sa.Float(), nullable=False, server_default="0.8"), + sa.Column( + "source_episode_ids", + postgresql.ARRAY(postgresql.UUID(as_uuid=True)), + nullable=False, + server_default="{}", + ), + sa.Column("first_learned", sa.DateTime(timezone=True), nullable=False), + sa.Column("last_reinforced", sa.DateTime(timezone=True), nullable=False), + sa.Column( + "reinforcement_count", sa.Integer(), nullable=False, server_default="1" + ), + # Vector embedding + sa.Column("embedding", sa.Text(), nullable=True), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.PrimaryKeyConstraint("id"), + sa.ForeignKeyConstraint( + ["project_id"], + ["projects.id"], + name="fk_facts_project", + ondelete="CASCADE", + ), + ) + + # Fact indexes + op.create_index("ix_facts_project_id", "facts", ["project_id"]) + op.create_index("ix_facts_subject", "facts", ["subject"]) + op.create_index("ix_facts_predicate", "facts", ["predicate"]) + op.create_index("ix_facts_confidence", "facts", ["confidence"]) + op.create_index("ix_facts_subject_predicate", "facts", ["subject", "predicate"]) + op.create_index("ix_facts_project_subject", "facts", ["project_id", "subject"]) + op.create_index( + "ix_facts_confidence_time", "facts", ["confidence", "last_reinforced"] + ) + # Unique constraint for triples within project scope + op.create_index( + "ix_facts_unique_triple", + "facts", + ["project_id", "subject", "predicate", "object"], + unique=True, + postgresql_where=sa.text("project_id IS NOT NULL"), + ) + + # ========================================================================= + # Create procedures table + # Learned skills and procedures + # ========================================================================= + op.create_table( + "procedures", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("agent_type_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("name", sa.String(255), nullable=False), + sa.Column("trigger_pattern", sa.Text(), nullable=False), + sa.Column( + "steps", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default="[]", + ), + sa.Column("success_count", sa.Integer(), nullable=False, server_default="0"), + sa.Column("failure_count", sa.Integer(), nullable=False, server_default="0"), + sa.Column("last_used", sa.DateTime(timezone=True), nullable=True), + # Vector embedding + sa.Column("embedding", sa.Text(), nullable=True), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.PrimaryKeyConstraint("id"), + sa.ForeignKeyConstraint( + ["project_id"], + ["projects.id"], + name="fk_procedures_project", + ondelete="CASCADE", + ), + sa.ForeignKeyConstraint( + ["agent_type_id"], + ["agent_types.id"], + name="fk_procedures_agent_type", + ondelete="SET NULL", + ), + ) + + # Procedure indexes + op.create_index("ix_procedures_project_id", "procedures", ["project_id"]) + op.create_index("ix_procedures_agent_type_id", 
"procedures", ["agent_type_id"]) + op.create_index("ix_procedures_name", "procedures", ["name"]) + op.create_index("ix_procedures_last_used", "procedures", ["last_used"]) + op.create_index( + "ix_procedures_unique_name", + "procedures", + ["project_id", "agent_type_id", "name"], + unique=True, + ) + op.create_index("ix_procedures_project_name", "procedures", ["project_id", "name"]) + op.create_index("ix_procedures_agent_type", "procedures", ["agent_type_id"]) + op.create_index( + "ix_procedures_success_rate", + "procedures", + ["success_count", "failure_count"], + ) + + # ========================================================================= + # Create memory_consolidation_log table + # Tracks consolidation jobs + # ========================================================================= + op.create_table( + "memory_consolidation_log", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column( + "consolidation_type", + consolidation_type_enum, + nullable=False, + ), + sa.Column("source_count", sa.Integer(), nullable=False, server_default="0"), + sa.Column("result_count", sa.Integer(), nullable=False, server_default="0"), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True), + sa.Column( + "status", + consolidation_status_enum, + nullable=False, + server_default="pending", + ), + sa.Column("error", sa.Text(), nullable=True), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.PrimaryKeyConstraint("id"), + ) + + # Consolidation log indexes + op.create_index( + "ix_consolidation_type", + "memory_consolidation_log", + ["consolidation_type"], + ) + op.create_index( + "ix_consolidation_status", + "memory_consolidation_log", + ["status"], + ) + op.create_index( + "ix_consolidation_type_status", + "memory_consolidation_log", + ["consolidation_type", "status"], + ) + op.create_index( + "ix_consolidation_started", + "memory_consolidation_log", + ["started_at"], + ) + + +def downgrade() -> None: + """Drop Agent Memory System tables.""" + + # Drop tables in reverse order (dependencies first) + op.drop_table("memory_consolidation_log") + op.drop_table("procedures") + op.drop_table("facts") + op.drop_table("episodes") + op.drop_table("working_memory") + + # Drop ENUM types + op.execute("DROP TYPE IF EXISTS consolidation_status") + op.execute("DROP TYPE IF EXISTS consolidation_type") + op.execute("DROP TYPE IF EXISTS episode_outcome") + op.execute("DROP TYPE IF EXISTS scope_type") diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index 7a1ea5d..0437187 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -8,6 +8,19 @@ from app.core.database import Base from .base import TimestampMixin, UUIDMixin +# Memory system models +from .memory import ( + ConsolidationStatus, + ConsolidationType, + Episode, + EpisodeOutcome, + Fact, + MemoryConsolidationLog, + Procedure, + ScopeType, + WorkingMemory, +) + # OAuth models (client mode - authenticate via Google/GitHub) from .oauth_account import OAuthAccount @@ -37,7 +50,14 @@ __all__ = [ "AgentInstance", "AgentType", "Base", + # Memory models + "ConsolidationStatus", + "ConsolidationType", + "Episode", + "EpisodeOutcome", + "Fact", "Issue", + "MemoryConsolidationLog", "OAuthAccount", "OAuthAuthorizationCode", 
"OAuthClient", @@ -46,11 +66,14 @@ __all__ = [ "OAuthState", "Organization", "OrganizationRole", + "Procedure", "Project", + "ScopeType", "Sprint", "TimestampMixin", "UUIDMixin", "User", "UserOrganization", "UserSession", + "WorkingMemory", ] diff --git a/backend/app/models/memory/__init__.py b/backend/app/models/memory/__init__.py new file mode 100644 index 0000000..b2cfd29 --- /dev/null +++ b/backend/app/models/memory/__init__.py @@ -0,0 +1,32 @@ +# app/models/memory/__init__.py +""" +Memory System Database Models. + +Provides SQLAlchemy models for the Agent Memory System: +- WorkingMemory: Key-value storage with TTL +- Episode: Experiential memories +- Fact: Semantic knowledge triples +- Procedure: Learned skills +- MemoryConsolidationLog: Consolidation job tracking +""" + +from .consolidation import MemoryConsolidationLog +from .enums import ConsolidationStatus, ConsolidationType, EpisodeOutcome, ScopeType +from .episode import Episode +from .fact import Fact +from .procedure import Procedure +from .working_memory import WorkingMemory + +__all__ = [ + # Enums + "ConsolidationStatus", + "ConsolidationType", + # Models + "Episode", + "EpisodeOutcome", + "Fact", + "MemoryConsolidationLog", + "Procedure", + "ScopeType", + "WorkingMemory", +] diff --git a/backend/app/models/memory/consolidation.py b/backend/app/models/memory/consolidation.py new file mode 100644 index 0000000..a2a309e --- /dev/null +++ b/backend/app/models/memory/consolidation.py @@ -0,0 +1,72 @@ +# app/models/memory/consolidation.py +""" +Memory Consolidation Log database model. + +Tracks memory consolidation jobs that transfer knowledge +between memory tiers. +""" + +from sqlalchemy import Column, DateTime, Enum, Index, Integer, Text + +from app.models.base import Base, TimestampMixin, UUIDMixin + +from .enums import ConsolidationStatus, ConsolidationType + + +class MemoryConsolidationLog(Base, UUIDMixin, TimestampMixin): + """ + Memory consolidation job log. 
+ + Tracks consolidation operations: + - Working -> Episodic (session end) + - Episodic -> Semantic (fact extraction) + - Episodic -> Procedural (procedure learning) + - Pruning (removing low-value memories) + """ + + __tablename__ = "memory_consolidation_log" + + # Consolidation type + consolidation_type: Column[ConsolidationType] = Column( + Enum(ConsolidationType), + nullable=False, + index=True, + ) + + # Counts + source_count = Column(Integer, nullable=False, default=0) + result_count = Column(Integer, nullable=False, default=0) + + # Timing + started_at = Column(DateTime(timezone=True), nullable=False) + completed_at = Column(DateTime(timezone=True), nullable=True) + + # Status + status: Column[ConsolidationStatus] = Column( + Enum(ConsolidationStatus), + nullable=False, + default=ConsolidationStatus.PENDING, + index=True, + ) + + # Error details if failed + error = Column(Text, nullable=True) + + __table_args__ = ( + # Query patterns + Index("ix_consolidation_type_status", "consolidation_type", "status"), + Index("ix_consolidation_started", "started_at"), + ) + + @property + def duration_seconds(self) -> float | None: + """Calculate duration of the consolidation job.""" + if self.completed_at is None or self.started_at is None: + return None + return (self.completed_at - self.started_at).total_seconds() + + def __repr__(self) -> str: + return ( + f"" + ) diff --git a/backend/app/models/memory/enums.py b/backend/app/models/memory/enums.py new file mode 100644 index 0000000..da58dcb --- /dev/null +++ b/backend/app/models/memory/enums.py @@ -0,0 +1,73 @@ +# app/models/memory/enums.py +""" +Enums for Memory System database models. + +These enums define the database-level constraints for memory types +and scoping levels. +""" + +from enum import Enum as PyEnum + + +class ScopeType(str, PyEnum): + """ + Memory scope levels matching the memory service types. + + GLOBAL: System-wide memories accessible by all + PROJECT: Project-scoped memories + AGENT_TYPE: Type-specific memories (shared by instances of same type) + AGENT_INSTANCE: Instance-specific memories + SESSION: Session-scoped ephemeral memories + """ + + GLOBAL = "global" + PROJECT = "project" + AGENT_TYPE = "agent_type" + AGENT_INSTANCE = "agent_instance" + SESSION = "session" + + +class EpisodeOutcome(str, PyEnum): + """ + Outcome of an episode (task execution). + + SUCCESS: Task completed successfully + FAILURE: Task failed + PARTIAL: Task partially completed + """ + + SUCCESS = "success" + FAILURE = "failure" + PARTIAL = "partial" + + +class ConsolidationType(str, PyEnum): + """ + Types of memory consolidation operations. + + WORKING_TO_EPISODIC: Transfer session state to episodic + EPISODIC_TO_SEMANTIC: Extract facts from episodes + EPISODIC_TO_PROCEDURAL: Extract procedures from episodes + PRUNING: Remove low-value memories + """ + + WORKING_TO_EPISODIC = "working_to_episodic" + EPISODIC_TO_SEMANTIC = "episodic_to_semantic" + EPISODIC_TO_PROCEDURAL = "episodic_to_procedural" + PRUNING = "pruning" + + +class ConsolidationStatus(str, PyEnum): + """ + Status of a consolidation job. 
+ + PENDING: Job is queued + RUNNING: Job is currently executing + COMPLETED: Job finished successfully + FAILED: Job failed with errors + """ + + PENDING = "pending" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" diff --git a/backend/app/models/memory/episode.py b/backend/app/models/memory/episode.py new file mode 100644 index 0000000..49c322e --- /dev/null +++ b/backend/app/models/memory/episode.py @@ -0,0 +1,125 @@ +# app/models/memory/episode.py +""" +Episode database model. + +Stores experiential memories - records of past task executions +with context, actions, outcomes, and lessons learned. +""" + +from sqlalchemy import ( + BigInteger, + Column, + DateTime, + Enum, + Float, + ForeignKey, + Index, + String, + Text, +) +from sqlalchemy.dialects.postgresql import ( + JSONB, + UUID as PGUUID, +) +from sqlalchemy.orm import relationship + +from app.models.base import Base, TimestampMixin, UUIDMixin + +from .enums import EpisodeOutcome + +# Import pgvector type - will be available after migration enables extension +try: + from pgvector.sqlalchemy import Vector # type: ignore[import-not-found] +except ImportError: + # Fallback for environments without pgvector + Vector = None + + +class Episode(Base, UUIDMixin, TimestampMixin): + """ + Episodic memory model. + + Records experiential memories from agent task execution: + - What task was performed + - What actions were taken + - What was the outcome + - What lessons were learned + """ + + __tablename__ = "episodes" + + # Foreign keys + project_id = Column( + PGUUID(as_uuid=True), + ForeignKey("projects.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + + agent_instance_id = Column( + PGUUID(as_uuid=True), + ForeignKey("agent_instances.id", ondelete="SET NULL"), + nullable=True, + index=True, + ) + + agent_type_id = Column( + PGUUID(as_uuid=True), + ForeignKey("agent_types.id", ondelete="SET NULL"), + nullable=True, + index=True, + ) + + # Session reference + session_id = Column(String(255), nullable=False, index=True) + + # Task information + task_type = Column(String(100), nullable=False, index=True) + task_description = Column(Text, nullable=False) + + # Actions taken (list of action dictionaries) + actions = Column(JSONB, default=list, nullable=False) + + # Context summary + context_summary = Column(Text, nullable=False) + + # Outcome + outcome: Column[EpisodeOutcome] = Column( + Enum(EpisodeOutcome), + nullable=False, + index=True, + ) + outcome_details = Column(Text, nullable=True) + + # Metrics + duration_seconds = Column(Float, nullable=False, default=0.0) + tokens_used = Column(BigInteger, nullable=False, default=0) + + # Learning + lessons_learned = Column(JSONB, default=list, nullable=False) + importance_score = Column(Float, nullable=False, default=0.5, index=True) + + # Vector embedding for semantic search + # Using 1536 dimensions for OpenAI text-embedding-3-small + embedding = Column(Vector(1536) if Vector else Text, nullable=True) + + # When the episode occurred + occurred_at = Column(DateTime(timezone=True), nullable=False, index=True) + + # Relationships + project = relationship("Project", foreign_keys=[project_id]) + agent_instance = relationship("AgentInstance", foreign_keys=[agent_instance_id]) + agent_type = relationship("AgentType", foreign_keys=[agent_type_id]) + + __table_args__ = ( + # Primary query patterns + Index("ix_episodes_project_task", "project_id", "task_type"), + Index("ix_episodes_project_outcome", "project_id", "outcome"), + Index("ix_episodes_agent_task", 
"agent_instance_id", "task_type"), + Index("ix_episodes_project_time", "project_id", "occurred_at"), + # For importance-based pruning + Index("ix_episodes_importance_time", "importance_score", "occurred_at"), + ) + + def __repr__(self) -> str: + return f"" diff --git a/backend/app/models/memory/fact.py b/backend/app/models/memory/fact.py new file mode 100644 index 0000000..b36299d --- /dev/null +++ b/backend/app/models/memory/fact.py @@ -0,0 +1,103 @@ +# app/models/memory/fact.py +""" +Fact database model. + +Stores semantic memories - learned facts in subject-predicate-object +triple format with confidence scores and source tracking. +""" + +from sqlalchemy import ( + Column, + DateTime, + Float, + ForeignKey, + Index, + Integer, + String, + Text, +) +from sqlalchemy.dialects.postgresql import ( + ARRAY, + UUID as PGUUID, +) +from sqlalchemy.orm import relationship + +from app.models.base import Base, TimestampMixin, UUIDMixin + +# Import pgvector type +try: + from pgvector.sqlalchemy import Vector # type: ignore[import-not-found] +except ImportError: + Vector = None + + +class Fact(Base, UUIDMixin, TimestampMixin): + """ + Semantic memory model. + + Stores learned facts as subject-predicate-object triples: + - "FastAPI" - "uses" - "Starlette framework" + - "Project Alpha" - "requires" - "OAuth authentication" + + Facts have confidence scores that decay over time and can be + reinforced when the same fact is learned again. + """ + + __tablename__ = "facts" + + # Scoping: project_id is NULL for global facts + project_id = Column( + PGUUID(as_uuid=True), + ForeignKey("projects.id", ondelete="CASCADE"), + nullable=True, + index=True, + ) + + # Triple format + subject = Column(String(500), nullable=False, index=True) + predicate = Column(String(255), nullable=False, index=True) + object = Column(Text, nullable=False) + + # Confidence score (0.0 to 1.0) + confidence = Column(Float, nullable=False, default=0.8, index=True) + + # Source tracking: which episodes contributed to this fact + source_episode_ids: Column[list] = Column( + ARRAY(PGUUID(as_uuid=True)), default=list, nullable=False + ) + + # Learning history + first_learned = Column(DateTime(timezone=True), nullable=False) + last_reinforced = Column(DateTime(timezone=True), nullable=False) + reinforcement_count = Column(Integer, nullable=False, default=1) + + # Vector embedding for semantic search + embedding = Column(Vector(1536) if Vector else Text, nullable=True) + + # Relationships + project = relationship("Project", foreign_keys=[project_id]) + + __table_args__ = ( + # Unique constraint on triple within project scope + Index( + "ix_facts_unique_triple", + "project_id", + "subject", + "predicate", + "object", + unique=True, + postgresql_where="project_id IS NOT NULL", + ), + # Query patterns + Index("ix_facts_subject_predicate", "subject", "predicate"), + Index("ix_facts_project_subject", "project_id", "subject"), + Index("ix_facts_confidence_time", "confidence", "last_reinforced"), + # For finding facts by entity (subject or object) + Index("ix_facts_subject", "subject"), + ) + + def __repr__(self) -> str: + return ( + f"" + ) diff --git a/backend/app/models/memory/procedure.py b/backend/app/models/memory/procedure.py new file mode 100644 index 0000000..3342510 --- /dev/null +++ b/backend/app/models/memory/procedure.py @@ -0,0 +1,115 @@ +# app/models/memory/procedure.py +""" +Procedure database model. + +Stores procedural memories - learned skills and procedures +derived from successful task execution patterns. 
+""" + +from sqlalchemy import ( + Column, + DateTime, + ForeignKey, + Index, + Integer, + String, + Text, +) +from sqlalchemy.dialects.postgresql import ( + JSONB, + UUID as PGUUID, +) +from sqlalchemy.orm import relationship + +from app.models.base import Base, TimestampMixin, UUIDMixin + +# Import pgvector type +try: + from pgvector.sqlalchemy import Vector # type: ignore[import-not-found] +except ImportError: + Vector = None + + +class Procedure(Base, UUIDMixin, TimestampMixin): + """ + Procedural memory model. + + Stores learned procedures (skills) extracted from successful + task execution patterns: + - Name and trigger pattern for matching + - Step-by-step actions + - Success/failure tracking + """ + + __tablename__ = "procedures" + + # Scoping + project_id = Column( + PGUUID(as_uuid=True), + ForeignKey("projects.id", ondelete="CASCADE"), + nullable=True, + index=True, + ) + + agent_type_id = Column( + PGUUID(as_uuid=True), + ForeignKey("agent_types.id", ondelete="SET NULL"), + nullable=True, + index=True, + ) + + # Procedure identification + name = Column(String(255), nullable=False, index=True) + trigger_pattern = Column(Text, nullable=False) + + # Steps as JSON array of step objects + # Each step: {order, action, parameters, expected_outcome, fallback_action} + steps = Column(JSONB, default=list, nullable=False) + + # Success tracking + success_count = Column(Integer, nullable=False, default=0) + failure_count = Column(Integer, nullable=False, default=0) + + # Usage tracking + last_used = Column(DateTime(timezone=True), nullable=True, index=True) + + # Vector embedding for semantic matching + embedding = Column(Vector(1536) if Vector else Text, nullable=True) + + # Relationships + project = relationship("Project", foreign_keys=[project_id]) + agent_type = relationship("AgentType", foreign_keys=[agent_type_id]) + + __table_args__ = ( + # Unique procedure name within scope + Index( + "ix_procedures_unique_name", + "project_id", + "agent_type_id", + "name", + unique=True, + ), + # Query patterns + Index("ix_procedures_project_name", "project_id", "name"), + Index("ix_procedures_agent_type", "agent_type_id"), + # For finding best procedures + Index("ix_procedures_success_rate", "success_count", "failure_count"), + ) + + @property + def success_rate(self) -> float: + """Calculate the success rate of this procedure.""" + total = self.success_count + self.failure_count + if total == 0: + return 0.0 + return self.success_count / total + + @property + def total_uses(self) -> int: + """Get total number of times this procedure was used.""" + return self.success_count + self.failure_count + + def __repr__(self) -> str: + return ( + f"" + ) diff --git a/backend/app/models/memory/working_memory.py b/backend/app/models/memory/working_memory.py new file mode 100644 index 0000000..9561125 --- /dev/null +++ b/backend/app/models/memory/working_memory.py @@ -0,0 +1,58 @@ +# app/models/memory/working_memory.py +""" +Working Memory database model. + +Stores ephemeral key-value data for active sessions with TTL support. +Used as database backup when Redis is unavailable. +""" + +from sqlalchemy import Column, DateTime, Enum, Index, String +from sqlalchemy.dialects.postgresql import JSONB + +from app.models.base import Base, TimestampMixin, UUIDMixin + +from .enums import ScopeType + + +class WorkingMemory(Base, UUIDMixin, TimestampMixin): + """ + Working memory storage table. + + Provides database-backed working memory as fallback when + Redis is unavailable. Supports TTL-based expiration. 
+ """ + + __tablename__ = "working_memory" + + # Scoping + scope_type: Column[ScopeType] = Column( + Enum(ScopeType), + nullable=False, + index=True, + ) + scope_id = Column(String(255), nullable=False, index=True) + + # Key-value storage + key = Column(String(255), nullable=False) + value = Column(JSONB, nullable=False) + + # TTL support + expires_at = Column(DateTime(timezone=True), nullable=True, index=True) + + __table_args__ = ( + # Primary lookup: scope + key + Index( + "ix_working_memory_scope_key", + "scope_type", + "scope_id", + "key", + unique=True, + ), + # For cleanup of expired entries + Index("ix_working_memory_expires", "expires_at"), + # For listing all keys in a scope + Index("ix_working_memory_scope_list", "scope_type", "scope_id"), + ) + + def __repr__(self) -> str: + return f"" diff --git a/backend/app/services/memory/types.py b/backend/app/services/memory/types.py index b3f79bd..50cf16c 100644 --- a/backend/app/services/memory/types.py +++ b/backend/app/services/memory/types.py @@ -6,12 +6,17 @@ Core type definitions and interfaces for the Agent Memory System. from abc import ABC, abstractmethod from dataclasses import dataclass, field -from datetime import datetime +from datetime import UTC, datetime from enum import Enum from typing import Any from uuid import UUID +def _utcnow() -> datetime: + """Get current UTC time as timezone-aware datetime.""" + return datetime.now(UTC) + + class MemoryType(str, Enum): """Types of memory in the agent memory system.""" @@ -93,7 +98,7 @@ class MemoryItem: def get_age_seconds(self) -> float: """Get the age of this memory item in seconds.""" - return (datetime.now() - self.created_at).total_seconds() + return (_utcnow() - self.created_at).total_seconds() @dataclass @@ -106,14 +111,14 @@ class WorkingMemoryItem: key: str value: Any expires_at: datetime | None = None - created_at: datetime = field(default_factory=datetime.now) - updated_at: datetime = field(default_factory=datetime.now) + created_at: datetime = field(default_factory=_utcnow) + updated_at: datetime = field(default_factory=_utcnow) def is_expired(self) -> bool: """Check if this item has expired.""" if self.expires_at is None: return False - return datetime.now() > self.expires_at + return _utcnow() > self.expires_at @dataclass @@ -128,8 +133,8 @@ class TaskState: total_steps: int = 0 progress_percent: float = 0.0 context: dict[str, Any] = field(default_factory=dict) - started_at: datetime = field(default_factory=datetime.now) - updated_at: datetime = field(default_factory=datetime.now) + started_at: datetime = field(default_factory=_utcnow) + updated_at: datetime = field(default_factory=_utcnow) @dataclass diff --git a/backend/tests/unit/models/__init__.py b/backend/tests/unit/models/__init__.py new file mode 100644 index 0000000..59b9cf3 --- /dev/null +++ b/backend/tests/unit/models/__init__.py @@ -0,0 +1,2 @@ +# tests/unit/models/__init__.py +"""Unit tests for database models.""" diff --git a/backend/tests/unit/models/memory/__init__.py b/backend/tests/unit/models/memory/__init__.py new file mode 100644 index 0000000..160b6af --- /dev/null +++ b/backend/tests/unit/models/memory/__init__.py @@ -0,0 +1,2 @@ +# tests/unit/models/memory/__init__.py +"""Unit tests for memory database models.""" diff --git a/backend/tests/unit/models/memory/test_enums.py b/backend/tests/unit/models/memory/test_enums.py new file mode 100644 index 0000000..17f12da --- /dev/null +++ b/backend/tests/unit/models/memory/test_enums.py @@ -0,0 +1,71 @@ +# tests/unit/models/memory/test_enums.py 
+"""Unit tests for memory model enums.""" + +from app.models.memory.enums import ( + ConsolidationStatus, + ConsolidationType, + EpisodeOutcome, + ScopeType, +) + + +class TestScopeType: + """Tests for ScopeType enum.""" + + def test_all_values_exist(self) -> None: + """Test all expected scope types exist.""" + assert ScopeType.GLOBAL.value == "global" + assert ScopeType.PROJECT.value == "project" + assert ScopeType.AGENT_TYPE.value == "agent_type" + assert ScopeType.AGENT_INSTANCE.value == "agent_instance" + assert ScopeType.SESSION.value == "session" + + def test_scope_count(self) -> None: + """Test we have exactly 5 scope types.""" + assert len(ScopeType) == 5 + + +class TestEpisodeOutcome: + """Tests for EpisodeOutcome enum.""" + + def test_all_values_exist(self) -> None: + """Test all expected outcome values exist.""" + assert EpisodeOutcome.SUCCESS.value == "success" + assert EpisodeOutcome.FAILURE.value == "failure" + assert EpisodeOutcome.PARTIAL.value == "partial" + + def test_outcome_count(self) -> None: + """Test we have exactly 3 outcome types.""" + assert len(EpisodeOutcome) == 3 + + +class TestConsolidationType: + """Tests for ConsolidationType enum.""" + + def test_all_values_exist(self) -> None: + """Test all expected consolidation types exist.""" + assert ConsolidationType.WORKING_TO_EPISODIC.value == "working_to_episodic" + assert ConsolidationType.EPISODIC_TO_SEMANTIC.value == "episodic_to_semantic" + assert ( + ConsolidationType.EPISODIC_TO_PROCEDURAL.value == "episodic_to_procedural" + ) + assert ConsolidationType.PRUNING.value == "pruning" + + def test_consolidation_count(self) -> None: + """Test we have exactly 4 consolidation types.""" + assert len(ConsolidationType) == 4 + + +class TestConsolidationStatus: + """Tests for ConsolidationStatus enum.""" + + def test_all_values_exist(self) -> None: + """Test all expected status values exist.""" + assert ConsolidationStatus.PENDING.value == "pending" + assert ConsolidationStatus.RUNNING.value == "running" + assert ConsolidationStatus.COMPLETED.value == "completed" + assert ConsolidationStatus.FAILED.value == "failed" + + def test_status_count(self) -> None: + """Test we have exactly 4 status types.""" + assert len(ConsolidationStatus) == 4 diff --git a/backend/tests/unit/models/memory/test_models.py b/backend/tests/unit/models/memory/test_models.py new file mode 100644 index 0000000..9740c27 --- /dev/null +++ b/backend/tests/unit/models/memory/test_models.py @@ -0,0 +1,249 @@ +# tests/unit/models/memory/test_models.py +"""Unit tests for memory database models.""" + +from datetime import UTC, datetime, timedelta + +import pytest + +from app.models.memory import ( + ConsolidationStatus, + ConsolidationType, + Episode, + EpisodeOutcome, + Fact, + MemoryConsolidationLog, + Procedure, + ScopeType, + WorkingMemory, +) + + +class TestWorkingMemoryModel: + """Tests for WorkingMemory model.""" + + def test_tablename(self) -> None: + """Test table name is correct.""" + assert WorkingMemory.__tablename__ == "working_memory" + + def test_has_required_columns(self) -> None: + """Test all required columns exist.""" + columns = WorkingMemory.__table__.columns + assert "id" in columns + assert "scope_type" in columns + assert "scope_id" in columns + assert "key" in columns + assert "value" in columns + assert "expires_at" in columns + assert "created_at" in columns + assert "updated_at" in columns + + def test_has_unique_constraint(self) -> None: + """Test unique constraint on scope+key.""" + indexes = {idx.name: idx for idx in 
WorkingMemory.__table__.indexes} + assert "ix_working_memory_scope_key" in indexes + assert indexes["ix_working_memory_scope_key"].unique + + +class TestEpisodeModel: + """Tests for Episode model.""" + + def test_tablename(self) -> None: + """Test table name is correct.""" + assert Episode.__tablename__ == "episodes" + + def test_has_required_columns(self) -> None: + """Test all required columns exist.""" + columns = Episode.__table__.columns + required = [ + "id", + "project_id", + "agent_instance_id", + "agent_type_id", + "session_id", + "task_type", + "task_description", + "actions", + "context_summary", + "outcome", + "outcome_details", + "duration_seconds", + "tokens_used", + "lessons_learned", + "importance_score", + "embedding", + "occurred_at", + "created_at", + "updated_at", + ] + for col in required: + assert col in columns, f"Missing column: {col}" + + def test_has_foreign_keys(self) -> None: + """Test foreign key relationships exist.""" + columns = Episode.__table__.columns + assert columns["project_id"].foreign_keys + assert columns["agent_instance_id"].foreign_keys + assert columns["agent_type_id"].foreign_keys + + def test_has_relationships(self) -> None: + """Test ORM relationships exist.""" + mapper = Episode.__mapper__ + assert "project" in mapper.relationships + assert "agent_instance" in mapper.relationships + assert "agent_type" in mapper.relationships + + +class TestFactModel: + """Tests for Fact model.""" + + def test_tablename(self) -> None: + """Test table name is correct.""" + assert Fact.__tablename__ == "facts" + + def test_has_required_columns(self) -> None: + """Test all required columns exist.""" + columns = Fact.__table__.columns + required = [ + "id", + "project_id", + "subject", + "predicate", + "object", + "confidence", + "source_episode_ids", + "first_learned", + "last_reinforced", + "reinforcement_count", + "embedding", + "created_at", + "updated_at", + ] + for col in required: + assert col in columns, f"Missing column: {col}" + + def test_project_id_nullable(self) -> None: + """Test project_id is nullable for global facts.""" + columns = Fact.__table__.columns + assert columns["project_id"].nullable + + +class TestProcedureModel: + """Tests for Procedure model.""" + + def test_tablename(self) -> None: + """Test table name is correct.""" + assert Procedure.__tablename__ == "procedures" + + def test_has_required_columns(self) -> None: + """Test all required columns exist.""" + columns = Procedure.__table__.columns + required = [ + "id", + "project_id", + "agent_type_id", + "name", + "trigger_pattern", + "steps", + "success_count", + "failure_count", + "last_used", + "embedding", + "created_at", + "updated_at", + ] + for col in required: + assert col in columns, f"Missing column: {col}" + + def test_success_rate_property(self) -> None: + """Test success_rate calculated property.""" + proc = Procedure() + proc.success_count = 8 + proc.failure_count = 2 + assert proc.success_rate == 0.8 + + def test_success_rate_zero_total(self) -> None: + """Test success_rate with zero total uses.""" + proc = Procedure() + proc.success_count = 0 + proc.failure_count = 0 + assert proc.success_rate == 0.0 + + def test_total_uses_property(self) -> None: + """Test total_uses calculated property.""" + proc = Procedure() + proc.success_count = 5 + proc.failure_count = 3 + assert proc.total_uses == 8 + + +class TestMemoryConsolidationLogModel: + """Tests for MemoryConsolidationLog model.""" + + def test_tablename(self) -> None: + """Test table name is correct.""" + assert 
MemoryConsolidationLog.__tablename__ == "memory_consolidation_log" + + def test_has_required_columns(self) -> None: + """Test all required columns exist.""" + columns = MemoryConsolidationLog.__table__.columns + required = [ + "id", + "consolidation_type", + "source_count", + "result_count", + "started_at", + "completed_at", + "status", + "error", + "created_at", + "updated_at", + ] + for col in required: + assert col in columns, f"Missing column: {col}" + + def test_duration_seconds_property_completed(self) -> None: + """Test duration_seconds with completed job.""" + log = MemoryConsolidationLog() + log.started_at = datetime.now(UTC) + log.completed_at = log.started_at + timedelta(seconds=10) + assert log.duration_seconds == pytest.approx(10.0) + + def test_duration_seconds_property_incomplete(self) -> None: + """Test duration_seconds with incomplete job.""" + log = MemoryConsolidationLog() + log.started_at = datetime.now(UTC) + log.completed_at = None + assert log.duration_seconds is None + + def test_default_status(self) -> None: + """Test default status is PENDING.""" + columns = MemoryConsolidationLog.__table__.columns + assert columns["status"].default.arg == ConsolidationStatus.PENDING + + +class TestModelExports: + """Tests for model package exports.""" + + def test_all_models_exported(self) -> None: + """Test all models are exported from package.""" + from app.models.memory import ( + Episode, + Fact, + MemoryConsolidationLog, + Procedure, + WorkingMemory, + ) + + # Verify these are the actual classes + assert Episode.__tablename__ == "episodes" + assert Fact.__tablename__ == "facts" + assert Procedure.__tablename__ == "procedures" + assert WorkingMemory.__tablename__ == "working_memory" + assert MemoryConsolidationLog.__tablename__ == "memory_consolidation_log" + + def test_enums_exported(self) -> None: + """Test all enums are exported.""" + assert ScopeType.GLOBAL.value == "global" + assert EpisodeOutcome.SUCCESS.value == "success" + assert ConsolidationType.WORKING_TO_EPISODIC.value == "working_to_episodic" + assert ConsolidationStatus.PENDING.value == "pending"
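
A minimal sketch of the problem the types.py hunk fixes: the memory columns use DateTime(timezone=True), so stored timestamps are timezone-aware, and comparing them against a naive datetime.now() raises TypeError. The helper below mirrors the patched _utcnow(); the expiry value is illustrative only.

    from datetime import UTC, datetime, timedelta

    expires_at = datetime.now(UTC) + timedelta(minutes=30)  # aware, as DateTime(timezone=True) columns store it

    # Old behaviour: a naive "now" cannot be compared with an aware timestamp.
    try:
        datetime.now() > expires_at
    except TypeError as exc:
        print(f"naive vs aware comparison fails: {exc}")

    # Patched behaviour: both sides stay timezone-aware, so TTL checks work.
    def _utcnow() -> datetime:
        return datetime.now(UTC)

    print("expired:", _utcnow() > expires_at)  # False until the TTL elapses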
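
A sketch of the cleanup query that ix_working_memory_expires is intended to serve, assuming the application's async SQLAlchemy session; the purge_expired name and the db parameter are illustrative and not part of this patch.

    from datetime import UTC, datetime

    from sqlalchemy import delete
    from sqlalchemy.ext.asyncio import AsyncSession

    from app.models.memory import WorkingMemory


    async def purge_expired(db: AsyncSession) -> int:
        # Delete expired working-memory rows; the WHERE clause matches ix_working_memory_expires.
        result = await db.execute(
            delete(WorkingMemory).where(WorkingMemory.expires_at < datetime.now(UTC))
        )
        await db.commit()
        return result.rowcount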