feat(memory): add database schema and storage layer (Issue #88)

Add SQLAlchemy models for the Agent Memory System:
- WorkingMemory: Key-value storage with TTL for active sessions
- Episode: Experiential memories from task executions
- Fact: Semantic knowledge triples with confidence scores
- Procedure: Learned skills and procedures with success tracking
- MemoryConsolidationLog: Tracks consolidation jobs between memory tiers

Create enums for memory system:
- ScopeType: global, project, agent_type, agent_instance, session
- EpisodeOutcome: success, failure, partial
- ConsolidationType: working_to_episodic, episodic_to_semantic, episodic_to_procedural, pruning
- ConsolidationStatus: pending, running, completed, failed

Add Alembic migration (0005) for all memory tables with:
- Foreign key relationships to projects, agent_instances, agent_types
- Comprehensive indexes for query patterns
- Unique constraints for key lookups and triple uniqueness
- Vector embedding column placeholders (Text fallback until pgvector is enabled)

Fix timezone-naive datetime.now() in types.py TaskState (review feedback)

Includes 30 unit tests for models and enums.

Closes #88

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-05 01:37:58 +01:00
parent 085a748929
commit c9d8c0835c
14 changed files with 1383 additions and 7 deletions


@@ -0,0 +1,32 @@
# app/models/memory/__init__.py
"""
Memory System Database Models.
Provides SQLAlchemy models for the Agent Memory System:
- WorkingMemory: Key-value storage with TTL
- Episode: Experiential memories
- Fact: Semantic knowledge triples
- Procedure: Learned skills
- MemoryConsolidationLog: Consolidation job tracking
"""
from .consolidation import MemoryConsolidationLog
from .enums import ConsolidationStatus, ConsolidationType, EpisodeOutcome, ScopeType
from .episode import Episode
from .fact import Fact
from .procedure import Procedure
from .working_memory import WorkingMemory
__all__ = [
    # Enums
    "ConsolidationStatus",
    "ConsolidationType",
    "EpisodeOutcome",
    "ScopeType",
    # Models
    "Episode",
    "Fact",
    "MemoryConsolidationLog",
    "Procedure",
    "WorkingMemory",
]


@@ -0,0 +1,72 @@
# app/models/memory/consolidation.py
"""
Memory Consolidation Log database model.
Tracks memory consolidation jobs that transfer knowledge
between memory tiers.
"""
from sqlalchemy import Column, DateTime, Enum, Index, Integer, Text
from app.models.base import Base, TimestampMixin, UUIDMixin
from .enums import ConsolidationStatus, ConsolidationType
class MemoryConsolidationLog(Base, UUIDMixin, TimestampMixin):
"""
Memory consolidation job log.
Tracks consolidation operations:
- Working -> Episodic (session end)
- Episodic -> Semantic (fact extraction)
- Episodic -> Procedural (procedure learning)
- Pruning (removing low-value memories)
"""
__tablename__ = "memory_consolidation_log"
# Consolidation type
consolidation_type: Column[ConsolidationType] = Column(
Enum(ConsolidationType),
nullable=False,
index=True,
)
# Counts
source_count = Column(Integer, nullable=False, default=0)
result_count = Column(Integer, nullable=False, default=0)
# Timing
started_at = Column(DateTime(timezone=True), nullable=False)
completed_at = Column(DateTime(timezone=True), nullable=True)
# Status
status: Column[ConsolidationStatus] = Column(
Enum(ConsolidationStatus),
nullable=False,
default=ConsolidationStatus.PENDING,
index=True,
)
# Error details if failed
error = Column(Text, nullable=True)
__table_args__ = (
# Query patterns
Index("ix_consolidation_type_status", "consolidation_type", "status"),
Index("ix_consolidation_started", "started_at"),
)
@property
def duration_seconds(self) -> float | None:
"""Calculate duration of the consolidation job."""
if self.completed_at is None or self.started_at is None:
return None
return (self.completed_at - self.started_at).total_seconds()
def __repr__(self) -> str:
return (
f"<MemoryConsolidationLog {self.id} "
f"type={self.consolidation_type.value} status={self.status.value}>"
)
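
A minimal usage sketch (not part of this commit), assuming a configured SQLAlchemy Session bound to these models is available as `session`:

# Hypothetical example: record a consolidation run and read back its duration.
from datetime import datetime, timezone

from app.models.memory import ConsolidationStatus, ConsolidationType, MemoryConsolidationLog

log = MemoryConsolidationLog(
    consolidation_type=ConsolidationType.EPISODIC_TO_SEMANTIC,
    status=ConsolidationStatus.RUNNING,
    started_at=datetime.now(timezone.utc),
)
session.add(log)
session.flush()

# ... run the fact-extraction job, then close out the log entry ...
log.completed_at = datetime.now(timezone.utc)
log.status = ConsolidationStatus.COMPLETED
log.result_count = 12  # illustrative count of facts produced
session.commit()

print(log.duration_seconds)  # seconds between started_at and completed_at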


@@ -0,0 +1,73 @@
# app/models/memory/enums.py
"""
Enums for Memory System database models.
These enums define the database-level constraints for memory types
and scoping levels.
"""
from enum import Enum as PyEnum
class ScopeType(str, PyEnum):
"""
Memory scope levels matching the memory service types.
GLOBAL: System-wide memories accessible by all
PROJECT: Project-scoped memories
AGENT_TYPE: Type-specific memories (shared by instances of same type)
AGENT_INSTANCE: Instance-specific memories
SESSION: Session-scoped ephemeral memories
"""
GLOBAL = "global"
PROJECT = "project"
AGENT_TYPE = "agent_type"
AGENT_INSTANCE = "agent_instance"
SESSION = "session"
class EpisodeOutcome(str, PyEnum):
"""
Outcome of an episode (task execution).
SUCCESS: Task completed successfully
FAILURE: Task failed
PARTIAL: Task partially completed
"""
SUCCESS = "success"
FAILURE = "failure"
PARTIAL = "partial"
class ConsolidationType(str, PyEnum):
"""
Types of memory consolidation operations.
WORKING_TO_EPISODIC: Transfer session state to episodic
EPISODIC_TO_SEMANTIC: Extract facts from episodes
EPISODIC_TO_PROCEDURAL: Extract procedures from episodes
PRUNING: Remove low-value memories
"""
WORKING_TO_EPISODIC = "working_to_episodic"
EPISODIC_TO_SEMANTIC = "episodic_to_semantic"
EPISODIC_TO_PROCEDURAL = "episodic_to_procedural"
PRUNING = "pruning"
class ConsolidationStatus(str, PyEnum):
"""
Status of a consolidation job.
PENDING: Job is queued
RUNNING: Job is currently executing
COMPLETED: Job finished successfully
FAILED: Job failed with errors
"""
PENDING = "pending"
RUNNING = "running"
COMPLETED = "completed"
FAILED = "failed"


@@ -0,0 +1,125 @@
# app/models/memory/episode.py
"""
Episode database model.
Stores experiential memories - records of past task executions
with context, actions, outcomes, and lessons learned.
"""
from sqlalchemy import (
BigInteger,
Column,
DateTime,
Enum,
Float,
ForeignKey,
Index,
String,
Text,
)
from sqlalchemy.dialects.postgresql import (
JSONB,
UUID as PGUUID,
)
from sqlalchemy.orm import relationship
from app.models.base import Base, TimestampMixin, UUIDMixin
from .enums import EpisodeOutcome
# Import pgvector type - will be available after migration enables extension
try:
from pgvector.sqlalchemy import Vector # type: ignore[import-not-found]
except ImportError:
# Fallback for environments without pgvector
Vector = None
class Episode(Base, UUIDMixin, TimestampMixin):
"""
Episodic memory model.
Records experiential memories from agent task execution:
- What task was performed
- What actions were taken
- What was the outcome
- What lessons were learned
"""
__tablename__ = "episodes"
# Foreign keys
project_id = Column(
PGUUID(as_uuid=True),
ForeignKey("projects.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
agent_instance_id = Column(
PGUUID(as_uuid=True),
ForeignKey("agent_instances.id", ondelete="SET NULL"),
nullable=True,
index=True,
)
agent_type_id = Column(
PGUUID(as_uuid=True),
ForeignKey("agent_types.id", ondelete="SET NULL"),
nullable=True,
index=True,
)
# Session reference
session_id = Column(String(255), nullable=False, index=True)
# Task information
task_type = Column(String(100), nullable=False, index=True)
task_description = Column(Text, nullable=False)
# Actions taken (list of action dictionaries)
actions = Column(JSONB, default=list, nullable=False)
# Context summary
context_summary = Column(Text, nullable=False)
# Outcome
outcome: Column[EpisodeOutcome] = Column(
Enum(EpisodeOutcome),
nullable=False,
index=True,
)
outcome_details = Column(Text, nullable=True)
# Metrics
duration_seconds = Column(Float, nullable=False, default=0.0)
tokens_used = Column(BigInteger, nullable=False, default=0)
# Learning
lessons_learned = Column(JSONB, default=list, nullable=False)
importance_score = Column(Float, nullable=False, default=0.5, index=True)
# Vector embedding for semantic search
# Using 1536 dimensions for OpenAI text-embedding-3-small
embedding = Column(Vector(1536) if Vector else Text, nullable=True)
# When the episode occurred
occurred_at = Column(DateTime(timezone=True), nullable=False, index=True)
# Relationships
project = relationship("Project", foreign_keys=[project_id])
agent_instance = relationship("AgentInstance", foreign_keys=[agent_instance_id])
agent_type = relationship("AgentType", foreign_keys=[agent_type_id])
__table_args__ = (
# Primary query patterns
Index("ix_episodes_project_task", "project_id", "task_type"),
Index("ix_episodes_project_outcome", "project_id", "outcome"),
Index("ix_episodes_agent_task", "agent_instance_id", "task_type"),
Index("ix_episodes_project_time", "project_id", "occurred_at"),
# For importance-based pruning
Index("ix_episodes_importance_time", "importance_score", "occurred_at"),
)
def __repr__(self) -> str:
return f"<Episode {self.id} task={self.task_type} outcome={self.outcome.value}>"
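
A query sketch (not in the diff), assuming `session` is a configured Session and `project_id` is an existing project UUID; the filters line up with the ix_episodes_project_task and ix_episodes_project_time indexes:

# Hypothetical example: fetch the ten most recent successful episodes
# for a given project and task type.
from sqlalchemy import select

from app.models.memory import Episode, EpisodeOutcome

stmt = (
    select(Episode)
    .where(
        Episode.project_id == project_id,
        Episode.task_type == "code_review",  # illustrative task type
        Episode.outcome == EpisodeOutcome.SUCCESS,
    )
    .order_by(Episode.occurred_at.desc())
    .limit(10)
)
recent_successes = session.scalars(stmt).all()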


@@ -0,0 +1,103 @@
# app/models/memory/fact.py
"""
Fact database model.
Stores semantic memories - learned facts in subject-predicate-object
triple format with confidence scores and source tracking.
"""
from sqlalchemy import (
Column,
DateTime,
Float,
ForeignKey,
Index,
Integer,
String,
Text,
)
from sqlalchemy.dialects.postgresql import (
ARRAY,
UUID as PGUUID,
)
from sqlalchemy.orm import relationship
from app.models.base import Base, TimestampMixin, UUIDMixin
# Import pgvector type
try:
from pgvector.sqlalchemy import Vector # type: ignore[import-not-found]
except ImportError:
Vector = None
class Fact(Base, UUIDMixin, TimestampMixin):
"""
Semantic memory model.
Stores learned facts as subject-predicate-object triples:
- "FastAPI" - "uses" - "Starlette framework"
- "Project Alpha" - "requires" - "OAuth authentication"
Facts have confidence scores that decay over time and can be
reinforced when the same fact is learned again.
"""
__tablename__ = "facts"
# Scoping: project_id is NULL for global facts
project_id = Column(
PGUUID(as_uuid=True),
ForeignKey("projects.id", ondelete="CASCADE"),
nullable=True,
index=True,
)
# Triple format
subject = Column(String(500), nullable=False, index=True)
predicate = Column(String(255), nullable=False, index=True)
object = Column(Text, nullable=False)
# Confidence score (0.0 to 1.0)
confidence = Column(Float, nullable=False, default=0.8, index=True)
# Source tracking: which episodes contributed to this fact
source_episode_ids: Column[list] = Column(
ARRAY(PGUUID(as_uuid=True)), default=list, nullable=False
)
# Learning history
first_learned = Column(DateTime(timezone=True), nullable=False)
last_reinforced = Column(DateTime(timezone=True), nullable=False)
reinforcement_count = Column(Integer, nullable=False, default=1)
# Vector embedding for semantic search
embedding = Column(Vector(1536) if Vector else Text, nullable=True)
# Relationships
project = relationship("Project", foreign_keys=[project_id])
__table_args__ = (
# Unique constraint on triple within project scope
Index(
"ix_facts_unique_triple",
"project_id",
"subject",
"predicate",
"object",
unique=True,
postgresql_where="project_id IS NOT NULL",
),
# Query patterns
Index("ix_facts_subject_predicate", "subject", "predicate"),
Index("ix_facts_project_subject", "project_id", "subject"),
Index("ix_facts_confidence_time", "confidence", "last_reinforced"),
# For entity lookups by subject
Index("ix_facts_subject", "subject"),
)
def __repr__(self) -> str:
return (
f"<Fact {self.id} '{self.subject}' - '{self.predicate}' - "
f"'{self.object[:50]}...' conf={self.confidence:.2f}>"
)
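
A sketch of the reinforcement pattern the docstring describes (assumptions: `session` is a configured Session, `project_id` is an existing project UUID, and the triple values are illustrative):

# Hypothetical example: reinforce a known triple or store it on first sighting.
from datetime import datetime, timezone

from sqlalchemy import select

from app.models.memory import Fact

now = datetime.now(timezone.utc)
existing = session.scalars(
    select(Fact).where(
        Fact.project_id == project_id,
        Fact.subject == "FastAPI",
        Fact.predicate == "uses",
        Fact.object == "Starlette framework",
    )
).first()

if existing is None:
    session.add(
        Fact(
            project_id=project_id,
            subject="FastAPI",
            predicate="uses",
            object="Starlette framework",
            first_learned=now,
            last_reinforced=now,
        )
    )
else:
    existing.confidence = min(1.0, existing.confidence + 0.05)  # illustrative boost
    existing.reinforcement_count += 1
    existing.last_reinforced = now
session.commit()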


@@ -0,0 +1,115 @@
# app/models/memory/procedure.py
"""
Procedure database model.
Stores procedural memories - learned skills and procedures
derived from successful task execution patterns.
"""
from sqlalchemy import (
Column,
DateTime,
ForeignKey,
Index,
Integer,
String,
Text,
)
from sqlalchemy.dialects.postgresql import (
JSONB,
UUID as PGUUID,
)
from sqlalchemy.orm import relationship
from app.models.base import Base, TimestampMixin, UUIDMixin
# Import pgvector type
try:
from pgvector.sqlalchemy import Vector # type: ignore[import-not-found]
except ImportError:
Vector = None
class Procedure(Base, UUIDMixin, TimestampMixin):
"""
Procedural memory model.
Stores learned procedures (skills) extracted from successful
task execution patterns:
- Name and trigger pattern for matching
- Step-by-step actions
- Success/failure tracking
"""
__tablename__ = "procedures"
# Scoping
project_id = Column(
PGUUID(as_uuid=True),
ForeignKey("projects.id", ondelete="CASCADE"),
nullable=True,
index=True,
)
agent_type_id = Column(
PGUUID(as_uuid=True),
ForeignKey("agent_types.id", ondelete="SET NULL"),
nullable=True,
index=True,
)
# Procedure identification
name = Column(String(255), nullable=False, index=True)
trigger_pattern = Column(Text, nullable=False)
# Steps as JSON array of step objects
# Each step: {order, action, parameters, expected_outcome, fallback_action}
steps = Column(JSONB, default=list, nullable=False)
# Success tracking
success_count = Column(Integer, nullable=False, default=0)
failure_count = Column(Integer, nullable=False, default=0)
# Usage tracking
last_used = Column(DateTime(timezone=True), nullable=True, index=True)
# Vector embedding for semantic matching
embedding = Column(Vector(1536) if Vector else Text, nullable=True)
# Relationships
project = relationship("Project", foreign_keys=[project_id])
agent_type = relationship("AgentType", foreign_keys=[agent_type_id])
__table_args__ = (
# Unique procedure name within scope
Index(
"ix_procedures_unique_name",
"project_id",
"agent_type_id",
"name",
unique=True,
),
# Query patterns
Index("ix_procedures_project_name", "project_id", "name"),
Index("ix_procedures_agent_type", "agent_type_id"),
# For finding best procedures
Index("ix_procedures_success_rate", "success_count", "failure_count"),
)
@property
def success_rate(self) -> float:
"""Calculate the success rate of this procedure."""
total = self.success_count + self.failure_count
if total == 0:
return 0.0
return self.success_count / total
@property
def total_uses(self) -> int:
"""Get total number of times this procedure was used."""
return self.success_count + self.failure_count
def __repr__(self) -> str:
return (
f"<Procedure {self.name} ({self.id}) success_rate={self.success_rate:.2%}>"
)
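
A small usage sketch (not part of this commit), assuming `session` is a configured Session and `procedure_id` is the UUID of an existing row:

# Hypothetical example: record the outcome of running a learned procedure.
from datetime import datetime, timezone

from app.models.memory import Procedure

proc = session.get(Procedure, procedure_id)
if proc is not None:
    proc.success_count += 1  # or failure_count += 1 on failure
    proc.last_used = datetime.now(timezone.utc)
    session.commit()
    print(f"{proc.name}: {proc.success_rate:.0%} over {proc.total_uses} uses")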


@@ -0,0 +1,58 @@
# app/models/memory/working_memory.py
"""
Working Memory database model.
Stores ephemeral key-value data for active sessions with TTL support.
Used as a database-backed fallback when Redis is unavailable.
"""
from sqlalchemy import Column, DateTime, Enum, Index, String
from sqlalchemy.dialects.postgresql import JSONB
from app.models.base import Base, TimestampMixin, UUIDMixin
from .enums import ScopeType
class WorkingMemory(Base, UUIDMixin, TimestampMixin):
"""
Working memory storage table.
Provides database-backed working memory as fallback when
Redis is unavailable. Supports TTL-based expiration.
"""
__tablename__ = "working_memory"
# Scoping
scope_type: Column[ScopeType] = Column(
Enum(ScopeType),
nullable=False,
index=True,
)
scope_id = Column(String(255), nullable=False, index=True)
# Key-value storage
key = Column(String(255), nullable=False)
value = Column(JSONB, nullable=False)
# TTL support
expires_at = Column(DateTime(timezone=True), nullable=True, index=True)
__table_args__ = (
# Primary lookup: scope + key
Index(
"ix_working_memory_scope_key",
"scope_type",
"scope_id",
"key",
unique=True,
),
# For cleanup of expired entries
Index("ix_working_memory_expires", "expires_at"),
# For listing all keys in a scope
Index("ix_working_memory_scope_list", "scope_type", "scope_id"),
)
def __repr__(self) -> str:
return f"<WorkingMemory {self.scope_type.value}:{self.scope_id}:{self.key}>"
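
A cleanup sketch using the expires_at index (assumption: `session` is a configured Session; this is how a periodic TTL sweep might look, not code from this commit):

# Hypothetical example: purge expired working-memory entries.
from datetime import datetime, timezone

from sqlalchemy import delete

from app.models.memory import WorkingMemory

session.execute(
    delete(WorkingMemory).where(
        WorkingMemory.expires_at.is_not(None),
        WorkingMemory.expires_at < datetime.now(timezone.utc),
    )
)
session.commit()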