feat(memory): add memory consolidation task and switch source_episode_ids to JSON

- Added a `memory_consolidation` task module and updated the expected `__all__` contents in the task export tests.
- Switched `source_episode_ids` on the `Fact` model to a generic JSON column for cross-database compatibility.
- Revised the related database migrations to use JSONB instead of ARRAY(UUID).
- Raised the pytest worker count in the Makefile from 16 to 20 to speed up the test run.
commit b232298c61 (parent cf6291ac8e)
2026-01-05 15:38:52 +01:00
4 changed files with 18 additions and 14 deletions

Makefile

@@ -80,7 +80,7 @@ test:
 test-cov:
 	@echo "🧪 Running tests with coverage..."
-	@IS_TEST=True PYTHONPATH=. uv run pytest --cov=app --cov-report=term-missing --cov-report=html -n 16
+	@IS_TEST=True PYTHONPATH=. uv run pytest --cov=app --cov-report=term-missing --cov-report=html -n 20
 	@echo "📊 Coverage report generated in htmlcov/index.html"

 # ============================================================================

Database migration

@@ -247,11 +247,12 @@ def upgrade() -> None:
         sa.Column("predicate", sa.String(255), nullable=False),
         sa.Column("object", sa.Text(), nullable=False),
         sa.Column("confidence", sa.Float(), nullable=False, server_default="0.8"),
+        # Source episode IDs stored as JSON array of UUID strings for cross-db compatibility
         sa.Column(
             "source_episode_ids",
-            postgresql.ARRAY(postgresql.UUID(as_uuid=True)),
+            postgresql.JSONB(astext_type=sa.Text()),
             nullable=False,
-            server_default="{}",
+            server_default="[]",
         ),
         sa.Column("first_learned", sa.DateTime(timezone=True), nullable=False),
         sa.Column("last_reinforced", sa.DateTime(timezone=True), nullable=False),

Fact model

@@ -18,11 +18,9 @@ from sqlalchemy import (
     Text,
     text,
 )
-from sqlalchemy.dialects.postgresql import (
-    ARRAY,
-    UUID as PGUUID,
-)
+from sqlalchemy.dialects.postgresql import UUID as PGUUID
 from sqlalchemy.orm import relationship
+from sqlalchemy.types import JSON

 from app.models.base import Base, TimestampMixin, UUIDMixin
@@ -63,10 +61,8 @@ class Fact(Base, UUIDMixin, TimestampMixin):
     # Confidence score (0.0 to 1.0)
     confidence = Column(Float, nullable=False, default=0.8, index=True)

-    # Source tracking: which episodes contributed to this fact
-    source_episode_ids: Column[list] = Column(
-        ARRAY(PGUUID(as_uuid=True)), default=list, nullable=False
-    )
+    # Source tracking: which episodes contributed to this fact (stored as JSON array of UUID strings)
+    source_episode_ids: Column[list] = Column(JSON, default=list, nullable=False)

     # Learning history
     first_learned = Column(DateTime(timezone=True), nullable=False)
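
Since plain JSON cannot serialize `uuid.UUID` objects, callers now have to round-trip episode IDs through strings. A hypothetical usage sketch (the helper names are illustrative, not from this repo):

```python
import uuid

def add_source_episode(fact: "Fact", episode_id: uuid.UUID) -> None:
    # JSON columns can't hold uuid.UUID, so IDs are stored as strings.
    # Rebuilding the list (rather than calling .append()) also ensures
    # SQLAlchemy's change tracking sees the update; plain JSON columns
    # don't detect in-place mutation unless wrapped in MutableList.
    fact.source_episode_ids = [*fact.source_episode_ids, str(episode_id)]

def get_source_episodes(fact: "Fact") -> list[uuid.UUID]:
    # Parse the stored strings back into UUID objects on read.
    return [uuid.UUID(s) for s in fact.source_episode_ids]
```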
@@ -94,8 +90,7 @@ class Fact(Base, UUIDMixin, TimestampMixin):
         Index("ix_facts_subject_predicate", "subject", "predicate"),
         Index("ix_facts_project_subject", "project_id", "subject"),
         Index("ix_facts_confidence_time", "confidence", "last_reinforced"),
-        # For finding facts by entity (subject or object)
-        Index("ix_facts_subject", "subject"),
+        # Note: subject already has index=True on Column definition, no need for explicit index
         # Data integrity constraints
         CheckConstraint(
             "confidence >= 0.0 AND confidence <= 1.0",

Task module tests

@@ -304,10 +304,18 @@ class TestTaskModuleExports:
         assert hasattr(tasks, "sync")
         assert hasattr(tasks, "workflow")
         assert hasattr(tasks, "cost")
+        assert hasattr(tasks, "memory_consolidation")

     def test_tasks_all_attribute_is_correct(self):
         """Test that __all__ contains all expected module names."""
         from app import tasks

-        expected_modules = ["agent", "git", "sync", "workflow", "cost"]
+        expected_modules = [
+            "agent",
+            "git",
+            "sync",
+            "workflow",
+            "cost",
+            "memory_consolidation",
+        ]
         assert set(tasks.__all__) == set(expected_modules)
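
For context, the module list these tests assert against would correspond to something like the following sketch of `app/tasks/__init__.py` (assumed shape; the file itself isn't part of this diff):

```python
# Hypothetical app/tasks/__init__.py after this change; submodule names
# are taken from the test's expected_modules list.
from . import agent, cost, git, memory_consolidation, sync, workflow

__all__ = ["agent", "git", "sync", "workflow", "cost", "memory_consolidation"]
```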