feat(memory): add memory consolidation task and switch source_episode_ids to JSON

- Added a `memory_consolidation` task module and updated the expected `__all__` contents in the task export tests.
- Switched `source_episode_ids` on the `Fact` model to a generic JSON column for cross-database compatibility.
- Revised the related database migrations to use JSONB instead of ARRAY(UUID).
- Raised the pytest worker count in the Makefile from 16 to 20 to speed up the test run.
commit b232298c61 (parent cf6291ac8e)
2026-01-05 15:38:52 +01:00
4 changed files with 18 additions and 14 deletions

Makefile

@@ -80,7 +80,7 @@ test:
 test-cov:
 	@echo "🧪 Running tests with coverage..."
-	@IS_TEST=True PYTHONPATH=. uv run pytest --cov=app --cov-report=term-missing --cov-report=html -n 16
+	@IS_TEST=True PYTHONPATH=. uv run pytest --cov=app --cov-report=term-missing --cov-report=html -n 20
 	@echo "📊 Coverage report generated in htmlcov/index.html"

 # ============================================================================

Database migration

@@ -247,11 +247,12 @@ def upgrade() -> None:
         sa.Column("predicate", sa.String(255), nullable=False),
         sa.Column("object", sa.Text(), nullable=False),
         sa.Column("confidence", sa.Float(), nullable=False, server_default="0.8"),
+        # Source episode IDs stored as JSON array of UUID strings for cross-db compatibility
         sa.Column(
             "source_episode_ids",
-            postgresql.ARRAY(postgresql.UUID(as_uuid=True)),
+            postgresql.JSONB(astext_type=sa.Text()),
             nullable=False,
-            server_default="{}",
+            server_default="[]",
         ),
         sa.Column("first_learned", sa.DateTime(timezone=True), nullable=False),
         sa.Column("last_reinforced", sa.DateTime(timezone=True), nullable=False),

Fact model

@@ -18,11 +18,9 @@ from sqlalchemy import (
     Text,
     text,
 )
-from sqlalchemy.dialects.postgresql import (
-    ARRAY,
-    UUID as PGUUID,
-)
+from sqlalchemy.dialects.postgresql import UUID as PGUUID
 from sqlalchemy.orm import relationship
+from sqlalchemy.types import JSON

 from app.models.base import Base, TimestampMixin, UUIDMixin
@@ -63,10 +61,8 @@ class Fact(Base, UUIDMixin, TimestampMixin):
     # Confidence score (0.0 to 1.0)
     confidence = Column(Float, nullable=False, default=0.8, index=True)

-    # Source tracking: which episodes contributed to this fact
-    source_episode_ids: Column[list] = Column(
-        ARRAY(PGUUID(as_uuid=True)), default=list, nullable=False
-    )
+    # Source tracking: which episodes contributed to this fact (stored as JSON array of UUID strings)
+    source_episode_ids: Column[list] = Column(JSON, default=list, nullable=False)

     # Learning history
     first_learned = Column(DateTime(timezone=True), nullable=False)
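
Since plain JSON cannot serialize `uuid.UUID` objects, callers now have to round-trip episode IDs through strings. A hypothetical usage sketch (the helper names are illustrative, not from this repo):

```python
import uuid

def add_source_episode(fact: "Fact", episode_id: uuid.UUID) -> None:
    # JSON columns can't hold uuid.UUID, so IDs are stored as strings.
    # Rebuilding the list (rather than calling .append()) also ensures
    # SQLAlchemy's change tracking sees the update; plain JSON columns
    # don't detect in-place mutation unless wrapped in MutableList.
    fact.source_episode_ids = [*fact.source_episode_ids, str(episode_id)]

def get_source_episodes(fact: "Fact") -> list[uuid.UUID]:
    # Parse the stored strings back into UUID objects on read.
    return [uuid.UUID(s) for s in fact.source_episode_ids]
```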
@@ -94,8 +90,7 @@ class Fact(Base, UUIDMixin, TimestampMixin):
         Index("ix_facts_subject_predicate", "subject", "predicate"),
         Index("ix_facts_project_subject", "project_id", "subject"),
         Index("ix_facts_confidence_time", "confidence", "last_reinforced"),
-        # For finding facts by entity (subject or object)
-        Index("ix_facts_subject", "subject"),
+        # Note: subject already has index=True on Column definition, no need for explicit index
         # Data integrity constraints
         CheckConstraint(
             "confidence >= 0.0 AND confidence <= 1.0",

Task module tests

@@ -304,10 +304,18 @@ class TestTaskModuleExports:
         assert hasattr(tasks, "sync")
         assert hasattr(tasks, "workflow")
         assert hasattr(tasks, "cost")
+        assert hasattr(tasks, "memory_consolidation")

     def test_tasks_all_attribute_is_correct(self):
         """Test that __all__ contains all expected module names."""
         from app import tasks

-        expected_modules = ["agent", "git", "sync", "workflow", "cost"]
+        expected_modules = [
+            "agent",
+            "git",
+            "sync",
+            "workflow",
+            "cost",
+            "memory_consolidation",
+        ]
         assert set(tasks.__all__) == set(expected_modules)
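
For context, the module list these tests assert against would correspond to something like the following sketch of `app/tasks/__init__.py` (assumed shape; the file itself isn't part of this diff):

```python
# Hypothetical app/tasks/__init__.py after this change; submodule names
# are taken from the test's expected_modules list.
from . import agent, cost, git, memory_consolidation, sync, workflow

__all__ = ["agent", "git", "sync", "workflow", "cost", "memory_consolidation"]
```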