syndarix/backend/app/models/memory/fact.py
Felipe Cardoso b232298c61 feat(memory): add memory consolidation task and switch source_episode_ids to JSON
- Added `memory_consolidation` to the task list and updated `__all__` in test files.
- Updated `source_episode_ids` in `Fact` model to use JSON for cross-database compatibility.
- Revised related database migrations to use JSONB instead of ARRAY (see the migration sketch below).
- Adjusted test concurrency in Makefile for improved test performance.
2026-01-05 15:38:52 +01:00
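Before the file itself, a minimal sketch of what the ARRAY-to-JSONB migration change described above might look like, assuming Alembic. The revision identifiers, the prior column type, and the USING casts are assumptions for illustration, not the repo's actual migration:

# Hypothetical Alembic sketch of the ARRAY -> JSONB change; revision IDs,
# prior column type, and USING casts are assumptions.
from alembic import op
from sqlalchemy.dialects import postgresql

revision = "xxxx"       # placeholder
down_revision = "yyyy"  # placeholder


def upgrade() -> None:
    # Convert the Postgres-only uuid[] column to JSONB in place,
    # casting existing rows so no episode IDs are lost.
    op.alter_column(
        "facts",
        "source_episode_ids",
        type_=postgresql.JSONB(),
        postgresql_using="to_jsonb(source_episode_ids)",
    )


def downgrade() -> None:
    # Cast the JSONB array of UUID strings back to uuid[].
    op.alter_column(
        "facts",
        "source_episode_ids",
        type_=postgresql.ARRAY(postgresql.UUID(as_uuid=True)),
        postgresql_using=(
            "ARRAY(SELECT elem::uuid "
            "FROM jsonb_array_elements_text(source_episode_ids) AS t(elem))"
        ),
    )

An in-place alter_column with postgresql_using avoids a drop-and-recreate, so existing rows survive the type change; the migration stays JSONB on Postgres while the model below declares the portable generic JSON type.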


# app/models/memory/fact.py
"""
Fact database model.

Stores semantic memories - learned facts in subject-predicate-object
triple format with confidence scores and source tracking.
"""
from sqlalchemy import (
    CheckConstraint,
    Column,
    DateTime,
    Float,
    ForeignKey,
    Index,
    Integer,
    String,
    Text,
    text,
)
from sqlalchemy.dialects.postgresql import UUID as PGUUID
from sqlalchemy.orm import relationship
from sqlalchemy.types import JSON

from app.models.base import Base, TimestampMixin, UUIDMixin

# Import pgvector type; fall back to None when the optional dependency
# is not installed (e.g. non-Postgres test runs).
try:
    from pgvector.sqlalchemy import Vector  # type: ignore[import-not-found]
except ImportError:
    Vector = None


class Fact(Base, UUIDMixin, TimestampMixin):
    """
    Semantic memory model.

    Stores learned facts as subject-predicate-object triples:
    - "FastAPI" - "uses" - "Starlette framework"
    - "Project Alpha" - "requires" - "OAuth authentication"

    Facts have confidence scores that decay over time and can be
    reinforced when the same fact is learned again.
    """

    __tablename__ = "facts"

    # Scoping: project_id is NULL for global facts
    project_id = Column(
        PGUUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=True,
        index=True,
    )
    # Triple format
    subject = Column(String(500), nullable=False, index=True)
    predicate = Column(String(255), nullable=False, index=True)
    object = Column(Text, nullable=False)

    # Confidence score (0.0 to 1.0)
    confidence = Column(Float, nullable=False, default=0.8, index=True)

    # Source tracking: which episodes contributed to this fact, stored as
    # a JSON array of UUID strings for cross-database compatibility.
    source_episode_ids = Column(JSON, default=list, nullable=False)
    # Learning history
    first_learned = Column(DateTime(timezone=True), nullable=False)
    last_reinforced = Column(DateTime(timezone=True), nullable=False)
    reinforcement_count = Column(Integer, nullable=False, default=1)

    # Vector embedding for semantic search; falls back to Text when
    # pgvector is unavailable so the model still maps on other databases.
    embedding = Column(Vector(1536) if Vector else Text, nullable=True)

    # Relationships
    project = relationship("Project", foreign_keys=[project_id])
    __table_args__ = (
        # Unique constraint on triple within project scope. Global facts
        # (project_id IS NULL) are excluded by the partial predicate, so
        # deduplicating them is left to application code.
        Index(
            "ix_facts_unique_triple",
            "project_id",
            "subject",
            "predicate",
            "object",
            unique=True,
            postgresql_where=text("project_id IS NOT NULL"),
        ),
        # Query patterns
        Index("ix_facts_subject_predicate", "subject", "predicate"),
        Index("ix_facts_project_subject", "project_id", "subject"),
        Index("ix_facts_confidence_time", "confidence", "last_reinforced"),
        # Note: subject already has index=True on its Column definition,
        # so no extra single-column index is needed here.
        # Data integrity constraints
        CheckConstraint(
            "confidence >= 0.0 AND confidence <= 1.0",
            name="ck_facts_confidence_range",
        ),
    )

    def __repr__(self) -> str:
        return (
            f"<Fact {self.id} '{self.subject}' - '{self.predicate}' - "
            f"'{self.object[:50]}...' conf={self.confidence:.2f}>"
        )
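
A minimal usage sketch of the create-or-reinforce flow the class docstring describes, assuming a synchronous SQLAlchemy session. The upsert_fact helper, the +0.05 confidence bump, and the session argument are hypothetical illustrations, not part of this module:

# Hypothetical create-or-reinforce helper; session setup, the helper
# name, and the confidence increment are assumptions.
from datetime import datetime, timezone


def upsert_fact(session, subject: str, predicate: str, obj: str,
                episode_id: str, project_id=None) -> Fact:
    """Create a fact, or reinforce it if the triple already exists."""
    fact = (
        session.query(Fact)
        .filter_by(project_id=project_id, subject=subject,
                   predicate=predicate, object=obj)
        .one_or_none()
    )
    now = datetime.now(timezone.utc)
    if fact is None:
        fact = Fact(
            project_id=project_id,
            subject=subject,
            predicate=predicate,
            object=obj,
            source_episode_ids=[episode_id],
            first_learned=now,
            last_reinforced=now,
        )
        session.add(fact)
    else:
        # Reinforcement: bump the count, refresh the timestamp, and
        # nudge confidence upward (increment chosen for illustration).
        fact.reinforcement_count += 1
        fact.last_reinforced = now
        fact.confidence = min(1.0, fact.confidence + 0.05)
        if episode_id not in fact.source_episode_ids:
            # Plain JSON columns don't track in-place mutation, so
            # reassign the list instead of appending to it.
            fact.source_episode_ids = [*fact.source_episode_ids, episode_id]
    return fact

Reassigning source_episode_ids (rather than mutating it) matters because a generic JSON column only flags changes on attribute assignment unless a MutableList wrapper is configured.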