diff --git a/backend/app/alembic/versions/0003_enable_pgvector_extension.py b/backend/app/alembic/versions/0003_enable_pgvector_extension.py new file mode 100644 index 0000000..5d59a45 --- /dev/null +++ b/backend/app/alembic/versions/0003_enable_pgvector_extension.py @@ -0,0 +1,66 @@ +"""Enable pgvector extension + +Revision ID: 0003 +Revises: 0002 +Create Date: 2025-12-30 + +This migration enables the pgvector extension for PostgreSQL, which provides +vector similarity search capabilities required for the RAG (Retrieval-Augmented +Generation) knowledge base system. + +Vector Dimension Reference (per ADR-008 and SPIKE-006): +--------------------------------------------------------- +The dimension size depends on the embedding model used: + +| Model | Dimensions | Use Case | +|----------------------------|------------|------------------------------| +| text-embedding-3-small | 1536 | General docs, conversations | +| text-embedding-3-large | 256-3072 | High accuracy (configurable) | +| voyage-code-3 | 1024 | Code files (Python, JS, etc) | +| voyage-3-large | 1024 | High quality general purpose | +| nomic-embed-text (Ollama) | 768 | Local/fallback embedding | + +Recommended defaults for Syndarix: +- Documentation/conversations: 1536 (text-embedding-3-small) +- Code files: 1024 (voyage-code-3) + +Prerequisites: +-------------- +This migration requires PostgreSQL with the pgvector extension installed. +The Docker Compose configuration uses `pgvector/pgvector:pg17` which includes +the extension pre-installed. + +References: +----------- +- ADR-008: Knowledge Base and RAG Architecture +- SPIKE-006: Knowledge Base with pgvector for RAG System +- https://github.com/pgvector/pgvector +""" + +from collections.abc import Sequence + +from alembic import op + +# revision identifiers, used by Alembic. 
revision: str = "0003"
# Parent revision: this migration applies directly on top of 0002.
down_revision: str | None = "0002"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Install the ``vector`` (pgvector) extension in the target database.

    ``IF NOT EXISTS`` keeps the statement safe to re-run: when the extension
    is already present the statement succeeds without changing anything.
    """
    create_extension_sql = "CREATE EXTENSION IF NOT EXISTS vector"
    op.execute(create_extension_sql)


def downgrade() -> None:
    """Remove the ``vector`` (pgvector) extension from the target database.

    No ``CASCADE`` is issued, so the drop is expected to fail while any table
    still has a vector column. Migrations that add such columns must be
    rolled back before this one.
    """
    drop_extension_sql = "DROP EXTENSION IF EXISTS vector"
    op.execute(drop_extension_sql)