forked from cardosofelipe/fast-next-template
feat(memory): implement memory indexing and retrieval engine (#94)
Add comprehensive indexing and retrieval system for memory search:

- VectorIndex for semantic similarity search using cosine similarity
- TemporalIndex for time-based queries with range and recency support
- EntityIndex for entity-based lookups with multi-entity intersection
- OutcomeIndex for success/failure filtering on episodes
- MemoryIndexer as unified interface for all index types
- RetrievalEngine with hybrid search combining all indices
- RelevanceScorer for multi-signal relevance scoring
- RetrievalCache for LRU caching of search results

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
497
backend/tests/unit/services/memory/indexing/test_index.py
Normal file
497
backend/tests/unit/services/memory/indexing/test_index.py
Normal file
@@ -0,0 +1,497 @@
|
||||
# tests/unit/services/memory/indexing/test_index.py
|
||||
"""Unit tests for memory indexing."""
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from app.services.memory.indexing.index import (
|
||||
EntityIndex,
|
||||
MemoryIndexer,
|
||||
OutcomeIndex,
|
||||
TemporalIndex,
|
||||
VectorIndex,
|
||||
get_memory_indexer,
|
||||
)
|
||||
from app.services.memory.types import Episode, Fact, MemoryType, Outcome, Procedure
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
|
||||
"""Get current UTC time."""
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
def make_episode(
    embedding: list[float] | None = None,
    outcome: Outcome = Outcome.SUCCESS,
    occurred_at: datetime | None = None,
) -> Episode:
    """Build an Episode filled with fixed test data.

    Args:
        embedding: Vector stored on the episode; ``None`` leaves it unset.
        outcome: Outcome recorded on the episode.
        occurred_at: When the episode happened; defaults to the current
            UTC time.

    Returns:
        A new Episode with freshly generated UUIDs for every id field.
    """
    # Take one timestamp so the defaulted occurred_at, created_at and
    # updated_at agree instead of drifting by a few microseconds.
    now = _utcnow()
    return Episode(
        id=uuid4(),
        project_id=uuid4(),
        agent_instance_id=uuid4(),
        agent_type_id=uuid4(),
        session_id="test-session",
        task_type="test_task",
        task_description="Test task description",
        actions=[{"action": "test"}],
        context_summary="Test context",
        outcome=outcome,
        outcome_details="Test outcome",
        duration_seconds=10.0,
        tokens_used=100,
        lessons_learned=["lesson1"],
        importance_score=0.8,
        embedding=embedding,
        occurred_at=now if occurred_at is None else occurred_at,
        created_at=now,
        updated_at=now,
    )
|
||||
|
||||
|
||||
def make_fact(
    embedding: list[float] | None = None,
    subject: str = "test_subject",
    predicate: str = "has_property",
    obj: str = "test_value",
) -> Fact:
    """Build a Fact filled with fixed test data.

    Args:
        embedding: Vector stored on the fact; ``None`` leaves it unset.
        subject: Subject of the fact triple.
        predicate: Predicate of the fact triple.
        obj: Value passed as the fact's ``object`` field (named ``obj``
            here to avoid shadowing the builtin).

    Returns:
        A new Fact with freshly generated UUIDs and one source episode id.
    """
    # One timestamp shared by all time fields keeps them mutually consistent.
    now = _utcnow()
    return Fact(
        id=uuid4(),
        project_id=uuid4(),
        subject=subject,
        predicate=predicate,
        object=obj,
        confidence=0.9,
        source_episode_ids=[uuid4()],
        first_learned=now,
        last_reinforced=now,
        reinforcement_count=1,
        embedding=embedding,
        created_at=now,
        updated_at=now,
    )
|
||||
|
||||
|
||||
def make_procedure(
    embedding: list[float] | None = None,
    success_count: int = 8,
    failure_count: int = 2,
) -> Procedure:
    """Build a Procedure filled with fixed test data.

    Args:
        embedding: Vector stored on the procedure; ``None`` leaves it unset.
        success_count: Value for the procedure's ``success_count`` field.
        failure_count: Value for the procedure's ``failure_count`` field.

    Returns:
        A new Procedure with freshly generated UUIDs.
    """
    # One timestamp shared by all time fields keeps them mutually consistent.
    now = _utcnow()
    return Procedure(
        id=uuid4(),
        project_id=uuid4(),
        agent_type_id=uuid4(),
        name="test_procedure",
        trigger_pattern="test.*",
        steps=[{"step": 1, "action": "test"}],
        success_count=success_count,
        failure_count=failure_count,
        last_used=now,
        embedding=embedding,
        created_at=now,
        updated_at=now,
    )
|
||||
|
||||
|
||||
class TestVectorIndex:
    """Tests for VectorIndex."""

    @pytest.fixture
    def index(self) -> VectorIndex[Episode]:
        """Provide a fresh 4-dimensional vector index for each test."""
        return VectorIndex[Episode](dimension=4)

    @pytest.mark.asyncio
    async def test_add_item(self, index: VectorIndex[Episode]) -> None:
        """Adding an episode returns an entry describing it and bumps the count."""
        episode = make_episode(embedding=[1.0, 0.0, 0.0, 0.0])

        entry = await index.add(episode)

        assert entry.memory_id == episode.id
        assert entry.memory_type == MemoryType.EPISODIC
        assert entry.dimension == 4
        assert await index.count() == 1

    @pytest.mark.asyncio
    async def test_remove_item(self, index: VectorIndex[Episode]) -> None:
        """Removing an indexed episode reports True and empties the index."""
        episode = make_episode(embedding=[1.0, 0.0, 0.0, 0.0])
        await index.add(episode)

        removed = await index.remove(episode.id)

        assert removed is True
        assert await index.count() == 0

    @pytest.mark.asyncio
    async def test_remove_nonexistent(self, index: VectorIndex[Episode]) -> None:
        """Removing an id that was never indexed reports False."""
        removed = await index.remove(uuid4())

        assert removed is False

    @pytest.mark.asyncio
    async def test_search_similar(self, index: VectorIndex[Episode]) -> None:
        """Search returns the closest embeddings first, capped at ``limit``."""
        # Three embeddings at decreasing closeness to the query vector.
        e1 = make_episode(embedding=[1.0, 0.0, 0.0, 0.0])
        e2 = make_episode(embedding=[0.9, 0.1, 0.0, 0.0])
        e3 = make_episode(embedding=[0.0, 1.0, 0.0, 0.0])
        for episode in (e1, e2, e3):
            await index.add(episode)

        # Query with e1's own vector.
        results = await index.search([1.0, 0.0, 0.0, 0.0], limit=2)

        assert len(results) == 2
        # Exact match first, near match second; the orthogonal e3 is cut
        # off by the limit.
        assert [hit.memory_id for hit in results] == [e1.id, e2.id]

    @pytest.mark.asyncio
    async def test_search_min_similarity(self, index: VectorIndex[Episode]) -> None:
        """Results below ``min_similarity`` are filtered out."""
        e1 = make_episode(embedding=[1.0, 0.0, 0.0, 0.0])
        e2 = make_episode(embedding=[0.0, 1.0, 0.0, 0.0])  # Orthogonal to the query

        await index.add(e1)
        await index.add(e2)

        # A high threshold should keep only the exact match.
        results = await index.search([1.0, 0.0, 0.0, 0.0], min_similarity=0.9)

        assert len(results) == 1
        assert results[0].memory_id == e1.id

    @pytest.mark.asyncio
    async def test_search_empty_query(self, index: VectorIndex[Episode]) -> None:
        """An empty query vector yields no results even when items exist."""
        await index.add(make_episode(embedding=[1.0, 0.0, 0.0, 0.0]))

        results = await index.search([], limit=10)

        assert len(results) == 0

    @pytest.mark.asyncio
    async def test_clear(self, index: VectorIndex[Episode]) -> None:
        """Clearing reports how many entries were dropped and empties the index."""
        await index.add(make_episode(embedding=[1.0, 0.0, 0.0, 0.0]))
        await index.add(make_episode(embedding=[0.0, 1.0, 0.0, 0.0]))

        cleared = await index.clear()

        assert cleared == 2
        assert await index.count() == 0
|
||||
|
||||
|
||||
class TestTemporalIndex:
    """Tests for TemporalIndex."""

    @pytest.fixture
    def index(self) -> TemporalIndex[Episode]:
        """Provide a fresh temporal index for each test."""
        return TemporalIndex[Episode]()

    @pytest.mark.asyncio
    async def test_add_item(self, index: TemporalIndex[Episode]) -> None:
        """Adding an episode returns an entry for it and bumps the count."""
        episode = make_episode()

        entry = await index.add(episode)

        assert entry.memory_id == episode.id
        assert await index.count() == 1

    @pytest.mark.asyncio
    async def test_search_by_time_range(self, index: TemporalIndex[Episode]) -> None:
        """Only episodes inside [start_time, end_time] are returned."""
        now = _utcnow()
        old = make_episode(occurred_at=now - timedelta(hours=2))
        recent = make_episode(occurred_at=now - timedelta(hours=1))
        newest = make_episode(occurred_at=now)
        for episode in (old, recent, newest):
            await index.add(episode)

        # Window covers the last 90 minutes, so ``old`` falls outside it.
        results = await index.search(
            query=None,
            start_time=now - timedelta(hours=1, minutes=30),
            end_time=now,
        )

        assert len(results) == 2
        assert {hit.memory_id for hit in results} == {recent.id, newest.id}

    @pytest.mark.asyncio
    async def test_search_recent(self, index: TemporalIndex[Episode]) -> None:
        """``recent_seconds`` restricts results to the trailing time window."""
        now = _utcnow()
        old = make_episode(occurred_at=now - timedelta(hours=2))
        recent = make_episode(occurred_at=now - timedelta(minutes=30))

        await index.add(old)
        await index.add(recent)

        # Search last hour (3600 seconds)
        results = await index.search(query=None, recent_seconds=3600)

        assert len(results) == 1
        assert results[0].memory_id == recent.id

    @pytest.mark.asyncio
    async def test_search_order(self, index: TemporalIndex[Episode]) -> None:
        """The ``order`` argument controls whether newest or oldest comes first."""
        now = _utcnow()
        e1 = make_episode(occurred_at=now - timedelta(hours=2))
        e2 = make_episode(occurred_at=now - timedelta(hours=1))
        e3 = make_episode(occurred_at=now)
        for episode in (e1, e2, e3):
            await index.add(episode)

        # Descending order (newest first)
        results_desc = await index.search(query=None, order="desc", limit=10)
        assert results_desc[0].memory_id == e3.id

        # Ascending order (oldest first)
        results_asc = await index.search(query=None, order="asc", limit=10)
        assert results_asc[0].memory_id == e1.id
|
||||
|
||||
|
||||
class TestEntityIndex:
    """Tests for EntityIndex."""

    @pytest.fixture
    def index(self) -> EntityIndex[Fact]:
        """Provide a fresh entity index for each test."""
        return EntityIndex[Fact]()

    @pytest.mark.asyncio
    async def test_add_item(self, index: EntityIndex[Fact]) -> None:
        """Adding a fact returns an entry for it and bumps the count."""
        fact = make_fact(subject="user", obj="admin")

        entry = await index.add(fact)

        assert entry.memory_id == fact.id
        assert await index.count() == 1

    @pytest.mark.asyncio
    async def test_search_by_entity(self, index: EntityIndex[Fact]) -> None:
        """A single type/value pair selects only the matching fact."""
        f1 = make_fact(subject="user", obj="admin")
        f2 = make_fact(subject="system", obj="config")

        await index.add(f1)
        await index.add(f2)

        results = await index.search(
            query=None,
            entity_type="subject",
            entity_value="user",
        )

        assert len(results) == 1
        assert results[0].memory_id == f1.id

    @pytest.mark.asyncio
    async def test_search_multiple_entities(self, index: EntityIndex[Fact]) -> None:
        """The ``entities`` list form returns every fact sharing the entity."""
        f1 = make_fact(subject="user", obj="admin")
        f2 = make_fact(subject="user", obj="guest")

        await index.add(f1)
        await index.add(f2)

        # Search for facts about "user" subject
        results = await index.search(
            query=None,
            entities=[("subject", "user")],
        )

        assert len(results) == 2
        assert {hit.memory_id for hit in results} == {f1.id, f2.id}

    @pytest.mark.asyncio
    async def test_search_match_all(self, index: EntityIndex[Fact]) -> None:
        """With ``match_all=True`` a fact must carry every listed entity."""
        f1 = make_fact(subject="user", obj="admin")
        f2 = make_fact(subject="user", obj="guest")

        await index.add(f1)
        await index.add(f2)

        # Search for user+admin (match all)
        results = await index.search(
            query=None,
            entities=[("subject", "user"), ("object", "admin")],
            match_all=True,
        )

        assert len(results) == 1
        assert results[0].memory_id == f1.id

    @pytest.mark.asyncio
    async def test_get_entities(self, index: EntityIndex[Fact]) -> None:
        """The entities recorded for a fact include its subject and object."""
        fact = make_fact(subject="user", obj="admin")
        await index.add(fact)

        entities = await index.get_entities(fact.id)

        assert ("subject", "user") in entities
        assert ("object", "admin") in entities
|
||||
|
||||
|
||||
class TestOutcomeIndex:
    """Tests for OutcomeIndex."""

    @pytest.fixture
    def index(self) -> OutcomeIndex[Episode]:
        """Provide a fresh outcome index for each test."""
        return OutcomeIndex[Episode]()

    @pytest.mark.asyncio
    async def test_add_item(self, index: OutcomeIndex[Episode]) -> None:
        """Adding an episode returns an entry carrying its id and outcome."""
        episode = make_episode(outcome=Outcome.SUCCESS)

        entry = await index.add(episode)

        assert entry.memory_id == episode.id
        assert entry.outcome == Outcome.SUCCESS
        assert await index.count() == 1

    @pytest.mark.asyncio
    async def test_search_by_outcome(self, index: OutcomeIndex[Episode]) -> None:
        """Filtering on one outcome returns only episodes with that outcome."""
        success = make_episode(outcome=Outcome.SUCCESS)
        failure = make_episode(outcome=Outcome.FAILURE)

        await index.add(success)
        await index.add(failure)

        results = await index.search(query=None, outcome=Outcome.SUCCESS)

        assert len(results) == 1
        assert results[0].memory_id == success.id

    @pytest.mark.asyncio
    async def test_search_multiple_outcomes(self, index: OutcomeIndex[Episode]) -> None:
        """Filtering on several outcomes returns episodes matching any of them."""
        success = make_episode(outcome=Outcome.SUCCESS)
        partial = make_episode(outcome=Outcome.PARTIAL)
        failure = make_episode(outcome=Outcome.FAILURE)
        for episode in (success, partial, failure):
            await index.add(episode)

        results = await index.search(
            query=None,
            outcomes=[Outcome.SUCCESS, Outcome.PARTIAL],
        )

        assert len(results) == 2
        assert {hit.memory_id for hit in results} == {success.id, partial.id}

    @pytest.mark.asyncio
    async def test_get_outcome_stats(self, index: OutcomeIndex[Episode]) -> None:
        """Stats report a per-outcome count, including zero for unseen outcomes."""
        await index.add(make_episode(outcome=Outcome.SUCCESS))
        await index.add(make_episode(outcome=Outcome.SUCCESS))
        await index.add(make_episode(outcome=Outcome.FAILURE))

        stats = await index.get_outcome_stats()

        assert stats[Outcome.SUCCESS] == 2
        assert stats[Outcome.FAILURE] == 1
        assert stats[Outcome.PARTIAL] == 0
|
||||
|
||||
|
||||
class TestMemoryIndexer:
    """Tests for MemoryIndexer."""

    @pytest.fixture
    def indexer(self) -> MemoryIndexer:
        """Provide a fresh memory indexer for each test."""
        return MemoryIndexer()

    @pytest.mark.asyncio
    async def test_index_episode(self, indexer: MemoryIndexer) -> None:
        """Indexing an episode reports a result for all four index types."""
        episode = make_episode(embedding=[1.0, 0.0, 0.0, 0.0])

        results = await indexer.index(episode)

        assert "vector" in results
        assert "temporal" in results
        assert "entity" in results
        assert "outcome" in results

    @pytest.mark.asyncio
    async def test_index_fact(self, indexer: MemoryIndexer) -> None:
        """Indexing a fact reports every index type except the outcome index."""
        fact = make_fact(embedding=[1.0, 0.0, 0.0, 0.0])

        results = await indexer.index(fact)

        # Facts don't have outcomes
        assert "vector" in results
        assert "temporal" in results
        assert "entity" in results
        assert "outcome" not in results

    @pytest.mark.asyncio
    async def test_remove_from_all(self, indexer: MemoryIndexer) -> None:
        """Removing an indexed episode reports True for every index type."""
        episode = make_episode(embedding=[1.0, 0.0, 0.0, 0.0])
        await indexer.index(episode)

        results = await indexer.remove(episode.id)

        assert results["vector"] is True
        assert results["temporal"] is True
        assert results["entity"] is True
        assert results["outcome"] is True

    @pytest.mark.asyncio
    async def test_clear_all(self, indexer: MemoryIndexer) -> None:
        """Clearing reports how many entries each index dropped."""
        await indexer.index(make_episode(embedding=[1.0, 0.0, 0.0, 0.0]))
        await indexer.index(make_episode(embedding=[0.0, 1.0, 0.0, 0.0]))

        counts = await indexer.clear_all()

        assert counts["vector"] == 2
        assert counts["temporal"] == 2

    @pytest.mark.asyncio
    async def test_get_stats(self, indexer: MemoryIndexer) -> None:
        """Stats report one entry per index after indexing a single episode."""
        await indexer.index(make_episode(embedding=[1.0, 0.0, 0.0, 0.0]))

        stats = await indexer.get_stats()

        assert stats["vector"] == 1
        assert stats["temporal"] == 1
        assert stats["entity"] == 1
        assert stats["outcome"] == 1
|
||||
|
||||
|
||||
class TestGetMemoryIndexer:
    """Tests for the get_memory_indexer singleton getter."""

    def test_returns_instance(self) -> None:
        """The getter returns a MemoryIndexer instance."""
        indexer = get_memory_indexer()

        assert indexer is not None
        assert isinstance(indexer, MemoryIndexer)

    def test_returns_same_instance(self) -> None:
        """Repeated calls return the very same object."""
        first = get_memory_indexer()
        second = get_memory_indexer()

        assert first is second
|
||||
Reference in New Issue
Block a user