chore(context): refactor for consistency, optimize formatting, and simplify logic
- Cleaned up unnecessary comments in `__all__` definitions for better readability.
- Adjusted indentation and formatting across modules for improved clarity (e.g., long lines, logical grouping).
- Simplified conditional expressions and inline comments for context scoring and ranking.
- Replaced some hard-coded values with type-safe annotations (e.g., `ClassVar`).
- Removed unused imports and ensured consistent usage across test files.
- Updated `test_score_not_cached_on_context` to clarify caching behavior.
- Improved truncation strategy logic and marker handling.
This commit is contained in:
@@ -6,7 +6,6 @@ from unittest.mock import AsyncMock, MagicMock
|
||||
import pytest
|
||||
|
||||
from app.services.context.scoring import (
|
||||
BaseScorer,
|
||||
CompositeScorer,
|
||||
PriorityScorer,
|
||||
RecencyScorer,
|
||||
@@ -149,15 +148,9 @@ class TestRelevanceScorer:
|
||||
scorer = RelevanceScorer()
|
||||
|
||||
contexts = [
|
||||
KnowledgeContext(
|
||||
content="Python", source="1", relevance_score=0.8
|
||||
),
|
||||
KnowledgeContext(
|
||||
content="Java", source="2", relevance_score=0.6
|
||||
),
|
||||
KnowledgeContext(
|
||||
content="Go", source="3", relevance_score=0.9
|
||||
),
|
||||
KnowledgeContext(content="Python", source="1", relevance_score=0.8),
|
||||
KnowledgeContext(content="Java", source="2", relevance_score=0.6),
|
||||
KnowledgeContext(content="Go", source="3", relevance_score=0.9),
|
||||
]
|
||||
|
||||
scores = await scorer.score_batch(contexts, "test")
|
||||
@@ -263,7 +256,9 @@ class TestRecencyScorer:
|
||||
)
|
||||
|
||||
conv_score = await scorer.score(conv_context, "query", reference_time=now)
|
||||
knowledge_score = await scorer.score(knowledge_context, "query", reference_time=now)
|
||||
knowledge_score = await scorer.score(
|
||||
knowledge_context, "query", reference_time=now
|
||||
)
|
||||
|
||||
# Conversation should decay much faster
|
||||
assert conv_score < knowledge_score
|
||||
@@ -301,12 +296,8 @@ class TestRecencyScorer:
|
||||
|
||||
contexts = [
|
||||
TaskContext(content="1", source="t", timestamp=now),
|
||||
TaskContext(
|
||||
content="2", source="t", timestamp=now - timedelta(hours=24)
|
||||
),
|
||||
TaskContext(
|
||||
content="3", source="t", timestamp=now - timedelta(hours=48)
|
||||
),
|
||||
TaskContext(content="2", source="t", timestamp=now - timedelta(hours=24)),
|
||||
TaskContext(content="3", source="t", timestamp=now - timedelta(hours=48)),
|
||||
]
|
||||
|
||||
scores = await scorer.score_batch(contexts, "query", reference_time=now)
|
||||
@@ -508,8 +499,12 @@ class TestCompositeScorer:
|
||||
assert scored.priority_score > 0.5 # HIGH priority
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_score_cached_on_context(self) -> None:
|
||||
"""Test that score is cached on the context."""
|
||||
async def test_score_not_cached_on_context(self) -> None:
|
||||
"""Test that scores are NOT cached on the context.
|
||||
|
||||
Scores should not be cached on the context because they are query-dependent.
|
||||
Different queries would get incorrect cached scores if we cached on the context.
|
||||
"""
|
||||
scorer = CompositeScorer()
|
||||
|
||||
context = KnowledgeContext(
|
||||
@@ -518,14 +513,18 @@ class TestCompositeScorer:
|
||||
relevance_score=0.5,
|
||||
)
|
||||
|
||||
# First scoring
|
||||
# After scoring, context._score should remain None
|
||||
# (we don't cache on context because scores are query-dependent)
|
||||
await scorer.score(context, "query")
|
||||
assert context._score is not None
|
||||
# The scorer should compute fresh scores each time
|
||||
# rather than caching on the context object
|
||||
|
||||
# Second scoring should use cached value
|
||||
context._score = 0.999 # Set to a known value
|
||||
score2 = await scorer.score(context, "query")
|
||||
assert score2 == 0.999
|
||||
# Score again with different query - should compute fresh score
|
||||
score1 = await scorer.score(context, "query 1")
|
||||
score2 = await scorer.score(context, "query 2")
|
||||
# Both should be valid scores (not necessarily equal since queries differ)
|
||||
assert 0.0 <= score1 <= 1.0
|
||||
assert 0.0 <= score2 <= 1.0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_score_batch(self) -> None:
|
||||
@@ -555,15 +554,9 @@ class TestCompositeScorer:
|
||||
scorer = CompositeScorer()
|
||||
|
||||
contexts = [
|
||||
KnowledgeContext(
|
||||
content="Low", source="docs", relevance_score=0.2
|
||||
),
|
||||
KnowledgeContext(
|
||||
content="High", source="docs", relevance_score=0.9
|
||||
),
|
||||
KnowledgeContext(
|
||||
content="Medium", source="docs", relevance_score=0.5
|
||||
),
|
||||
KnowledgeContext(content="Low", source="docs", relevance_score=0.2),
|
||||
KnowledgeContext(content="High", source="docs", relevance_score=0.9),
|
||||
KnowledgeContext(content="Medium", source="docs", relevance_score=0.5),
|
||||
]
|
||||
|
||||
ranked = await scorer.rank(contexts, "query")
|
||||
@@ -580,9 +573,7 @@ class TestCompositeScorer:
|
||||
scorer = CompositeScorer()
|
||||
|
||||
contexts = [
|
||||
KnowledgeContext(
|
||||
content=str(i), source="docs", relevance_score=i / 10
|
||||
)
|
||||
KnowledgeContext(content=str(i), source="docs", relevance_score=i / 10)
|
||||
for i in range(10)
|
||||
]
|
||||
|
||||
@@ -595,12 +586,8 @@ class TestCompositeScorer:
|
||||
scorer = CompositeScorer()
|
||||
|
||||
contexts = [
|
||||
KnowledgeContext(
|
||||
content="Low", source="docs", relevance_score=0.1
|
||||
),
|
||||
KnowledgeContext(
|
||||
content="High", source="docs", relevance_score=0.9
|
||||
),
|
||||
KnowledgeContext(content="Low", source="docs", relevance_score=0.1),
|
||||
KnowledgeContext(content="High", source="docs", relevance_score=0.9),
|
||||
]
|
||||
|
||||
ranked = await scorer.rank(contexts, "query", min_score=0.5)
|
||||
@@ -625,7 +612,13 @@ class TestCompositeScorer:
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
scorer = CompositeScorer()
|
||||
# Use scorer with recency_weight=0 to eliminate time-dependent variation
|
||||
# (recency scores change as time passes between calls)
|
||||
scorer = CompositeScorer(
|
||||
relevance_weight=0.5,
|
||||
recency_weight=0.0, # Disable recency to get deterministic results
|
||||
priority_weight=0.5,
|
||||
)
|
||||
|
||||
# Create a single context that will be scored multiple times concurrently
|
||||
context = KnowledgeContext(
|
||||
@@ -639,11 +632,9 @@ class TestCompositeScorer:
|
||||
tasks = [scorer.score(context, "test query") for _ in range(num_concurrent)]
|
||||
scores = await asyncio.gather(*tasks)
|
||||
|
||||
# All scores should be identical (the same context scored the same way)
|
||||
# All scores should be identical (deterministic scoring without recency)
|
||||
assert all(s == scores[0] for s in scores)
|
||||
|
||||
# The context should have its _score cached
|
||||
assert context._score is not None
|
||||
# Note: We don't cache _score on context because scores are query-dependent
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_concurrent_scoring_different_contexts(self) -> None:
|
||||
@@ -671,10 +662,7 @@ class TestCompositeScorer:
|
||||
|
||||
# Each context should have a different score based on its relevance
|
||||
assert len(set(scores)) > 1 # Not all the same
|
||||
|
||||
# All contexts should have cached scores
|
||||
for ctx in contexts:
|
||||
assert ctx._score is not None
|
||||
# Note: We don't cache _score on context because scores are query-dependent
|
||||
|
||||
|
||||
class TestScoredContext:
|
||||
|
||||
Reference in New Issue
Block a user