feat(context): enhance performance, caching, and settings management

- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings).
- Optimize parallel execution in token counting, scoring, and reranking for source diversity.
- Improve caching logic:
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, context hash-based hashing, and dynamic token limiting.
- Refactor methods for scalability and better error handling.

Tests: Updated all affected components with additional test cases.
This commit is contained in:
2026-01-04 12:37:58 +01:00
parent 6c7b72f130
commit 96e6400bd8
8 changed files with 256 additions and 86 deletions

View File

@@ -104,9 +104,21 @@ class ContextSettings(BaseSettings):
le=1.0,
description="Compress when budget usage exceeds this percentage",
)
truncation_suffix: str = Field(
default="... [truncated]",
description="Suffix to add when truncating content",
truncation_marker: str = Field(
default="\n\n[...content truncated...]\n\n",
description="Marker text to insert where content was truncated",
)
truncation_preserve_ratio: float = Field(
default=0.7,
ge=0.1,
le=0.9,
description="Ratio of content to preserve from start in middle truncation (0.7 = 70% start, 30% end)",
)
truncation_min_content_length: int = Field(
default=100,
ge=10,
le=1000,
description="Minimum content length in characters before truncation applies",
)
summary_model_group: str = Field(
default="fast",
@@ -128,6 +140,12 @@ class ContextSettings(BaseSettings):
default="ctx",
description="Redis key prefix for context cache",
)
cache_memory_max_items: int = Field(
default=1000,
ge=100,
le=100000,
description="Maximum items in memory fallback cache when Redis unavailable",
)
# Performance settings
max_assembly_time_ms: int = Field(
@@ -165,6 +183,28 @@ class ContextSettings(BaseSettings):
description="Minimum relevance score for knowledge",
)
# Relevance scoring settings
relevance_keyword_fallback_weight: float = Field(
default=0.5,
ge=0.0,
le=1.0,
description="Maximum score for keyword-based fallback scoring (when semantic unavailable)",
)
relevance_semantic_max_chars: int = Field(
default=2000,
ge=100,
le=10000,
description="Maximum content length in chars for semantic similarity computation",
)
# Diversity/ranking settings
diversity_max_per_source: int = Field(
default=3,
ge=1,
le=20,
description="Maximum contexts from the same source in diversity reranking",
)
# Conversation history settings
conversation_max_turns: int = Field(
default=20,
@@ -253,11 +293,15 @@ class ContextSettings(BaseSettings):
"compression": {
"threshold": self.compression_threshold,
"summary_model_group": self.summary_model_group,
"truncation_marker": self.truncation_marker,
"truncation_preserve_ratio": self.truncation_preserve_ratio,
"truncation_min_content_length": self.truncation_min_content_length,
},
"cache": {
"enabled": self.cache_enabled,
"ttl_seconds": self.cache_ttl_seconds,
"prefix": self.cache_prefix,
"memory_max_items": self.cache_memory_max_items,
},
"performance": {
"max_assembly_time_ms": self.max_assembly_time_ms,
@@ -269,6 +313,13 @@ class ContextSettings(BaseSettings):
"max_results": self.knowledge_max_results,
"min_score": self.knowledge_min_score,
},
"relevance": {
"keyword_fallback_weight": self.relevance_keyword_fallback_weight,
"semantic_max_chars": self.relevance_semantic_max_chars,
},
"diversity": {
"max_per_source": self.diversity_max_per_source,
},
"conversation": {
"max_turns": self.conversation_max_turns,
"recent_priority": self.conversation_recent_priority,