feat(context): enhance performance, caching, and settings management

- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings).
- Optimize parallel execution in token counting, scoring, and reranking for source diversity.
- Improve caching logic:
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, context hash-based hashing, and dynamic token limiting.
- Refactor methods for scalability and better error handling.

Tests: Updated all affected components with additional test cases.
2026-01-04 12:37:58 +01:00
parent 6c7b72f130
commit 96e6400bd8
8 changed files with 256 additions and 86 deletions
--- a/backend/app/services/context/config.py
+++ b/backend/app/services/context/config.py
@@ -104,9 +104,21 @@ class ContextSettings(BaseSettings):
        le=1.0,
        description="Compress when budget usage exceeds this percentage",
    )
-    truncation_suffix: str = Field(
-        default="... [truncated]",
-        description="Suffix to add when truncating content",
+    truncation_marker: str = Field(
+        default="\n\n[...content truncated...]\n\n",
+        description="Marker text to insert where content was truncated",
+    )
+    truncation_preserve_ratio: float = Field(
+        default=0.7,
+        ge=0.1,
+        le=0.9,
+        description="Ratio of content to preserve from start in middle truncation (0.7 = 70% start, 30% end)",
+    )
+    truncation_min_content_length: int = Field(
+        default=100,
+        ge=10,
+        le=1000,
+        description="Minimum content length in characters before truncation applies",
    )
    summary_model_group: str = Field(
        default="fast",
@@ -128,6 +140,12 @@ class ContextSettings(BaseSettings):
        default="ctx",
        description="Redis key prefix for context cache",
    )
+    cache_memory_max_items: int = Field(
+        default=1000,
+        ge=100,
+        le=100000,
+        description="Maximum items in memory fallback cache when Redis unavailable",
+    )

    # Performance settings
    max_assembly_time_ms: int = Field(
@@ -165,6 +183,28 @@ class ContextSettings(BaseSettings):
        description="Minimum relevance score for knowledge",
    )

+    # Relevance scoring settings
+    relevance_keyword_fallback_weight: float = Field(
+        default=0.5,
+        ge=0.0,
+        le=1.0,
+        description="Maximum score for keyword-based fallback scoring (when semantic unavailable)",
+    )
+    relevance_semantic_max_chars: int = Field(
+        default=2000,
+        ge=100,
+        le=10000,
+        description="Maximum content length in chars for semantic similarity computation",
+    )
+
+    # Diversity/ranking settings
+    diversity_max_per_source: int = Field(
+        default=3,
+        ge=1,
+        le=20,
+        description="Maximum contexts from the same source in diversity reranking",
+    )
+
    # Conversation history settings
    conversation_max_turns: int = Field(
        default=20,
@@ -253,11 +293,15 @@ class ContextSettings(BaseSettings):
            "compression": {
                "threshold": self.compression_threshold,
                "summary_model_group": self.summary_model_group,
+                "truncation_marker": self.truncation_marker,
+                "truncation_preserve_ratio": self.truncation_preserve_ratio,
+                "truncation_min_content_length": self.truncation_min_content_length,
            },
            "cache": {
                "enabled": self.cache_enabled,
                "ttl_seconds": self.cache_ttl_seconds,
                "prefix": self.cache_prefix,
+                "memory_max_items": self.cache_memory_max_items,
            },
            "performance": {
                "max_assembly_time_ms": self.max_assembly_time_ms,
@@ -269,6 +313,13 @@ class ContextSettings(BaseSettings):
                "max_results": self.knowledge_max_results,
                "min_score": self.knowledge_min_score,
            },
+            "relevance": {
+                "keyword_fallback_weight": self.relevance_keyword_fallback_weight,
+                "semantic_max_chars": self.relevance_semantic_max_chars,
+            },
+            "diversity": {
+                "max_per_source": self.diversity_max_per_source,
+            },
            "conversation": {
                "max_turns": self.conversation_max_turns,
                "recent_priority": self.conversation_recent_priority,