forked from cardosofelipe/fast-next-template
feat(context): enhance performance, caching, and settings management
- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings). - Optimize parallel execution in token counting, scoring, and reranking for source diversity. - Improve caching logic: - Add per-context locks for safe parallel scoring. - Reuse precomputed fingerprints for cache efficiency. - Make truncation, scoring, and ranker behaviors fully configurable via settings. - Add support for middle truncation, context hash-based hashing, and dynamic token limiting. - Refactor methods for scalability and better error handling. Tests: Updated all affected components with additional test cases.
This commit is contained in:
@@ -54,7 +54,7 @@ class ContextCache:
|
||||
|
||||
# In-memory fallback cache when Redis unavailable
|
||||
self._memory_cache: dict[str, tuple[str, float]] = {}
|
||||
self._max_memory_items = 1000
|
||||
self._max_memory_items = self._settings.cache_memory_max_items
|
||||
|
||||
def set_redis(self, redis: "Redis") -> None:
|
||||
"""Set Redis connection."""
|
||||
@@ -100,7 +100,7 @@ class ContextCache:
|
||||
Compute a fingerprint for a context assembly request.
|
||||
|
||||
The fingerprint is based on:
|
||||
- Context content and metadata
|
||||
- Context content hash and metadata (not full content for performance)
|
||||
- Query string
|
||||
- Target model
|
||||
|
||||
@@ -112,12 +112,13 @@ class ContextCache:
|
||||
Returns:
|
||||
32-character hex fingerprint
|
||||
"""
|
||||
# Build a deterministic representation
|
||||
# Build a deterministic representation using content hashes for performance
|
||||
# This avoids JSON serializing potentially large content strings
|
||||
context_data = []
|
||||
for ctx in contexts:
|
||||
context_data.append({
|
||||
"type": ctx.get_type().value,
|
||||
"content": ctx.content,
|
||||
"content_hash": self._hash_content(ctx.content), # Hash instead of full content
|
||||
"source": ctx.source,
|
||||
"priority": ctx.priority, # Already an int
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user