forked from cardosofelipe/fast-next-template
feat(context): enhance performance, caching, and settings management
- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings).
- Optimize parallel execution in token counting, scoring, and reranking for source diversity.
- Improve caching logic:
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, context hash-based hashing, and dynamic token limiting.
- Refactor methods for scalability and better error handling.

Tests: Updated all affected components with additional test cases.
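The "per-context locks for safe parallel scoring" bullet could be sketched roughly as below. This is a minimal illustration, not the project's actual implementation: the class and method names (`ScoreCache`, `get_or_compute`) are hypothetical, and the real code presumably keys locks by context fingerprint.

```python
import asyncio
from collections import defaultdict


class ScoreCache:
    """Sketch: one asyncio.Lock per context key, so concurrent scorers
    never compute the same entry twice. Hypothetical names throughout."""

    def __init__(self) -> None:
        self._cache: dict[str, float] = {}
        self._locks: dict[str, asyncio.Lock] = defaultdict(asyncio.Lock)

    async def get_or_compute(self, key: str, compute) -> float:
        # Fast path: already cached, no lock needed.
        if key in self._cache:
            return self._cache[key]
        async with self._locks[key]:
            # Re-check after acquiring the lock: another task
            # may have finished the computation while we waited.
            if key not in self._cache:
                self._cache[key] = await compute()
            return self._cache[key]
```

The double-checked pattern (check, lock, re-check) is what makes parallel scoring safe: only the first task for a given key pays the computation cost.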
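The "middle truncation" mentioned in the commit message is the strategy of keeping the head and tail of a sequence and dropping the middle. A minimal sketch, assuming a token list and a hypothetical helper name (`truncate_middle`) that need not match the project's setting names:

```python
def truncate_middle(tokens: list[str], max_tokens: int, marker: str = "...") -> list[str]:
    """Keep the head and tail of `tokens`, dropping the middle and
    inserting `marker` in its place. Hypothetical illustration only."""
    if len(tokens) <= max_tokens:
        return tokens
    keep = max_tokens - 1        # reserve one slot for the marker
    head = (keep + 1) // 2       # give the extra token to the head
    tail = keep - head
    return tokens[:head] + [marker] + (tokens[len(tokens) - tail:] if tail else [])
```

Middle truncation preserves both the opening and closing of a context, which often matters more for relevance than the interior.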
@@ -237,7 +237,7 @@ class TokenCalculator:
     """
     Count tokens for multiple texts.
 
-    Efficient batch counting with caching.
+    Efficient batch counting with caching and parallel execution.
 
     Args:
         texts: List of texts to count
@@ -246,13 +246,14 @@ class TokenCalculator:
     Returns:
         List of token counts (same order as input)
     """
-    results: list[int] = []
-
-    for text in texts:
-        count = await self.count_tokens(text, model)
-        results.append(count)
-
-    return results
+    import asyncio
+
+    if not texts:
+        return []
+
+    # Execute all token counts in parallel for better performance
+    tasks = [self.count_tokens(text, model) for text in texts]
+    return await asyncio.gather(*tasks)
 
 def clear_cache(self) -> None:
     """Clear the token count cache."""
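The change in the diff above replaces a sequential await loop with `asyncio.gather`. A self-contained sketch of the resulting behavior (the tokenizer here is a whitespace stand-in, not the project's real cached counter):

```python
import asyncio


class TokenCalculator:
    """Minimal sketch of the batched counting shown in the diff.
    count_tokens is a stand-in; the real method caches per-model counts."""

    async def count_tokens(self, text: str, model: str = "default") -> int:
        # Stand-in tokenizer: approximate tokens by whitespace splitting.
        return len(text.split())

    async def count_tokens_batch(self, texts: list[str], model: str = "default") -> list[int]:
        if not texts:
            return []
        # Run all counts concurrently; gather preserves argument order.
        tasks = [self.count_tokens(t, model) for t in texts]
        return await asyncio.gather(*tasks)
```

Note that `asyncio.gather` returns results in the order its awaitables were passed, so the docstring's "same order as input" guarantee still holds after the rewrite.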