fix(llm-gateway): improve type safety and datetime consistency

- Add type annotations for mypy compliance
- Use UTC-aware datetimes consistently (datetime.now(UTC); see the sketch below)
- Add type: ignore comments for LiteLLM's incomplete type stubs
- Fix import ordering and formatting
- Update pyproject.toml mypy configuration (a hypothetical sketch follows the diff)
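
The datetime change itself is not visible in the diff excerpt below, so here is a minimal sketch of the pattern the second bullet refers to (assuming Python 3.11+, where datetime.UTC is available):

```python
from datetime import UTC, datetime

# Naive timestamp: carries no tzinfo, is ambiguous across time zones,
# and datetime.utcnow() is deprecated as of Python 3.12.
naive = datetime.utcnow()

# Aware timestamp: carries tzinfo=UTC, so comparisons, arithmetic, and
# ISO serialization are unambiguous.
aware = datetime.now(UTC)
print(aware.isoformat())  # e.g. 2026-01-03T19:56:05.123456+00:00
```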

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-03 20:56:05 +01:00
parent 6e8b0b022a
commit f482559e15
15 changed files with 111 additions and 105 deletions

@@ -13,6 +13,7 @@ Per ADR-004: LLM Provider Abstraction.
 import logging
 import uuid
+from collections.abc import AsyncIterator
 from contextlib import asynccontextmanager
 from typing import Any
@@ -53,7 +54,7 @@ mcp = FastMCP("syndarix-llm-gateway")
 @asynccontextmanager
-async def lifespan(_app: FastAPI):
+async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
     """Application lifespan handler."""
     settings = get_settings()
     logger.info(f"Starting LLM Gateway on {settings.host}:{settings.port}")
@@ -66,6 +67,7 @@ async def lifespan(_app: FastAPI):
     # Cleanup
     from cost_tracking import close_cost_tracker
     await close_cost_tracker()
+    logger.info("LLM Gateway shutdown complete")
@@ -326,7 +328,7 @@ async def _impl_chat_completion(
     # Non-streaming completion
     response = await provider.router.acompletion(
         model=model_name,
-        messages=messages,
+        messages=messages,  # type: ignore[arg-type]
         max_tokens=max_tokens,
         temperature=temperature,
     )
@@ -335,12 +337,12 @@ async def _impl_chat_completion(
     await circuit.record_success()

     # Extract response data
-    content = response.choices[0].message.content or ""
+    content = response.choices[0].message.content or ""  # type: ignore[union-attr]
     finish_reason = response.choices[0].finish_reason or "stop"

     # Get usage stats
-    prompt_tokens = response.usage.prompt_tokens if response.usage else 0
-    completion_tokens = response.usage.completion_tokens if response.usage else 0
+    prompt_tokens = response.usage.prompt_tokens if response.usage else 0  # type: ignore[attr-defined]
+    completion_tokens = response.usage.completion_tokens if response.usage else 0  # type: ignore[attr-defined]

     # Calculate cost
     cost_usd = calculate_cost(model_name, prompt_tokens, completion_tokens)
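
On the scoped ignores above: suffixing an error code, as in # type: ignore[union-attr], suppresses only that specific mypy diagnostic, while a bare # type: ignore hides every error on the line. A self-contained illustration (Usage and Response are stand-ins here, not LiteLLM's actual types):

```python
class Usage:
    prompt_tokens: int = 0

class Response:
    usage: Usage | None = Usage()

resp = Response()

# `usage` is typed Usage | None, so mypy flags this access as [union-attr];
# the scoped ignore silences only that code, and any other error on the
# same line would still be reported.
tokens = resp.usage.prompt_tokens  # type: ignore[union-attr]
print(tokens)  # 0
```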
@@ -445,17 +447,19 @@ async def _impl_list_models(
     all_models: list[dict[str, Any]] = []
     available_models = provider.get_available_models()
     for name, config in MODEL_CONFIGS.items():
-        all_models.append({
-            "name": name,
-            "provider": config.provider.value,
-            "available": name in available_models,
-            "cost_per_1m_input": config.cost_per_1m_input,
-            "cost_per_1m_output": config.cost_per_1m_output,
-            "context_window": config.context_window,
-            "max_output_tokens": config.max_output_tokens,
-            "supports_vision": config.supports_vision,
-            "supports_streaming": config.supports_streaming,
-        })
+        all_models.append(
+            {
+                "name": name,
+                "provider": config.provider.value,
+                "available": name in available_models,
+                "cost_per_1m_input": config.cost_per_1m_input,
+                "cost_per_1m_output": config.cost_per_1m_output,
+                "context_window": config.context_window,
+                "max_output_tokens": config.max_output_tokens,
+                "supports_vision": config.supports_vision,
+                "supports_streaming": config.supports_streaming,
+            }
+        )

     result["models"] = all_models
     return result
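
The pyproject.toml change mentioned in the last bullet of the commit message is not part of this excerpt. As a purely hypothetical sketch of the kind of [tool.mypy] section such a change touches (the options below are assumptions, not the project's actual configuration):

```toml
[tool.mypy]
python_version = "3.12"
strict = true

# LiteLLM ships incomplete type information, so relax checking for that
# one package instead of loosening the whole codebase.
[[tool.mypy.overrides]]
module = "litellm.*"
ignore_missing_imports = true
```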