fix(llm-gateway): improve type safety and datetime consistency

- Add type annotations for mypy compliance
- Use UTC-aware datetimes consistently (datetime.now(UTC); see the sketch below)
- Add type: ignore comments for LiteLLM's incomplete type stubs
- Fix import ordering and formatting
- Update pyproject.toml mypy configuration (a hypothetical sketch follows the diff)
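
The datetime change itself is not visible in the diff excerpt below, so here is a minimal sketch of the pattern the second bullet refers to (assuming Python 3.11+, where datetime.UTC is available):

```python
from datetime import UTC, datetime

# Naive timestamp: carries no tzinfo, is ambiguous across time zones,
# and datetime.utcnow() is deprecated as of Python 3.12.
naive = datetime.utcnow()

# Aware timestamp: carries tzinfo=UTC, so comparisons, arithmetic, and
# ISO serialization are unambiguous.
aware = datetime.now(UTC)
print(aware.isoformat())  # e.g. 2026-01-03T19:56:05.123456+00:00
```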

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-03 20:56:05 +01:00
parent 6e8b0b022a
commit f482559e15
15 changed files with 111 additions and 105 deletions

@@ -13,6 +13,7 @@ Per ADR-004: LLM Provider Abstraction.
 import logging
 import uuid
+from collections.abc import AsyncIterator
 from contextlib import asynccontextmanager
 from typing import Any
@@ -53,7 +54,7 @@ mcp = FastMCP("syndarix-llm-gateway")
 @asynccontextmanager
-async def lifespan(_app: FastAPI):
+async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
     """Application lifespan handler."""
     settings = get_settings()
     logger.info(f"Starting LLM Gateway on {settings.host}:{settings.port}")
@@ -66,6 +67,7 @@ async def lifespan(_app: FastAPI):
     # Cleanup
     from cost_tracking import close_cost_tracker
     await close_cost_tracker()
+    logger.info("LLM Gateway shutdown complete")
@@ -326,7 +328,7 @@ async def _impl_chat_completion(
     # Non-streaming completion
     response = await provider.router.acompletion(
         model=model_name,
-        messages=messages,
+        messages=messages,  # type: ignore[arg-type]
         max_tokens=max_tokens,
         temperature=temperature,
     )
@@ -335,12 +337,12 @@ async def _impl_chat_completion(
     await circuit.record_success()

     # Extract response data
-    content = response.choices[0].message.content or ""
+    content = response.choices[0].message.content or ""  # type: ignore[union-attr]
     finish_reason = response.choices[0].finish_reason or "stop"

     # Get usage stats
-    prompt_tokens = response.usage.prompt_tokens if response.usage else 0
-    completion_tokens = response.usage.completion_tokens if response.usage else 0
+    prompt_tokens = response.usage.prompt_tokens if response.usage else 0  # type: ignore[attr-defined]
+    completion_tokens = response.usage.completion_tokens if response.usage else 0  # type: ignore[attr-defined]

     # Calculate cost
     cost_usd = calculate_cost(model_name, prompt_tokens, completion_tokens)
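
On the scoped ignores above: suffixing an error code, as in # type: ignore[union-attr], suppresses only that specific mypy diagnostic, while a bare # type: ignore hides every error on the line. A self-contained illustration (Usage and Response are stand-ins here, not LiteLLM's actual types):

```python
class Usage:
    prompt_tokens: int = 0

class Response:
    usage: Usage | None = Usage()

resp = Response()

# `usage` is typed Usage | None, so mypy flags this access as [union-attr];
# the scoped ignore silences only that code, and any other error on the
# same line would still be reported.
tokens = resp.usage.prompt_tokens  # type: ignore[union-attr]
print(tokens)  # 0
```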
@@ -445,17 +447,19 @@ async def _impl_list_models(
     all_models: list[dict[str, Any]] = []
     available_models = provider.get_available_models()
     for name, config in MODEL_CONFIGS.items():
-        all_models.append({
-            "name": name,
-            "provider": config.provider.value,
-            "available": name in available_models,
-            "cost_per_1m_input": config.cost_per_1m_input,
-            "cost_per_1m_output": config.cost_per_1m_output,
-            "context_window": config.context_window,
-            "max_output_tokens": config.max_output_tokens,
-            "supports_vision": config.supports_vision,
-            "supports_streaming": config.supports_streaming,
-        })
+        all_models.append(
+            {
+                "name": name,
+                "provider": config.provider.value,
+                "available": name in available_models,
+                "cost_per_1m_input": config.cost_per_1m_input,
+                "cost_per_1m_output": config.cost_per_1m_output,
+                "context_window": config.context_window,
+                "max_output_tokens": config.max_output_tokens,
+                "supports_vision": config.supports_vision,
+                "supports_streaming": config.supports_streaming,
+            }
+        )

     result["models"] = all_models
     return result
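
The pyproject.toml change mentioned in the last bullet of the commit message is not part of this excerpt. As a purely hypothetical sketch of the kind of [tool.mypy] section such a change touches (the options below are assumptions, not the project's actual configuration):

```toml
[tool.mypy]
python_version = "3.12"
strict = true

# LiteLLM ships incomplete type information, so relax checking for that
# one package instead of loosening the whole codebase.
[[tool.mypy.overrides]]
module = "litellm.*"
ignore_missing_imports = true
```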