fix(llm-gateway): improve type safety and datetime consistency
- Add type annotations for mypy compliance
- Use UTC-aware datetimes consistently (`datetime.now(UTC)`)
- Add `type: ignore` comments for LiteLLM's incomplete stubs
- Fix import ordering and formatting
- Update pyproject.toml mypy configuration

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -13,6 +13,7 @@ Per ADR-004: LLM Provider Abstraction.
 import logging
 import uuid
+from collections.abc import AsyncIterator
 from contextlib import asynccontextmanager
 from typing import Any
@@ -53,7 +54,7 @@ mcp = FastMCP("syndarix-llm-gateway")


 @asynccontextmanager
-async def lifespan(_app: FastAPI):
+async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
     """Application lifespan handler."""
     settings = get_settings()
     logger.info(f"Starting LLM Gateway on {settings.host}:{settings.port}")
@@ -66,6 +67,7 @@ async def lifespan(_app: FastAPI):

     # Cleanup
     from cost_tracking import close_cost_tracker

     await close_cost_tracker()
     logger.info("LLM Gateway shutdown complete")
||||
@@ -326,7 +328,7 @@ async def _impl_chat_completion(
         # Non-streaming completion
         response = await provider.router.acompletion(
             model=model_name,
-            messages=messages,
+            messages=messages,  # type: ignore[arg-type]
             max_tokens=max_tokens,
             temperature=temperature,
         )
||||
@@ -335,12 +337,12 @@ async def _impl_chat_completion(
         await circuit.record_success()

         # Extract response data
-        content = response.choices[0].message.content or ""
+        content = response.choices[0].message.content or ""  # type: ignore[union-attr]
         finish_reason = response.choices[0].finish_reason or "stop"

         # Get usage stats
-        prompt_tokens = response.usage.prompt_tokens if response.usage else 0
-        completion_tokens = response.usage.completion_tokens if response.usage else 0
+        prompt_tokens = response.usage.prompt_tokens if response.usage else 0  # type: ignore[attr-defined]
+        completion_tokens = response.usage.completion_tokens if response.usage else 0  # type: ignore[attr-defined]

         # Calculate cost
         cost_usd = calculate_cost(model_name, prompt_tokens, completion_tokens)
@@ -445,17 +447,19 @@ async def _impl_list_models(
     all_models: list[dict[str, Any]] = []
     available_models = provider.get_available_models()
     for name, config in MODEL_CONFIGS.items():
-        all_models.append({
-            "name": name,
-            "provider": config.provider.value,
-            "available": name in available_models,
-            "cost_per_1m_input": config.cost_per_1m_input,
-            "cost_per_1m_output": config.cost_per_1m_output,
-            "context_window": config.context_window,
-            "max_output_tokens": config.max_output_tokens,
-            "supports_vision": config.supports_vision,
-            "supports_streaming": config.supports_streaming,
-        })
+        all_models.append(
+            {
+                "name": name,
+                "provider": config.provider.value,
+                "available": name in available_models,
+                "cost_per_1m_input": config.cost_per_1m_input,
+                "cost_per_1m_output": config.cost_per_1m_output,
+                "context_window": config.context_window,
+                "max_output_tokens": config.max_output_tokens,
+                "supports_vision": config.supports_vision,
+                "supports_streaming": config.supports_streaming,
+            }
+        )
     result["models"] = all_models

     return result
|
||||
Reference in New Issue
Block a user