Files
syndarix/backend/tests/e2e/test_mcp_workflows.py
Felipe Cardoso ad0c06851d feat(tests): add comprehensive E2E tests for MCP and Agent workflows
- Introduced end-to-end tests for MCP workflows, including server discovery, authentication, context engine operations, error handling, and input validation.
- Added full lifecycle tests for agent workflows, covering type management, instance spawning, status transitions, and admin-only operations.
- Enhanced test coverage for real-world MCP and Agent scenarios across PostgreSQL and async environments.
2026-01-05 01:02:41 +01:00

461 lines
16 KiB
Python

"""
MCP and Context Engine E2E Workflow Tests.

Tests complete workflows involving MCP servers and the Context Engine
against a real PostgreSQL database. These tests verify:

- MCP server listing and tool discovery
- Context engine operations
- Admin-only MCP operations with proper authentication
- Error handling for MCP operations

Usage:
    make test-e2e  # Run all E2E tests
"""
from uuid import uuid4
import pytest
# Module-level pytest marks: every test collected from this file is tagged
# with the e2e, postgres and asyncio markers listed below.
pytestmark = [
pytest.mark.e2e,
pytest.mark.postgres,
pytest.mark.asyncio,
]
class TestMCPServerDiscovery:
    """Discovery workflows: server list, tool list, health and circuit breakers.

    Every test issues a single GET through the ``e2e_client`` fixture (defined
    outside this file) and checks the status code and the top-level JSON keys.
    """

    async def test_list_mcp_servers(self, e2e_client):
        """The server listing has the expected shape and the two core servers."""
        resp = await e2e_client.get("/api/v1/mcp/servers")
        assert resp.status_code == 200, f"Failed: {resp.text}"

        payload = resp.json()

        # Envelope: a list of servers plus a total.
        for key in ("servers", "total"):
            assert key in payload
        assert isinstance(payload["servers"], list)

        # Both llm-gateway and knowledge-base must be configured.
        configured = [entry["name"] for entry in payload["servers"]]
        for expected in ("llm-gateway", "knowledge-base"):
            assert expected in configured

    async def test_list_all_mcp_tools(self, e2e_client):
        """The aggregated tool listing returns a list of tools and a total."""
        resp = await e2e_client.get("/api/v1/mcp/tools")
        assert resp.status_code == 200, f"Failed: {resp.text}"

        payload = resp.json()
        for key in ("tools", "total"):
            assert key in payload
        assert isinstance(payload["tools"], list)

    async def test_mcp_health_check(self, e2e_client):
        """The health endpoint reports per-server status and summary counts."""
        resp = await e2e_client.get("/api/v1/mcp/health")
        assert resp.status_code == 200, f"Failed: {resp.text}"

        payload = resp.json()
        for key in ("servers", "healthy_count", "unhealthy_count", "total"):
            assert key in payload

    async def test_list_circuit_breakers(self, e2e_client):
        """The circuit-breaker listing returns a list under ``circuit_breakers``."""
        resp = await e2e_client.get("/api/v1/mcp/circuit-breakers")
        assert resp.status_code == 200, f"Failed: {resp.text}"

        payload = resp.json()
        assert "circuit_breakers" in payload
        assert isinstance(payload["circuit_breakers"], list)
class TestMCPServerTools:
    """Per-server tool listing through ``/api/v1/mcp/servers/<name>/tools``."""

    @staticmethod
    def _check_optional_tool_listing(resp):
        """Validate a tool-listing response for a server that may be offline.

        A 200 must carry ``tools`` and ``total``; 404 and 502 are tolerated
        because, per the original author's note, the server may not be
        connected in the test environment.
        """
        assert resp.status_code in (200, 404, 502)
        if resp.status_code != 200:
            return
        payload = resp.json()
        assert "tools" in payload
        assert "total" in payload

    async def test_list_llm_gateway_tools(self, e2e_client):
        """Tool listing for the LLM Gateway server."""
        resp = await e2e_client.get("/api/v1/mcp/servers/llm-gateway/tools")
        self._check_optional_tool_listing(resp)

    async def test_list_knowledge_base_tools(self, e2e_client):
        """Tool listing for the Knowledge Base server."""
        resp = await e2e_client.get("/api/v1/mcp/servers/knowledge-base/tools")
        self._check_optional_tool_listing(resp)

    async def test_invalid_server_returns_404(self, e2e_client):
        """An unknown server name yields 404."""
        resp = await e2e_client.get("/api/v1/mcp/servers/nonexistent-server/tools")
        assert resp.status_code == 404
class TestContextEngineWorkflows:
    """Context Engine endpoints: health, token budgets and token counting."""

    async def test_context_engine_health(self, e2e_client):
        """Health reports ``healthy`` and exposes MCP and cache flags."""
        resp = await e2e_client.get("/api/v1/context/health")
        assert resp.status_code == 200, f"Failed: {resp.text}"

        health = resp.json()
        assert health["status"] == "healthy"
        for flag in ("mcp_connected", "cache_enabled"):
            assert flag in health

    async def test_get_token_budget_claude_sonnet(self, e2e_client):
        """The Claude 3 Sonnet budget is complete and does not over-allocate."""
        resp = await e2e_client.get("/api/v1/context/budget/claude-3-sonnet")
        assert resp.status_code == 200, f"Failed: {resp.text}"

        budget = resp.json()
        assert budget["model"] == "claude-3-sonnet"

        # The overall total plus every per-category allocation must be present.
        categories = (
            "system_tokens",
            "knowledge_tokens",
            "conversation_tokens",
            "tool_tokens",
            "response_reserve",
        )
        assert "total_tokens" in budget
        for category in categories:
            assert category in budget

        # Sanity check: the category allocations must fit inside the total.
        assert budget["total_tokens"] > 0
        allocated = sum(budget[category] for category in categories)
        assert allocated <= budget["total_tokens"]

    async def test_get_token_budget_with_custom_max(self, e2e_client):
        """A caller-supplied ``max_tokens`` bounds the returned total."""
        requested_max = {"max_tokens": 50000}
        resp = await e2e_client.get(
            "/api/v1/context/budget/claude-3-sonnet",
            params=requested_max,
        )
        assert resp.status_code == 200, f"Failed: {resp.text}"

        budget = resp.json()
        assert budget["model"] == "claude-3-sonnet"
        # The total may be lower than requested, but never higher.
        assert budget["total_tokens"] <= 50000

    async def test_count_tokens(self, e2e_client):
        """Counting tokens for a short message gives a positive count."""
        body = {
            "content": "Hello, this is a test message for token counting.",
            "model": "claude-3-sonnet",
        }
        resp = await e2e_client.post("/api/v1/context/count-tokens", json=body)
        assert resp.status_code == 200, f"Failed: {resp.text}"

        counted = resp.json()
        assert "token_count" in counted
        assert counted["token_count"] > 0
        assert counted["model"] == "claude-3-sonnet"
class TestAdminMCPOperations:
    """Admin-only endpoints must reject requests that carry no credentials.

    Each test calls one endpoint without an Authorization header and accepts
    either 401 or 403 as the rejection.
    """

    # Status codes accepted as "request rejected for lack of authentication".
    _REJECTED = (401, 403)

    async def test_tool_call_requires_auth(self, e2e_client):
        """Tool execution is refused without authentication."""
        call_body = {
            "server": "llm-gateway",
            "tool": "count_tokens",
            "arguments": {"text": "test"},
        }
        resp = await e2e_client.post("/api/v1/mcp/call", json=call_body)
        assert resp.status_code in self._REJECTED

    async def test_circuit_reset_requires_auth(self, e2e_client):
        """Circuit-breaker reset is refused without authentication."""
        resp = await e2e_client.post(
            "/api/v1/mcp/circuit-breakers/llm-gateway/reset"
        )
        assert resp.status_code in self._REJECTED

    async def test_server_reconnect_requires_auth(self, e2e_client):
        """Server reconnect is refused without authentication."""
        resp = await e2e_client.post("/api/v1/mcp/servers/llm-gateway/reconnect")
        assert resp.status_code in self._REJECTED

    async def test_context_stats_requires_auth(self, e2e_client):
        """Context stats are refused without authentication."""
        resp = await e2e_client.get("/api/v1/context/stats")
        assert resp.status_code in self._REJECTED

    async def test_context_assemble_requires_auth(self, e2e_client):
        """Context assembly is refused without authentication."""
        assemble_body = {
            "project_id": "test-project",
            "agent_id": "test-agent",
            "query": "test query",
            "model": "claude-3-sonnet",
        }
        resp = await e2e_client.post("/api/v1/context/assemble", json=assemble_body)
        assert resp.status_code in self._REJECTED

    async def test_cache_invalidate_requires_auth(self, e2e_client):
        """Cache invalidation is refused without authentication."""
        resp = await e2e_client.post("/api/v1/context/cache/invalidate")
        assert resp.status_code in self._REJECTED
class TestAdminMCPWithAuthentication:
    """Admin context-engine operations exercised with real bearer tokens.

    Superuser tests read the access token from the ``e2e_superuser`` fixture
    (``e2e_superuser['tokens']['access_token']``). The regular-user test
    registers and logs in a fresh account through the API.
    """

    @staticmethod
    def _bearer(access_token):
        """Return the Authorization header dict for ``access_token``."""
        return {"Authorization": f"Bearer {access_token}"}

    async def test_superuser_can_get_context_stats(self, e2e_client, e2e_superuser):
        """Test that superuser can get context engine stats."""
        response = await e2e_client.get(
            "/api/v1/context/stats",
            headers=self._bearer(e2e_superuser["tokens"]["access_token"]),
        )
        assert response.status_code == 200, f"Failed: {response.text}"
        data = response.json()
        assert "cache" in data
        assert "settings" in data

    @pytest.mark.skip(
        reason="Requires MCP servers (llm-gateway, knowledge-base) to be running"
    )
    async def test_superuser_can_assemble_context(self, e2e_client, e2e_superuser):
        """Test that superuser can assemble context."""
        response = await e2e_client.post(
            "/api/v1/context/assemble",
            headers=self._bearer(e2e_superuser["tokens"]["access_token"]),
            json={
                # Random suffixes keep project/agent ids unique per run.
                "project_id": f"test-project-{uuid4().hex[:8]}",
                "agent_id": f"test-agent-{uuid4().hex[:8]}",
                "query": "What is the status of the project?",
                "model": "claude-3-sonnet",
                "system_prompt": "You are a helpful assistant.",
                "compress": True,
                "use_cache": False,
            },
        )
        assert response.status_code == 200, f"Failed: {response.text}"
        data = response.json()
        for key in (
            "content",
            "total_tokens",
            "context_count",
            "budget_used_percent",
            "metadata",
        ):
            assert key in data

    async def test_superuser_can_invalidate_cache(self, e2e_client, e2e_superuser):
        """Test that superuser can invalidate cache."""
        response = await e2e_client.post(
            "/api/v1/context/cache/invalidate",
            headers=self._bearer(e2e_superuser["tokens"]["access_token"]),
            params={"project_id": "test-project"},
        )
        assert response.status_code == 204

    async def test_regular_user_cannot_access_admin_operations(self, e2e_client):
        """Test that regular (non-superuser) cannot access admin operations.

        The register and login steps are asserted explicitly so that a broken
        setup fails with a clear message instead of a ``KeyError`` on the
        missing ``access_token``.
        """
        email = f"regular-{uuid4().hex[:8]}@example.com"
        password = "RegularUser123!"

        # Register regular user
        register_resp = await e2e_client.post(
            "/api/v1/auth/register",
            json={
                "email": email,
                "password": password,
                "first_name": "Regular",
                "last_name": "User",
            },
        )
        assert register_resp.status_code in (200, 201), (
            f"Registration failed: {register_resp.text}"
        )

        # Login
        login_resp = await e2e_client.post(
            "/api/v1/auth/login",
            json={"email": email, "password": password},
        )
        assert login_resp.status_code == 200, f"Login failed: {login_resp.text}"
        tokens = login_resp.json()
        assert "access_token" in tokens, "Login response has no access_token"

        # Try to access admin endpoint
        response = await e2e_client.get(
            "/api/v1/context/stats",
            headers=self._bearer(tokens["access_token"]),
        )

        # Should be forbidden for non-superuser
        assert response.status_code == 403
class TestMCPInputValidation:
    """Request validation on MCP and context endpoints."""

    async def test_server_name_max_length(self, e2e_client):
        """An over-long server name is rejected with 422."""
        # 100 characters; the original author notes a 64-character limit.
        oversized = "a" * 100
        resp = await e2e_client.get(f"/api/v1/mcp/servers/{oversized}/tools")
        assert resp.status_code == 422

    async def test_server_name_invalid_characters(self, e2e_client):
        """A server name containing ``@`` and ``!`` is rejected with 422."""
        bad_name = "server@name!invalid"
        resp = await e2e_client.get(f"/api/v1/mcp/servers/{bad_name}/tools")
        assert resp.status_code == 422

    async def test_token_count_empty_content(self, e2e_client):
        """Empty content either counts as zero tokens or is rejected with 422."""
        resp = await e2e_client.post(
            "/api/v1/context/count-tokens",
            json={"content": ""},
        )

        # Only two outcomes are acceptable: accepted (200) or rejected (422).
        assert resp.status_code in (200, 422)
        if resp.status_code == 200:
            # When accepted, empty content must count as zero tokens.
            assert resp.json()["token_count"] == 0
class TestMCPWorkflowIntegration:
    """Multi-step workflows that chain several MCP and context endpoints."""

    async def test_discovery_to_budget_workflow(self, e2e_client):
        """Discover servers, check engine health, then fetch a token budget."""
        # Step 1: at least one MCP server must be listed.
        listing = await e2e_client.get("/api/v1/mcp/servers")
        assert listing.status_code == 200
        discovered = listing.json()["servers"]
        assert len(discovered) >= 1

        # Step 2: the context engine must report itself healthy.
        health_check = await e2e_client.get("/api/v1/context/health")
        assert health_check.status_code == 200
        engine_health = health_check.json()
        assert engine_health["status"] == "healthy"

        # Step 3: a budget must be available for the model.
        budget_check = await e2e_client.get("/api/v1/context/budget/claude-3-sonnet")
        assert budget_check.status_code == 200
        model_budget = budget_check.json()

        # Step 4: positive budget and a connected MCP layer.
        assert model_budget["total_tokens"] > 0
        assert engine_health["mcp_connected"] is True

    @pytest.mark.skip(
        reason="Requires MCP servers (llm-gateway, knowledge-base) to be running"
    )
    async def test_full_context_assembly_workflow(self, e2e_client, e2e_superuser):
        """Budget check, token count, context assembly, then stats, as superuser."""
        # Random suffixes keep the project and agent ids unique per run.
        project_id = f"e2e-project-{uuid4().hex[:8]}"
        agent_id = f"e2e-agent-{uuid4().hex[:8]}"
        auth_headers = {
            "Authorization": f"Bearer {e2e_superuser['tokens']['access_token']}"
        }

        # Step 1: the budget endpoint answers with a decodable JSON body.
        budget_check = await e2e_client.get("/api/v1/context/budget/claude-3-sonnet")
        assert budget_check.status_code == 200
        budget_check.json()  # decoded only to confirm the body is valid JSON

        # Step 2: token counting works for a sample message.
        counting = await e2e_client.post(
            "/api/v1/context/count-tokens",
            json={"content": "This is a test message for context assembly."},
        )
        assert counting.status_code == 200
        assert counting.json()["token_count"] > 0

        # Step 3: assemble a context with history and compression, bypassing cache.
        history = [
            {"role": "user", "content": "What's the project status?"},
            {
                "role": "assistant",
                "content": "Let me check the current status.",
            },
        ]
        assembly = await e2e_client.post(
            "/api/v1/context/assemble",
            headers=auth_headers,
            json={
                "project_id": project_id,
                "agent_id": agent_id,
                "query": "Summarize the current project status",
                "model": "claude-3-sonnet",
                "system_prompt": "You are a project management assistant.",
                "task_description": "Generate a status report",
                "conversation_history": history,
                "compress": True,
                "use_cache": False,
            },
        )
        assert assembly.status_code == 200
        result = assembly.json()

        # Step 4: the assembly produced content within the budget.
        assert result["total_tokens"] > 0
        assert result["context_count"] > 0
        assert 0 < result["budget_used_percent"] <= 100

        # Step 5: the stats endpoint is still reachable after the assembly.
        stats = await e2e_client.get("/api/v1/context/stats", headers=auth_headers)
        assert stats.status_code == 200