diff --git a/backend/docs/BENCHMARKS.md b/backend/docs/BENCHMARKS.md index c418dcc..22ebff3 100644 --- a/backend/docs/BENCHMARKS.md +++ b/backend/docs/BENCHMARKS.md @@ -72,8 +72,8 @@ The test suite includes two categories of performance tests: | Type | How it works | Examples | |------|-------------|----------| -| **pytest-benchmark tests** | Uses the `benchmark` fixture for precise, multi-round timing | `test_health_endpoint_performance`, `test_openapi_schema_performance` | -| **Manual latency tests** | Uses `time.perf_counter` with explicit thresholds (for async endpoints that pytest-benchmark doesn't support natively) | `test_login_latency`, `test_get_current_user_latency` | +| **pytest-benchmark tests** | Uses the `benchmark` fixture for precise, multi-round timing | `test_health_endpoint_performance`, `test_openapi_schema_performance`, `test_password_hashing_performance`, `test_password_verification_performance`, `test_access_token_creation_performance`, `test_refresh_token_creation_performance`, `test_token_decode_performance` | +| **Manual latency tests** | Uses `time.perf_counter` with explicit thresholds (for async endpoints that pytest-benchmark doesn't support natively) | `test_login_latency`, `test_get_current_user_latency`, `test_register_latency`, `test_token_refresh_latency`, `test_sessions_list_latency`, `test_user_profile_update_latency` | ### 3. 
Regression detection @@ -130,12 +130,21 @@ This is a **relative ranking within the current run** — red does NOT mean the For this project's current endpoints: -| Endpoint | Expected range | Why | -|----------|---------------|-----| +| Test | Expected range | Why | +|------|---------------|-----| | `GET /health` | ~1–1.5ms | Minimal logic, mocked DB check | | `GET /api/v1/openapi.json` | ~1.5–2.5ms | Serializes entire API schema | -| `POST /api/v1/auth/login` | < 500ms threshold | Includes bcrypt password hashing | +| `get_password_hash` | ~200ms | CPU-bound bcrypt hashing | +| `verify_password` | ~200ms | CPU-bound bcrypt verification | +| `create_access_token` | ~17–20µs | JWT encoding with HMAC-SHA256 | +| `create_refresh_token` | ~17–20µs | JWT encoding with HMAC-SHA256 | +| `decode_token` | ~20–25µs | JWT decoding and claim validation | +| `POST /api/v1/auth/login` | < 500ms threshold | Includes bcrypt password verification | +| `POST /api/v1/auth/register` | < 500ms threshold | Includes bcrypt password hashing | +| `POST /api/v1/auth/refresh` | < 200ms threshold | Token rotation + DB session update | | `GET /api/v1/users/me` | < 200ms threshold | DB lookup + token validation | +| `GET /api/v1/sessions/me` | < 200ms threshold | Session list query + token validation | +| `PATCH /api/v1/users/me` | < 200ms threshold | DB update + token validation | --- @@ -297,6 +306,6 @@ If StdDev is high relative to the Mean, results may be unreliable. Common causes Try running benchmarks on an idle system or increasing `min_rounds` in `pyproject.toml`. -### Only 2 of 4 tests run +### Only 7 of 13 tests run -The async tests (`test_login_latency`, `test_get_current_user_latency`) are skipped during `--benchmark-only` runs because they don't use the `benchmark` fixture. They run as part of the normal test suite (`make test`) with manual threshold assertions. 
+The async tests (`test_login_latency`, `test_get_current_user_latency`, `test_register_latency`, `test_token_refresh_latency`, `test_sessions_list_latency`, `test_user_profile_update_latency`) are skipped during `--benchmark-only` runs because they don't use the `benchmark` fixture. They run as part of the normal test suite (`make test`) with manual threshold assertions. diff --git a/backend/tests/benchmarks/test_endpoint_performance.py b/backend/tests/benchmarks/test_endpoint_performance.py index 82b503d..4d0485f 100644 --- a/backend/tests/benchmarks/test_endpoint_performance.py +++ b/backend/tests/benchmarks/test_endpoint_performance.py @@ -2,7 +2,7 @@ Performance Benchmark Tests. These tests establish baseline performance metrics for critical API endpoints -and detect regressions when response times degrade significantly. +and core operations, detecting regressions when response times degrade. Usage: make benchmark # Run benchmarks and save baseline @@ -20,10 +20,21 @@ import pytest import pytest_asyncio from fastapi.testclient import TestClient +from app.core.auth import ( + create_access_token, + create_refresh_token, + decode_token, + get_password_hash, + verify_password, +) from app.main import app pytestmark = [pytest.mark.benchmark] +# Pre-computed hash for sync benchmarks (avoids hashing in every iteration) +_BENCH_PASSWORD = "BenchPass123!" +_BENCH_HASH = get_password_hash(_BENCH_PASSWORD) + # ============================================================================= # Fixtures @@ -55,6 +66,50 @@ def test_openapi_schema_performance(sync_client, benchmark): assert result.status_code == 200 +# ============================================================================= +# Core Crypto & Token Benchmarks (no DB required) +# +# These benchmark the CPU-intensive operations that underpin auth: +# password hashing, verification, and JWT creation/decoding. 
+# ============================================================================= + + +def test_password_hashing_performance(benchmark): + """Benchmark: bcrypt password hashing (CPU-bound, ~100ms expected).""" + result = benchmark(get_password_hash, _BENCH_PASSWORD) + assert result.startswith("$2b$") + + +def test_password_verification_performance(benchmark): + """Benchmark: bcrypt password verification against a known hash.""" + result = benchmark(verify_password, _BENCH_PASSWORD, _BENCH_HASH) + assert result is True + + +def test_access_token_creation_performance(benchmark): + """Benchmark: JWT access token generation.""" + user_id = str(uuid.uuid4()) + token = benchmark(create_access_token, user_id) + assert isinstance(token, str) + assert len(token) > 0 + + +def test_refresh_token_creation_performance(benchmark): + """Benchmark: JWT refresh token generation.""" + user_id = str(uuid.uuid4()) + token = benchmark(create_refresh_token, user_id) + assert isinstance(token, str) + assert len(token) > 0 + + +def test_token_decode_performance(benchmark): + """Benchmark: JWT token decoding and validation.""" + user_id = str(uuid.uuid4()) + token = create_access_token(user_id) + payload = benchmark(decode_token, token, "access") + assert payload.sub == user_id + + # ============================================================================= # Database-dependent Endpoint Benchmarks (async, manual timing) # @@ -65,12 +120,15 @@ def test_openapi_schema_performance(sync_client, benchmark): MAX_LOGIN_MS = 500 MAX_GET_USER_MS = 200 +MAX_REGISTER_MS = 500 +MAX_TOKEN_REFRESH_MS = 200 +MAX_SESSIONS_LIST_MS = 200 +MAX_USER_UPDATE_MS = 200 @pytest_asyncio.fixture async def bench_user(async_test_db): """Create a test user for benchmark tests.""" - from app.core.auth import get_password_hash from app.models.user import User _test_engine, AsyncTestingSessionLocal = async_test_db @@ -102,6 +160,17 @@ async def bench_token(client, bench_user): return response.json()["access_token"] 
+@pytest_asyncio.fixture +async def bench_refresh_token(client, bench_user): + """Get a refresh token for the benchmark user.""" + response = await client.post( + "/api/v1/auth/login", + json={"email": "bench@example.com", "password": "BenchPass123!"}, + ) + assert response.status_code == 200, f"Login failed: {response.text}" + return response.json()["refresh_token"] + + @pytest.mark.asyncio async def test_login_latency(client, bench_user): """Performance: POST /api/v1/auth/login must respond under threshold.""" @@ -148,3 +217,111 @@ async def test_get_current_user_latency(client, bench_token): assert mean_ms < MAX_GET_USER_MS, ( f"Get user latency regression: {mean_ms:.1f}ms exceeds {MAX_GET_USER_MS}ms threshold" ) + + +@pytest.mark.asyncio +async def test_register_latency(client): + """Performance: POST /api/v1/auth/register must respond under threshold.""" + iterations = 3 + total_ms = 0.0 + + for i in range(iterations): + start = time.perf_counter() + response = await client.post( + "/api/v1/auth/register", + json={ + "email": f"benchreg{i}@example.com", + "password": "BenchRegPass123!", + "first_name": "Bench", + "last_name": "Register", + }, + ) + elapsed_ms = (time.perf_counter() - start) * 1000 + total_ms += elapsed_ms + assert response.status_code == 201, f"Register failed: {response.text}" + + mean_ms = total_ms / iterations + print( + f"\n Register mean latency: {mean_ms:.1f}ms (threshold: {MAX_REGISTER_MS}ms)" + ) + assert mean_ms < MAX_REGISTER_MS, ( + f"Register latency regression: {mean_ms:.1f}ms exceeds {MAX_REGISTER_MS}ms threshold" + ) + + +@pytest.mark.asyncio +async def test_token_refresh_latency(client, bench_refresh_token): + """Performance: POST /api/v1/auth/refresh must respond under threshold.""" + iterations = 5 + total_ms = 0.0 + + for _ in range(iterations): + start = time.perf_counter() + response = await client.post( + "/api/v1/auth/refresh", + json={"refresh_token": bench_refresh_token}, + ) + elapsed_ms = (time.perf_counter() - 
start) * 1000
+        total_ms += elapsed_ms
+        assert response.status_code == 200, f"Refresh failed: {response.text}"
+        # Use the new refresh token for the next iteration
+        bench_refresh_token = response.json()["refresh_token"]
+
+    mean_ms = total_ms / iterations
+    print(
+        f"\n Token refresh mean latency: {mean_ms:.1f}ms (threshold: {MAX_TOKEN_REFRESH_MS}ms)"
+    )
+    assert mean_ms < MAX_TOKEN_REFRESH_MS, (
+        f"Token refresh latency regression: {mean_ms:.1f}ms exceeds {MAX_TOKEN_REFRESH_MS}ms threshold"
+    )
+
+
+@pytest.mark.asyncio
+async def test_sessions_list_latency(client, bench_token):
+    """Performance: GET /api/v1/sessions/me must respond under threshold."""
+    iterations = 10
+    total_ms = 0.0
+
+    for _ in range(iterations):
+        start = time.perf_counter()
+        response = await client.get(
+            "/api/v1/sessions/me",
+            headers={"Authorization": f"Bearer {bench_token}"},
+        )
+        elapsed_ms = (time.perf_counter() - start) * 1000
+        total_ms += elapsed_ms
+        assert response.status_code == 200
+
+    mean_ms = total_ms / iterations
+    print(
+        f"\n Sessions list mean latency: {mean_ms:.1f}ms (threshold: {MAX_SESSIONS_LIST_MS}ms)"
+    )
+    assert mean_ms < MAX_SESSIONS_LIST_MS, (
+        f"Sessions list latency regression: {mean_ms:.1f}ms exceeds {MAX_SESSIONS_LIST_MS}ms threshold"
+    )
+
+
+@pytest.mark.asyncio
+async def test_user_profile_update_latency(client, bench_token):
+    """Performance: PATCH /api/v1/users/me must respond under threshold."""
+    iterations = 5
+    total_ms = 0.0
+
+    for i in range(iterations):
+        start = time.perf_counter()
+        response = await client.patch(
+            "/api/v1/users/me",
+            headers={"Authorization": f"Bearer {bench_token}"},
+            json={"first_name": f"Bench{i}"},
+        )
+        elapsed_ms = (time.perf_counter() - start) * 1000
+        total_ms += elapsed_ms
+        assert response.status_code == 200, f"Update failed: {response.text}"
+
+    mean_ms = total_ms / iterations
+    print(
+        f"\n User update mean latency: {mean_ms:.1f}ms (threshold: {MAX_USER_UPDATE_MS}ms)"
+    )
+    assert mean_ms < MAX_USER_UPDATE_MS, (
+        f"User update latency regression: {mean_ms:.1f}ms exceeds {MAX_USER_UPDATE_MS}ms threshold"
+    )