diff --git a/backend/docs/CODING_STANDARDS.md b/backend/docs/CODING_STANDARDS.md index 329f275..c252406 100644 --- a/backend/docs/CODING_STANDARDS.md +++ b/backend/docs/CODING_STANDARDS.md @@ -31,14 +31,23 @@ All Python code should follow [PEP 8](https://www.python.org/dev/peps/pep-0008/) Always use type hints for function parameters and return values: ```python -from typing import Optional, List +from typing import Optional from uuid import UUID +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy import select -def get_user(db: Session, user_id: UUID) -> Optional[User]: +async def get_user(db: AsyncSession, user_id: UUID) -> Optional[User]: """Retrieve a user by ID.""" - return db.query(User).filter(User.id == user_id).first() + result = await db.execute(select(User).where(User.id == user_id)) + return result.scalar_one_or_none() ``` +**Modern Python Type Hints:** +- Use `list[T]` instead of `List[T]` (Python 3.10+) +- Use `dict[K, V]` instead of `Dict[K, V]` +- Use `T | None` instead of `Optional[T]` +- Use `str | int` instead of `Union[str, int]` + ### 3. Docstrings Use Google-style docstrings for all public functions, classes, and methods: @@ -207,23 +216,24 @@ if not user: ### Error Handling Pattern -Always follow this pattern in CRUD operations: +Always follow this pattern in CRUD operations (Async version): ```python from sqlalchemy.exc import IntegrityError, OperationalError, DataError +from sqlalchemy.ext.asyncio import AsyncSession -def create_user(db: Session, user_in: UserCreate) -> User: +async def create_user(db: AsyncSession, user_in: UserCreate) -> User: """Create a new user.""" try: db_user = User(**user_in.model_dump()) db.add(db_user) - db.commit() - db.refresh(db_user) + await db.commit() + await db.refresh(db_user) logger.info(f"User created: {db_user.id}") return db_user except IntegrityError as e: - db.rollback() + await db.rollback() logger.error(f"Integrity error creating user: {str(e)}") # Check for specific constraint violations @@ -237,7 +247,7 @@ def create_user(db: Session, user_in: UserCreate) -> User: raise DatabaseError(message="Failed to create user") except OperationalError as e: - db.rollback() + await db.rollback() logger.error(f"Database operational error: {str(e)}", exc_info=True) raise DatabaseError(message="Database is currently unavailable") @@ -274,11 +284,25 @@ All error responses follow this structure: ## Database Operations -### Use the CRUD Base Class +### Async CRUD Pattern + +**IMPORTANT**: This application uses **async SQLAlchemy** with modern patterns for better performance and testability. + +#### Core Principles + +1. **Async by Default**: All database operations are async +2. **Modern SQLAlchemy 2.0**: Use `select()` instead of `.query()` +3. **Type Safety**: Full type hints with generics +4. **Testability**: Easy to mock and test +5. **Consistent Ordering**: Always order queries for pagination + +### Use the Async CRUD Base Class Always inherit from `CRUDBase` for database operations: ```python +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy import select from app.crud.base import CRUDBase from app.models.user import User from app.schemas.users import UserCreate, UserUpdate @@ -286,22 +310,106 @@ from app.schemas.users import UserCreate, UserUpdate class CRUDUser(CRUDBase[User, UserCreate, UserUpdate]): """CRUD operations for User model.""" - def get_by_email(self, db: Session, email: str) -> Optional[User]: + async def get_by_email( + self, + db: AsyncSession, + email: str + ) -> User | None: """Get user by email address.""" - return db.query(User).filter(User.email == email).first() + result = await db.execute( + select(User).where(User.email == email) + ) + return result.scalar_one_or_none() user_crud = CRUDUser(User) ``` +**Key Points:** +- Use `async def` for all methods +- Use `select()` instead of `db.query()` +- Use `await db.execute()` for queries +- Use `.scalar_one_or_none()` instead of `.first()` +- Use `T | None` instead of `Optional[T]` + +### Modern SQLAlchemy Patterns + +#### Query Pattern (Old vs New) + +```python +# ❌ OLD - Legacy query() API (sync) +def get_user(db: Session, user_id: UUID) -> Optional[User]: + return db.query(User).filter(User.id == user_id).first() + +# ✅ NEW - Modern select() API (async) +async def get_user(db: AsyncSession, user_id: UUID) -> User | None: + result = await db.execute( + select(User).where(User.id == user_id) + ) + return result.scalar_one_or_none() +``` + +#### Multiple Results + +```python +# ❌ OLD +def get_users(db: Session) -> List[User]: + return db.query(User).all() + +# ✅ NEW +async def get_users(db: AsyncSession) -> list[User]: + result = await db.execute(select(User)) + return list(result.scalars().all()) +``` + +#### With Ordering and Pagination + +```python +# ✅ CORRECT - Always use ordering for pagination +async def get_users_paginated( + db: AsyncSession, + skip: int = 0, + limit: int = 100 +) -> list[User]: + result = await db.execute( + select(User) + .where(User.deleted_at.is_(None)) # Soft delete filter + .order_by(User.created_at.desc()) # Consistent ordering + .offset(skip) + .limit(limit) + ) + return list(result.scalars().all()) +``` + +#### With Relationships (Eager Loading) + +```python +from sqlalchemy.orm import selectinload + +# Load user with sessions +async def get_user_with_sessions( + db: AsyncSession, + user_id: UUID +) -> User | None: + result = await db.execute( + select(User) + .where(User.id == user_id) + .options(selectinload(User.sessions)) # Eager load relationship + ) + return result.scalar_one_or_none() +``` + ### Transaction Management #### In Routes (Dependency Injection) ```python +from sqlalchemy.ext.asyncio import AsyncSession +from app.core.database import get_db + @router.post("/users", response_model=UserResponse) -def create_user( +async def create_user( user_in: UserCreate, - db: Session = Depends(get_db) + db: AsyncSession = Depends(get_db) ): """ Create a new user. @@ -309,25 +417,32 @@ def create_user( The database session is automatically managed by FastAPI. Commit on success, rollback on error. """ - return user_crud.create(db, obj_in=user_in) + return await user_crud.create(db, obj_in=user_in) ``` -#### In Services (Context Manager) +**Key Points:** +- Route functions must be `async def` +- Database parameter is `AsyncSession` +- Always `await` CRUD operations + +#### In Services (Multiple Operations) ```python -from app.core.database import transaction_scope - -def complex_operation(): +async def complex_operation( + db: AsyncSession, + user_data: UserCreate, + session_data: SessionCreate +) -> tuple[User, UserSession]: """ Perform multiple database operations atomically. - The context manager automatically commits on success - or rolls back on error. + The session automatically commits on success or rolls back on error. """ - with transaction_scope() as db: - user = user_crud.create(db, obj_in=user_data) - session = session_crud.create(db, obj_in=session_data) - return user, session + user = await user_crud.create(db, obj_in=user_data) + session = await session_crud.create(db, obj_in=session_data) + + # Commit is handled by the route's dependency + return user, session ``` ### Use Soft Deletes @@ -336,7 +451,7 @@ Prefer soft deletes over hard deletes for audit trails: ```python # Good - Soft delete (sets deleted_at) -user_crud.soft_delete(db, id=user_id) +await user_crud.soft_delete(db, id=user_id) # Acceptable only when required - Hard delete user_crud.remove(db, id=user_id) @@ -597,7 +712,7 @@ Follow the existing test structure: ``` tests/ -├── conftest.py # Shared fixtures +├── conftest.py # Shared fixtures (async) ├── api/ # Integration tests │ ├── test_users.py │ └── test_auth.py @@ -606,31 +721,52 @@ tests/ └── services/ # Service tests ``` +### Async Testing with pytest-asyncio + +**IMPORTANT**: All tests using async database operations must use `pytest-asyncio`. + +```python +import pytest +import pytest_asyncio +from sqlalchemy.ext.asyncio import AsyncSession + +# Mark async tests +@pytest.mark.asyncio +async def test_create_user(): + """Test async user creation.""" + pass +``` + ### Test Naming Convention ```python -# Test function names should be descriptive -def test_create_user_with_valid_data(): +# Test function names should be descriptive and use async +@pytest.mark.asyncio +async def test_create_user_with_valid_data(): """Test creating a user with valid data succeeds.""" pass -def test_create_user_with_duplicate_email_raises_error(): +@pytest.mark.asyncio +async def test_create_user_with_duplicate_email_raises_error(): """Test creating a user with duplicate email raises DuplicateError.""" pass -def test_get_user_that_does_not_exist_returns_none(): +@pytest.mark.asyncio +async def test_get_user_that_does_not_exist_returns_none(): """Test getting non-existent user returns None.""" pass ``` -### Use Fixtures +### Use Async Fixtures ```python import pytest +import pytest_asyncio +from sqlalchemy.ext.asyncio import AsyncSession from app.models.user import User -@pytest.fixture -def test_user(db_session: Session) -> User: +@pytest_asyncio.fixture +async def test_user(db_session: AsyncSession) -> User: """Create a test user.""" user = User( email="test@example.com", @@ -638,53 +774,107 @@ def test_user(db_session: Session) -> User: is_active=True ) db_session.add(user) - db_session.commit() - db_session.refresh(user) + await db_session.commit() + await db_session.refresh(user) return user -def test_get_user(db_session: Session, test_user: User): +@pytest.mark.asyncio +async def test_get_user(db_session: AsyncSession, test_user: User): """Test retrieving a user by ID.""" - user = user_crud.get(db_session, id=test_user.id) + user = await user_crud.get(db_session, id=test_user.id) assert user is not None assert user.email == test_user.email ``` +### Database Test Configuration + +Use SQLite in-memory for tests with proper pooling: + +```python +# tests/conftest.py +import pytest +import pytest_asyncio +from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession +from sqlalchemy.orm import sessionmaker +from sqlalchemy.pool import StaticPool +from app.models.base import Base + +@pytest_asyncio.fixture(scope="function") +async def db_engine(): + """Create async engine for testing.""" + engine = create_async_engine( + "sqlite+aiosqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, # IMPORTANT: Share single in-memory DB + ) + + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + + yield engine + + await engine.dispose() + +@pytest_asyncio.fixture +async def db_session(db_engine): + """Create async session for tests.""" + async_session = sessionmaker( + db_engine, + class_=AsyncSession, + expire_on_commit=False + ) + + async with async_session() as session: + yield session + await session.rollback() +``` + ### Test Coverage -Aim for high test coverage: +Aim for 80%+ test coverage: ```python # Test the happy path -def test_create_user_success(): +@pytest.mark.asyncio +async def test_create_user_success(): pass # Test error cases -def test_create_user_with_duplicate_email(): +@pytest.mark.asyncio +async def test_create_user_with_duplicate_email(): pass -def test_create_user_with_invalid_email(): +@pytest.mark.asyncio +async def test_create_user_with_invalid_email(): pass # Test edge cases -def test_create_user_with_empty_password(): +@pytest.mark.asyncio +async def test_create_user_with_empty_password(): pass # Test authorization -def test_user_cannot_delete_other_users_resources(): +@pytest.mark.asyncio +async def test_user_cannot_delete_other_users_resources(): pass -def test_superuser_can_delete_any_resource(): +@pytest.mark.asyncio +async def test_superuser_can_delete_any_resource(): pass ``` -### API Testing Pattern +### API Testing Pattern (Async) ```python -from fastapi.testclient import TestClient +import pytest +from httpx import AsyncClient +from app.main import app -def test_create_user_endpoint(client: TestClient): - """Test POST /api/v1/users endpoint.""" - response = client.post( +@pytest.mark.asyncio +async def test_create_user_endpoint(): + """Test POST /api/v1/users endpoint (async).""" + async with AsyncClient(app=app, base_url="http://test") as client: + response = await client.post( "/api/v1/users", json={ "email": "newuser@example.com", diff --git a/backend/docs/COMMON_PITFALLS.md b/backend/docs/COMMON_PITFALLS.md new file mode 100644 index 0000000..8bbb064 --- /dev/null +++ b/backend/docs/COMMON_PITFALLS.md @@ -0,0 +1,698 @@ +# Common Pitfalls & How to Avoid Them + +> **Purpose**: This document catalogs common mistakes encountered during implementation and provides explicit rules to prevent them. **Read this before writing any code.** + +## Table of Contents + +- [SQLAlchemy & Database](#sqlalchemy--database) +- [Pydantic & Validation](#pydantic--validation) +- [FastAPI & API Design](#fastapi--api-design) +- [Security & Authentication](#security--authentication) +- [Python Language Gotchas](#python-language-gotchas) + +--- + +## SQLAlchemy & Database + +### ❌ PITFALL #1: Using Mutable Defaults in Columns + +**Issue**: Using `default={}` or `default=[]` creates shared state across all instances. + +```python +# ❌ WRONG - All instances share the same dict! +class User(Base): + metadata = Column(JSON, default={}) # DANGER: Mutable default! + tags = Column(JSON, default=[]) # DANGER: Shared list! +``` + +```python +# ✅ CORRECT - Use callable factory +class User(Base): + metadata = Column(JSON, default=dict) # New dict per instance + tags = Column(JSON, default=list) # New list per instance +``` + +**Rule**: Always use `default=dict` or `default=list` (without parentheses), never `default={}` or `default=[]`. + +--- + +### ❌ PITFALL #2: Forgetting to Index Foreign Keys + +**Issue**: Foreign key columns without indexes cause slow JOIN operations. + +```python +# ❌ WRONG - No index on foreign key +class UserSession(Base): + user_id = Column(UUID, ForeignKey('users.id'), nullable=False) +``` + +```python +# ✅ CORRECT - Always index foreign keys +class UserSession(Base): + user_id = Column(UUID, ForeignKey('users.id'), nullable=False, index=True) +``` + +**Rule**: ALWAYS add `index=True` to foreign key columns. SQLAlchemy doesn't do this automatically. + +--- + +### ❌ PITFALL #3: Missing Composite Indexes + +**Issue**: Queries filtering by multiple columns cannot use single-column indexes efficiently. + +```python +# ❌ MISSING - Slow query on (user_id, is_active) +class UserSession(Base): + user_id = Column(UUID, ForeignKey('users.id'), index=True) + is_active = Column(Boolean, default=True, index=True) + # Query: WHERE user_id=X AND is_active=TRUE uses only one index! +``` + +```python +# ✅ CORRECT - Composite index for common query pattern +class UserSession(Base): + user_id = Column(UUID, ForeignKey('users.id'), index=True) + is_active = Column(Boolean, default=True, index=True) + + __table_args__ = ( + Index('ix_user_sessions_user_active', 'user_id', 'is_active'), + ) +``` + +**Rule**: Add composite indexes for commonly used multi-column filters. Review query patterns and create indexes accordingly. + +**Performance Impact**: Can reduce query time from seconds to milliseconds for large tables. + +--- + +### ❌ PITFALL #4: Not Using Soft Deletes + +**Issue**: Hard deletes destroy data and audit trails permanently. + +```python +# ❌ RISKY - Permanent data loss +def delete_user(user_id: UUID): + user = db.query(User).filter(User.id == user_id).first() + db.delete(user) # Data gone forever! + db.commit() +``` + +```python +# ✅ CORRECT - Soft delete with audit trail +class User(Base): + deleted_at = Column(DateTime(timezone=True), nullable=True) + +def soft_delete_user(user_id: UUID): + user = db.query(User).filter(User.id == user_id).first() + user.deleted_at = datetime.now(timezone.utc) + db.commit() +``` + +**Rule**: For user data, ALWAYS use soft deletes. Add `deleted_at` column and filter queries with `.filter(deleted_at.is_(None))`. + +--- + +### ❌ PITFALL #5: Missing Query Ordering + +**Issue**: Queries without `ORDER BY` return unpredictable results, breaking pagination. + +```python +# ❌ WRONG - Random order, pagination broken +def get_users(skip: int, limit: int): + return db.query(User).offset(skip).limit(limit).all() +``` + +```python +# ✅ CORRECT - Stable ordering for consistent pagination +def get_users(skip: int, limit: int): + return ( + db.query(User) + .filter(User.deleted_at.is_(None)) + .order_by(User.created_at.desc()) # Consistent order + .offset(skip) + .limit(limit) + .all() + ) +``` + +**Rule**: ALWAYS add `.order_by()` to paginated queries. Default to `created_at.desc()` for newest-first. + +--- + +## Pydantic & Validation + +### ❌ PITFALL #6: Missing Size Validation on JSON Fields + +**Issue**: Unbounded JSON fields enable DoS attacks through deeply nested objects. + +```python +# ❌ WRONG - No size limit (JSON bomb vulnerability) +class UserCreate(BaseModel): + metadata: dict[str, Any] # No limit! +``` + +```python +# ✅ CORRECT - Validate serialized size +import json +from pydantic import field_validator + +class UserCreate(BaseModel): + metadata: dict[str, Any] + + @field_validator("metadata") + @classmethod + def validate_metadata_size(cls, v: dict[str, Any]) -> dict[str, Any]: + metadata_json = json.dumps(v, separators=(",", ":")) + max_size = 10_000 # 10KB limit + + if len(metadata_json) > max_size: + raise ValueError(f"Metadata exceeds {max_size} bytes") + + return v +``` + +**Rule**: ALWAYS validate the serialized size of dict/JSON fields. Typical limits: +- User metadata: 10KB +- Configuration: 100KB +- Never exceed 1MB + +**Security Impact**: Prevents DoS attacks via deeply nested JSON objects. + +--- + +### ❌ PITFALL #7: Missing max_length on String Fields + +**Issue**: Unbounded text fields enable memory exhaustion attacks and database errors. + +```python +# ❌ WRONG - No length limit +class UserCreate(BaseModel): + email: str + name: str + bio: str | None = None +``` + +```python +# ✅ CORRECT - Explicit length limits matching database +class UserCreate(BaseModel): + email: str = Field(..., max_length=255) + name: str = Field(..., min_length=1, max_length=100) + bio: str | None = Field(None, max_length=500) +``` + +**Rule**: Add `max_length` to ALL string fields. Limits should match database column definitions: +- Emails: 255 characters +- Names/titles: 100-255 characters +- Descriptions/bios: 500-1000 characters +- Error messages: 5000 characters + +--- + +### ❌ PITFALL #8: Inconsistent Validation Between Create and Update + +**Issue**: Adding validators to Create schema but not Update schema. + +```python +# ❌ INCOMPLETE - Only validates on create +class UserCreate(BaseModel): + email: str = Field(..., max_length=255) + + @field_validator("email") + @classmethod + def validate_email_format(cls, v: str) -> str: + if "@" not in v: + raise ValueError("Invalid email format") + return v.lower() + +class UserUpdate(BaseModel): + email: str | None = None # No validator! +``` + +```python +# ✅ CORRECT - Same validation on both schemas +class UserCreate(BaseModel): + email: str = Field(..., max_length=255) + + @field_validator("email") + @classmethod + def validate_email_format(cls, v: str) -> str: + if "@" not in v: + raise ValueError("Invalid email format") + return v.lower() + +class UserUpdate(BaseModel): + email: str | None = Field(None, max_length=255) + + @field_validator("email") + @classmethod + def validate_email_format(cls, v: str | None) -> str | None: + if v is None: + return v + if "@" not in v: + raise ValueError("Invalid email format") + return v.lower() +``` + +**Rule**: Apply the SAME validators to both Create and Update schemas. Handle `None` values in Update validators. + +--- + +### ❌ PITFALL #9: Not Using Field Descriptions + +**Issue**: Missing descriptions make API documentation unclear. + +```python +# ❌ WRONG - No descriptions +class UserCreate(BaseModel): + email: str + password: str + is_superuser: bool = False +``` + +```python +# ✅ CORRECT - Clear descriptions +class UserCreate(BaseModel): + email: str = Field( + ..., + description="User's email address (must be unique)", + examples=["user@example.com"] + ) + password: str = Field( + ..., + min_length=8, + description="Password (minimum 8 characters)", + examples=["SecurePass123!"] + ) + is_superuser: bool = Field( + default=False, + description="Whether user has superuser privileges" + ) +``` + +**Rule**: Add `description` and `examples` to all fields for automatic OpenAPI documentation. + +--- + +## FastAPI & API Design + +### ❌ PITFALL #10: Missing Rate Limiting + +**Issue**: No rate limiting allows abuse and DoS attacks. + +```python +# ❌ WRONG - No rate limits +@router.post("/auth/login") +def login(credentials: OAuth2PasswordRequestForm): + # Anyone can try unlimited passwords! + ... +``` + +```python +# ✅ CORRECT - Rate limit sensitive endpoints +from slowapi import Limiter + +limiter = Limiter(key_func=lambda request: request.client.host) + +@router.post("/auth/login") +@limiter.limit("5/minute") # Only 5 attempts per minute +def login(request: Request, credentials: OAuth2PasswordRequestForm): + ... +``` + +**Rule**: Apply rate limits to ALL endpoints: +- Authentication: 5/minute +- Write operations: 10-20/minute +- Read operations: 30-60/minute + +--- + +### ❌ PITFALL #11: Returning Sensitive Data in Responses + +**Issue**: Exposing internal fields like passwords, tokens, or internal IDs. + +```python +# ❌ WRONG - Returns password hash! +@router.get("/users/{user_id}") +def get_user(user_id: UUID, db: Session = Depends(get_db)) -> User: + return user_crud.get(db, id=user_id) # Returns ORM model with ALL fields! +``` + +```python +# ✅ CORRECT - Use response schema +@router.get("/users/{user_id}", response_model=UserResponse) +def get_user(user_id: UUID, db: Session = Depends(get_db)): + user = user_crud.get(db, id=user_id) + if not user: + raise HTTPException(status_code=404, detail="User not found") + return user # Pydantic filters to only UserResponse fields + +class UserResponse(BaseModel): + """Public user data - NO sensitive fields.""" + id: UUID + email: str + is_active: bool + created_at: datetime + # NO: password, hashed_password, tokens, etc. + + model_config = ConfigDict(from_attributes=True) +``` + +**Rule**: ALWAYS use dedicated response schemas. Never return ORM models directly. + +--- + +### ❌ PITFALL #12: Missing Error Response Standardization + +**Issue**: Inconsistent error formats confuse API consumers. + +```python +# ❌ WRONG - Different error formats +@router.get("/users/{user_id}") +def get_user(user_id: UUID): + if not user: + raise HTTPException(404, "Not found") # Format 1 + + if not user.is_active: + return {"error": "User inactive"} # Format 2 + + try: + ... + except Exception as e: + return {"message": str(e)} # Format 3 +``` + +```python +# ✅ CORRECT - Consistent error format +class ErrorResponse(BaseModel): + success: bool = False + errors: list[ErrorDetail] + +class ErrorDetail(BaseModel): + code: str + message: str + field: str | None = None + +@router.get("/users/{user_id}") +def get_user(user_id: UUID): + if not user: + raise NotFoundError( + message="User not found", + error_code="USER_001" + ) + +# Global exception handler ensures consistent format +@app.exception_handler(APIException) +async def api_exception_handler(request: Request, exc: APIException): + return JSONResponse( + status_code=exc.status_code, + content={ + "success": False, + "errors": [ + { + "code": exc.error_code, + "message": exc.message, + "field": exc.field + } + ] + } + ) +``` + +**Rule**: Use custom exceptions and global handlers for consistent error responses across all endpoints. + +--- + +## Security & Authentication + +### ❌ PITFALL #13: Logging Sensitive Information + +**Issue**: Passwords, tokens, and secrets in logs create security vulnerabilities. + +```python +# ❌ WRONG - Logs credentials +logger.info(f"User {email} logged in with password: {password}") # NEVER! +logger.debug(f"JWT token: {access_token}") # NEVER! +logger.info(f"Database URL: {settings.database_url}") # Contains password! +``` + +```python +# ✅ CORRECT - Never log sensitive data +logger.info(f"User {email} logged in successfully") +logger.debug("Access token generated") +logger.info(f"Database connected: {settings.database_url.split('@')[1]}") # Only host +``` + +**Rule**: NEVER log: +- Passwords (plain or hashed) +- Tokens (access, refresh, API keys) +- Full database URLs +- Credit card numbers +- Personal data (SSN, passport, etc.) + +**Use Pydantic's `SecretStr`** for sensitive config values. + +--- + +### ❌ PITFALL #14: Weak Password Requirements + +**Issue**: No password strength requirements allow weak passwords. + +```python +# ❌ WRONG - No validation +class UserCreate(BaseModel): + password: str +``` + +```python +# ✅ CORRECT - Enforce minimum standards +class UserCreate(BaseModel): + password: str = Field(..., min_length=8) + + @field_validator("password") + @classmethod + def validate_password_strength(cls, v: str) -> str: + if len(v) < 8: + raise ValueError("Password must be at least 8 characters") + + # For admin/superuser, enforce stronger requirements + has_upper = any(c.isupper() for c in v) + has_lower = any(c.islower() for c in v) + has_digit = any(c.isdigit() for c in v) + + if not (has_upper and has_lower and has_digit): + raise ValueError( + "Password must contain uppercase, lowercase, and number" + ) + + return v +``` + +**Rule**: Enforce password requirements: +- Minimum 8 characters +- Mix of upper/lower case and numbers for sensitive accounts +- Use bcrypt with appropriate cost factor (12+) + +--- + +### ❌ PITFALL #15: Not Validating Token Ownership + +**Issue**: Users can access other users' resources using valid tokens. + +```python +# ❌ WRONG - No ownership check +@router.delete("/sessions/{session_id}") +def revoke_session( + session_id: UUID, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db) +): + session = session_crud.get(db, id=session_id) + session_crud.deactivate(db, session_id=session_id) + # BUG: User can revoke ANYONE'S session! + return {"message": "Session revoked"} +``` + +```python +# ✅ CORRECT - Verify ownership +@router.delete("/sessions/{session_id}") +def revoke_session( + session_id: UUID, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db) +): + session = session_crud.get(db, id=session_id) + + if not session: + raise NotFoundError("Session not found") + + # CRITICAL: Check ownership + if session.user_id != current_user.id: + raise AuthorizationError("You can only revoke your own sessions") + + session_crud.deactivate(db, session_id=session_id) + return {"message": "Session revoked"} +``` + +**Rule**: ALWAYS verify resource ownership before allowing operations. Check `resource.user_id == current_user.id`. + +--- + +## Python Language Gotchas + +### ❌ PITFALL #16: Using is for Value Comparison + +**Issue**: `is` checks identity, not equality. + +```python +# ❌ WRONG - Compares object identity +if user.role is "admin": # May fail due to string interning + grant_access() + +if count is 0: # Never works for integers outside -5 to 256 + return empty_response +``` + +```python +# ✅ CORRECT - Use == for value comparison +if user.role == "admin": + grant_access() + +if count == 0: + return empty_response +``` + +**Rule**: Use `==` for value comparison. Only use `is` for: +- `is None` (checking for None) +- `is True` / `is False` (checking for exact boolean objects) + +--- + +### ❌ PITFALL #17: Mutable Default Arguments + +**Issue**: Default mutable arguments are shared across all function calls. + +```python +# ❌ WRONG - list is shared! +def add_tag(user: User, tags: list = []): + tags.append("default") + user.tags.extend(tags) + # Second call will have ["default", "default"]! +``` + +```python +# ✅ CORRECT - Use None and create new list +def add_tag(user: User, tags: list | None = None): + if tags is None: + tags = [] + tags.append("default") + user.tags.extend(tags) +``` + +**Rule**: Never use mutable defaults (`[]`, `{}`). Use `None` and create inside function. + +--- + +### ❌ PITFALL #18: Not Using Type Hints + +**Issue**: Missing type hints prevent catching bugs at development time. + +```python +# ❌ WRONG - No type hints +def create_user(email, password, is_active=True): + user = User(email=email, password=password, is_active=is_active) + db.add(user) + return user +``` + +```python +# ✅ CORRECT - Full type hints +def create_user( + email: str, + password: str, + is_active: bool = True +) -> User: + user = User(email=email, password=password, is_active=is_active) + db.add(user) + return user +``` + +**Rule**: Add type hints to ALL functions. Use `mypy` to enforce type checking. + +--- + +## Checklist Before Committing + +Use this checklist to catch issues before code review: + +### Database +- [ ] No mutable defaults (`default=dict`, not `default={}`) +- [ ] All foreign keys have `index=True` +- [ ] Composite indexes for multi-column queries +- [ ] Soft deletes with `deleted_at` column +- [ ] All queries have `.order_by()` for pagination + +### Validation +- [ ] All dict/JSON fields have size validators +- [ ] All string fields have `max_length` +- [ ] Validators applied to BOTH Create and Update schemas +- [ ] All fields have descriptions + +### API Design +- [ ] Rate limits on all endpoints +- [ ] Response schemas (never return ORM models) +- [ ] Consistent error format with global handlers +- [ ] OpenAPI docs are clear and complete + +### Security +- [ ] No passwords, tokens, or secrets in logs +- [ ] Password strength validation +- [ ] Resource ownership verification +- [ ] CORS configured (no wildcards in production) + +### Python +- [ ] Use `==` not `is` for value comparison +- [ ] No mutable default arguments +- [ ] Type hints on all functions +- [ ] No unused imports or variables + +--- + +## Prevention Tools + +### Pre-commit Checks + +Add these to your development workflow: + +```bash +# Format code +black app tests +isort app tests + +# Type checking +mypy app --strict + +# Linting +flake8 app tests + +# Run tests +pytest --cov=app --cov-report=term-missing + +# Check coverage (should be 80%+) +coverage report --fail-under=80 +``` + +--- + +## When to Update This Document + +Add new entries when: +1. A bug makes it to production +2. Multiple review cycles catch the same issue +3. An issue takes >30 minutes to debug +4. Security vulnerability discovered + +--- + +**Last Updated**: 2025-10-31 +**Issues Cataloged**: 18 common pitfalls +**Remember**: This document exists because these issues HAVE occurred. Don't skip it.