""" Workspace management for Git Operations MCP Server. Handles isolated workspaces for each project, including creation, locking, cleanup, and size management. """ import asyncio import json import logging import shutil from datetime import UTC, datetime, timedelta from pathlib import Path from typing import Any import aiofiles # type: ignore[import-untyped] from filelock import FileLock, Timeout from config import Settings, get_settings from exceptions import ( WorkspaceLockedError, WorkspaceNotFoundError, WorkspaceSizeExceededError, ) from models import WorkspaceInfo, WorkspaceState logger = logging.getLogger(__name__) # Metadata file name WORKSPACE_METADATA_FILE = ".syndarix-workspace.json" class WorkspaceManager: """ Manages git workspaces for projects. Each project gets an isolated workspace directory for git operations. Supports distributed locking via Redis or local file locks. """ def __init__(self, settings: Settings | None = None) -> None: """ Initialize WorkspaceManager. Args: settings: Optional settings override """ self.settings = settings or get_settings() self.base_path = self.settings.workspace_base_path self._ensure_base_path() def _ensure_base_path(self) -> None: """Ensure the base workspace directory exists.""" self.base_path.mkdir(parents=True, exist_ok=True) def _get_workspace_path(self, project_id: str) -> Path: """Get the path for a project workspace with path traversal protection.""" # Sanitize project ID for filesystem safe_id = "".join(c if c.isalnum() or c in "-_" else "_" for c in project_id) # Reject reserved names reserved_names = {".", "..", "con", "prn", "aux", "nul"} if safe_id.lower() in reserved_names: raise ValueError(f"Invalid project ID: reserved name '{project_id}'") # Construct path and verify it's within base_path (prevent path traversal) workspace_path = (self.base_path / safe_id).resolve() base_resolved = self.base_path.resolve() if not workspace_path.is_relative_to(base_resolved): raise ValueError( f"Invalid project ID: path traversal detected '{project_id}'" ) return workspace_path def _get_lock_path(self, project_id: str) -> Path: """Get the lock file path for a workspace.""" return self._get_workspace_path(project_id) / ".lock" def _get_metadata_path(self, project_id: str) -> Path: """Get the metadata file path for a workspace.""" return self._get_workspace_path(project_id) / WORKSPACE_METADATA_FILE async def get_workspace(self, project_id: str) -> WorkspaceInfo | None: """ Get workspace info for a project. 

    async def get_workspace(self, project_id: str) -> WorkspaceInfo | None:
        """
        Get workspace info for a project.

        Args:
            project_id: Project identifier

        Returns:
            WorkspaceInfo or None if not found
        """
        workspace_path = self._get_workspace_path(project_id)
        if not workspace_path.exists():
            return None

        # Load metadata
        metadata = await self._load_metadata(project_id)

        # Calculate size
        size_bytes = await self._calculate_size(workspace_path)

        # Check lock status
        lock_holder = None
        lock_expires = None
        if metadata:
            lock_holder = metadata.get("lock_holder")
            if metadata.get("lock_expires"):
                lock_expires = datetime.fromisoformat(metadata["lock_expires"])
                # Clear expired locks
                if lock_expires < datetime.now(UTC):
                    lock_holder = None
                    lock_expires = None

        # Determine state
        state = WorkspaceState.READY
        if lock_holder:
            state = WorkspaceState.LOCKED

        # Check if stale
        last_accessed = datetime.now(UTC)
        if metadata and metadata.get("last_accessed"):
            last_accessed = datetime.fromisoformat(metadata["last_accessed"])
        stale_threshold = datetime.now(UTC) - timedelta(
            days=self.settings.workspace_stale_days
        )
        if last_accessed < stale_threshold:
            state = WorkspaceState.STALE

        return WorkspaceInfo(
            project_id=project_id,
            path=str(workspace_path),
            state=state,
            repo_url=metadata.get("repo_url") if metadata else None,
            current_branch=metadata.get("current_branch") if metadata else None,
            last_accessed=last_accessed,
            size_bytes=size_bytes,
            lock_holder=lock_holder,
            lock_expires=lock_expires,
        )

    async def create_workspace(
        self,
        project_id: str,
        repo_url: str | None = None,
    ) -> WorkspaceInfo:
        """
        Create or get a workspace for a project.

        Args:
            project_id: Project identifier
            repo_url: Optional repository URL

        Returns:
            WorkspaceInfo for the workspace
        """
        workspace_path = self._get_workspace_path(project_id)

        if workspace_path.exists():
            # Workspace already exists; refresh metadata. Only pass repo_url
            # when provided, since _update_metadata clears fields set to None
            # and we must not erase a previously stored repo_url.
            if repo_url is not None:
                await self._update_metadata(project_id, repo_url=repo_url)
            workspace = await self.get_workspace(project_id)
            if workspace:
                return workspace

        # Create workspace directory
        workspace_path.mkdir(parents=True, exist_ok=True)

        # Create initial metadata
        metadata = {
            "project_id": project_id,
            "repo_url": repo_url,
            "created_at": datetime.now(UTC).isoformat(),
            "last_accessed": datetime.now(UTC).isoformat(),
        }
        await self._save_metadata(project_id, metadata)

        return WorkspaceInfo(
            project_id=project_id,
            path=str(workspace_path),
            state=WorkspaceState.INITIALIZING,
            repo_url=repo_url,
            last_accessed=datetime.now(UTC),
            size_bytes=0,
        )

    async def delete_workspace(self, project_id: str, force: bool = False) -> bool:
        """
        Delete a workspace.

        Args:
            project_id: Project identifier
            force: Force delete even if locked

        Returns:
            True if deleted
        """
        workspace_path = self._get_workspace_path(project_id)
        if not workspace_path.exists():
            return True

        # Check lock
        if not force:
            workspace = await self.get_workspace(project_id)
            if workspace and workspace.state == WorkspaceState.LOCKED:
                raise WorkspaceLockedError(project_id, workspace.lock_holder)

        try:
            # Use shutil.rmtree for robust deletion
            shutil.rmtree(workspace_path)
            logger.info(f"Deleted workspace for project: {project_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to delete workspace {project_id}: {e}")
            return False

    async def lock_workspace(
        self,
        project_id: str,
        holder: str,
        timeout: int | None = None,
    ) -> bool:
        """
        Acquire a lock on a workspace.

        Args:
            project_id: Project identifier
            holder: Lock holder identifier (agent_id)
            timeout: Lock timeout in seconds

        Returns:
            True if lock acquired

        Raises:
            WorkspaceNotFoundError: If workspace doesn't exist
            WorkspaceLockedError: If already locked by another holder
        """
        workspace = await self.get_workspace(project_id)
        if workspace is None:
            raise WorkspaceNotFoundError(project_id)

        # Check if already locked by someone else
        if (
            workspace.state == WorkspaceState.LOCKED
            and workspace.lock_holder != holder
        ):
            # Only block if the existing lock has not expired
            if workspace.lock_expires and workspace.lock_expires > datetime.now(UTC):
                raise WorkspaceLockedError(project_id, workspace.lock_holder)

        # Calculate lock expiry
        lock_timeout = timeout or self.settings.workspace_lock_timeout
        lock_expires = datetime.now(UTC) + timedelta(seconds=lock_timeout)

        # Update metadata with lock info
        await self._update_metadata(
            project_id,
            lock_holder=holder,
            lock_expires=lock_expires.isoformat(),
        )

        logger.info(f"Workspace {project_id} locked by {holder}")
        return True

    async def unlock_workspace(
        self,
        project_id: str,
        holder: str,
        force: bool = False,
    ) -> bool:
        """
        Release a lock on a workspace.

        Args:
            project_id: Project identifier
            holder: Lock holder identifier
            force: Force unlock regardless of holder

        Returns:
            True if unlocked
        """
        workspace = await self.get_workspace(project_id)
        if workspace is None:
            raise WorkspaceNotFoundError(project_id)

        # Verify holder
        if not force and workspace.lock_holder and workspace.lock_holder != holder:
            raise WorkspaceLockedError(project_id, workspace.lock_holder)

        # Clear lock
        await self._update_metadata(
            project_id,
            lock_holder=None,
            lock_expires=None,
        )

        logger.info(f"Workspace {project_id} unlocked by {holder}")
        return True

    async def touch_workspace(self, project_id: str) -> None:
        """
        Update the last-accessed time for a workspace.

        Args:
            project_id: Project identifier
        """
        await self._update_metadata(
            project_id,
            last_accessed=datetime.now(UTC).isoformat(),
        )

    async def update_workspace_branch(
        self,
        project_id: str,
        branch: str,
    ) -> None:
        """
        Update the current branch in workspace metadata.

        Args:
            project_id: Project identifier
            branch: Current branch name
        """
        await self._update_metadata(
            project_id,
            current_branch=branch,
            last_accessed=datetime.now(UTC).isoformat(),
        )

    async def check_size_limit(self, project_id: str) -> bool:
        """
        Check whether a workspace exceeds the size limit.

        Args:
            project_id: Project identifier

        Returns:
            True if within limits

        Raises:
            WorkspaceSizeExceededError: If size exceeds the limit
        """
        workspace_path = self._get_workspace_path(project_id)
        if not workspace_path.exists():
            return True

        size_bytes = await self._calculate_size(workspace_path)
        size_gb = size_bytes / (1024**3)
        max_size_gb = self.settings.workspace_max_size_gb

        if size_gb > max_size_gb:
            raise WorkspaceSizeExceededError(project_id, size_gb, max_size_gb)

        return True

    async def list_workspaces(
        self,
        include_stale: bool = False,
    ) -> list[WorkspaceInfo]:
        """
        List all workspaces.

        Args:
            include_stale: Include stale workspaces

        Returns:
            List of WorkspaceInfo
        """
        workspaces: list[WorkspaceInfo] = []

        if not self.base_path.exists():
            return workspaces

        for entry in self.base_path.iterdir():
            if entry.is_dir() and not entry.name.startswith("."):
                # Directory name doubles as the project_id
                workspace = await self.get_workspace(entry.name)
                if workspace:
                    if not include_stale and workspace.state == WorkspaceState.STALE:
                        continue
                    workspaces.append(workspace)

        return workspaces
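
    # Hedged usage sketch of the lock/unlock pair (the IDs are hypothetical):
    #
    #   await manager.lock_workspace("proj-1", holder="agent-1")
    #   try:
    #       ...  # perform git operations in the workspace
    #   finally:
    #       await manager.unlock_workspace("proj-1", holder="agent-1")
    #
    # The WorkspaceLock context manager below wraps this acquire/release
    # pattern so the lock is released even when an operation raises.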

    async def cleanup_stale_workspaces(self) -> int:
        """
        Clean up stale workspaces.

        Returns:
            Number of workspaces cleaned up
        """
        cleaned = 0
        workspaces = await self.list_workspaces(include_stale=True)

        for workspace in workspaces:
            if workspace.state == WorkspaceState.STALE:
                try:
                    await self.delete_workspace(workspace.project_id, force=True)
                    cleaned += 1
                except Exception as e:
                    logger.error(
                        f"Failed to cleanup stale workspace {workspace.project_id}: {e}"
                    )

        if cleaned > 0:
            logger.info(f"Cleaned up {cleaned} stale workspaces")

        return cleaned

    async def get_total_size(self) -> int:
        """
        Get the total size of all workspaces.

        Returns:
            Total size in bytes
        """
        return await self._calculate_size(self.base_path)

    # Private methods

    async def _load_metadata(self, project_id: str) -> dict[str, Any] | None:
        """Load workspace metadata from file."""
        metadata_path = self._get_metadata_path(project_id)
        if not metadata_path.exists():
            return None

        try:
            async with aiofiles.open(metadata_path) as f:
                content = await f.read()
                return json.loads(content)
        except Exception as e:
            logger.warning(f"Failed to load metadata for {project_id}: {e}")
            return None

    async def _save_metadata(
        self,
        project_id: str,
        metadata: dict[str, Any],
    ) -> None:
        """Save workspace metadata to file."""
        metadata_path = self._get_metadata_path(project_id)

        # Ensure parent directory exists
        metadata_path.parent.mkdir(parents=True, exist_ok=True)

        try:
            async with aiofiles.open(metadata_path, "w") as f:
                await f.write(json.dumps(metadata, indent=2))
        except Exception as e:
            logger.error(f"Failed to save metadata for {project_id}: {e}")

    async def _update_metadata(
        self,
        project_id: str,
        **updates: Any,
    ) -> None:
        """Update specific fields in workspace metadata."""
        metadata = await self._load_metadata(project_id) or {}

        # None values clear the corresponding field
        for key, value in updates.items():
            if value is None:
                metadata.pop(key, None)
            else:
                metadata[key] = value

        await self._save_metadata(project_id, metadata)

    async def _calculate_size(self, path: Path) -> int:
        """Calculate the total size of a directory."""

        def _calc_size() -> int:
            total = 0
            try:
                for entry in path.rglob("*"):
                    if entry.is_file():
                        try:
                            total += entry.stat().st_size
                        except OSError:
                            pass
            except Exception:
                pass
            return total

        # Run in an executor so the directory walk doesn't block the event loop
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _calc_size)


class WorkspaceLock:
    """
    Context manager for workspace locking.

    Provides automatic locking/unlocking with proper cleanup.
    """

    def __init__(
        self,
        manager: WorkspaceManager,
        project_id: str,
        holder: str,
        timeout: int | None = None,
    ) -> None:
        """
        Initialize workspace lock.

        Args:
            manager: WorkspaceManager instance
            project_id: Project identifier
            holder: Lock holder identifier
            timeout: Lock timeout in seconds
        """
        self.manager = manager
        self.project_id = project_id
        self.holder = holder
        self.timeout = timeout
        self._acquired = False

    async def __aenter__(self) -> "WorkspaceLock":
        """Acquire lock on enter."""
        await self.manager.lock_workspace(
            self.project_id,
            self.holder,
            self.timeout,
        )
        self._acquired = True
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Release lock on exit."""
        if self._acquired:
            try:
                await self.manager.unlock_workspace(
                    self.project_id,
                    self.holder,
                )
            except Exception as e:
                logger.warning(f"Failed to release lock for {self.project_id}: {e}")
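

# A minimal usage sketch for WorkspaceLock, assuming get_settings() resolves
# a writable workspace_base_path. The project and holder IDs are hypothetical.
async def _example_locked_update() -> None:
    manager = WorkspaceManager()
    await manager.create_workspace("example-project")
    async with WorkspaceLock(manager, "example-project", holder="agent-1"):
        # Lock is held here and released automatically on exit, even on error
        await manager.update_workspace_branch("example-project", "main")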


class FileLockManager:
    """
    File-based locking for single-instance deployments.

    Uses filelock for local locking when Redis is not available.
    """

    def __init__(self, lock_dir: Path) -> None:
        """
        Initialize file lock manager.

        Args:
            lock_dir: Directory for lock files
        """
        self.lock_dir = lock_dir
        self.lock_dir.mkdir(parents=True, exist_ok=True)
        self._locks: dict[str, FileLock] = {}

    def _get_lock(self, key: str) -> FileLock:
        """Get or create a file lock for a key."""
        if key not in self._locks:
            lock_path = self.lock_dir / f"{key}.lock"
            self._locks[key] = FileLock(lock_path)
        return self._locks[key]

    def acquire(
        self,
        key: str,
        timeout: float = 10.0,
    ) -> bool:
        """
        Acquire a lock.

        Args:
            key: Lock key
            timeout: Timeout in seconds

        Returns:
            True if acquired
        """
        lock = self._get_lock(key)
        try:
            lock.acquire(timeout=timeout)
            return True
        except Timeout:
            return False

    def release(self, key: str) -> bool:
        """
        Release a lock.

        Args:
            key: Lock key

        Returns:
            True if released
        """
        if key in self._locks:
            try:
                self._locks[key].release()
                return True
            except Exception:
                pass
        return False

    def is_locked(self, key: str) -> bool:
        """Check if a key is locked."""
        lock = self._get_lock(key)
        return lock.is_locked
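

# A minimal sketch of FileLockManager usage for a single-instance deployment.
# The lock directory and key below are hypothetical.
def _example_file_locking() -> None:
    locks = FileLockManager(Path("/tmp/git-ops-locks"))
    if locks.acquire("example-project", timeout=5.0):
        try:
            pass  # critical section guarded by the file lock
        finally:
            locks.release("example-project")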