Files
syndarix/mcp-servers/git-ops/workspace.py
Felipe Cardoso 76d7de5334 **feat(git-ops): enhance MCP server with Git provider updates and SSRF protection**
- Added `mcp-git-ops` service to `docker-compose.dev.yml` with health checks and configurations.
- Integrated SSRF protection in repository URL validation for enhanced security.
- Expanded `pyproject.toml` mypy settings and adjusted code to meet stricter type checking.
- Improved workspace management and GitWrapper operations with error handling refinements.
- Updated input validation, branching, and repository operations to align with new error structure.
- Shut down thread pool executor gracefully during server cleanup.
2026-01-07 09:17:00 +01:00

615 lines
18 KiB
Python

"""
Workspace management for Git Operations MCP Server.
Handles isolated workspaces for each project, including creation,
locking, cleanup, and size management.
"""
import asyncio
import json
import logging
import shutil
from datetime import UTC, datetime, timedelta
from pathlib import Path
from typing import Any
import aiofiles # type: ignore[import-untyped]
from filelock import FileLock, Timeout
from config import Settings, get_settings
from exceptions import (
WorkspaceLockedError,
WorkspaceNotFoundError,
WorkspaceSizeExceededError,
)
from models import WorkspaceInfo, WorkspaceState
logger = logging.getLogger(__name__)
# Metadata file name
WORKSPACE_METADATA_FILE = ".syndarix-workspace.json"
class WorkspaceManager:
"""
Manages git workspaces for projects.
Each project gets an isolated workspace directory for git operations.
Supports distributed locking via Redis or local file locks.
"""
def __init__(self, settings: Settings | None = None) -> None:
"""
Initialize WorkspaceManager.
Args:
settings: Optional settings override
"""
self.settings = settings or get_settings()
self.base_path = self.settings.workspace_base_path
self._ensure_base_path()
def _ensure_base_path(self) -> None:
"""Ensure the base workspace directory exists."""
self.base_path.mkdir(parents=True, exist_ok=True)
def _get_workspace_path(self, project_id: str) -> Path:
"""Get the path for a project workspace with path traversal protection."""
# Sanitize project ID for filesystem
safe_id = "".join(c if c.isalnum() or c in "-_" else "_" for c in project_id)
# Reject reserved names
reserved_names = {".", "..", "con", "prn", "aux", "nul"}
if safe_id.lower() in reserved_names:
raise ValueError(f"Invalid project ID: reserved name '{project_id}'")
# Construct path and verify it's within base_path (prevent path traversal)
workspace_path = (self.base_path / safe_id).resolve()
base_resolved = self.base_path.resolve()
if not workspace_path.is_relative_to(base_resolved):
raise ValueError(
f"Invalid project ID: path traversal detected '{project_id}'"
)
return workspace_path
def _get_lock_path(self, project_id: str) -> Path:
"""Get the lock file path for a workspace."""
return self._get_workspace_path(project_id) / ".lock"
def _get_metadata_path(self, project_id: str) -> Path:
"""Get the metadata file path for a workspace."""
return self._get_workspace_path(project_id) / WORKSPACE_METADATA_FILE
async def get_workspace(self, project_id: str) -> WorkspaceInfo | None:
"""
Get workspace info for a project.
Args:
project_id: Project identifier
Returns:
WorkspaceInfo or None if not found
"""
workspace_path = self._get_workspace_path(project_id)
if not workspace_path.exists():
return None
# Load metadata
metadata = await self._load_metadata(project_id)
# Calculate size
size_bytes = await self._calculate_size(workspace_path)
# Check lock status
lock_holder = None
lock_expires = None
if metadata:
lock_holder = metadata.get("lock_holder")
if metadata.get("lock_expires"):
lock_expires = datetime.fromisoformat(metadata["lock_expires"])
# Clear expired locks
if lock_expires < datetime.now(UTC):
lock_holder = None
lock_expires = None
# Determine state
state = WorkspaceState.READY
if lock_holder:
state = WorkspaceState.LOCKED
# Check if stale
last_accessed = datetime.now(UTC)
if metadata and metadata.get("last_accessed"):
last_accessed = datetime.fromisoformat(metadata["last_accessed"])
stale_threshold = datetime.now(UTC) - timedelta(
days=self.settings.workspace_stale_days
)
if last_accessed < stale_threshold:
state = WorkspaceState.STALE
return WorkspaceInfo(
project_id=project_id,
path=str(workspace_path),
state=state,
repo_url=metadata.get("repo_url") if metadata else None,
current_branch=metadata.get("current_branch") if metadata else None,
last_accessed=last_accessed,
size_bytes=size_bytes,
lock_holder=lock_holder,
lock_expires=lock_expires,
)
async def create_workspace(
self,
project_id: str,
repo_url: str | None = None,
) -> WorkspaceInfo:
"""
Create or get a workspace for a project.
Args:
project_id: Project identifier
repo_url: Optional repository URL
Returns:
WorkspaceInfo for the workspace
"""
workspace_path = self._get_workspace_path(project_id)
if workspace_path.exists():
# Workspace already exists, update metadata
await self._update_metadata(project_id, repo_url=repo_url)
workspace = await self.get_workspace(project_id)
if workspace:
return workspace
# Create workspace directory
workspace_path.mkdir(parents=True, exist_ok=True)
# Create initial metadata
metadata = {
"project_id": project_id,
"repo_url": repo_url,
"created_at": datetime.now(UTC).isoformat(),
"last_accessed": datetime.now(UTC).isoformat(),
}
await self._save_metadata(project_id, metadata)
return WorkspaceInfo(
project_id=project_id,
path=str(workspace_path),
state=WorkspaceState.INITIALIZING,
repo_url=repo_url,
last_accessed=datetime.now(UTC),
size_bytes=0,
)
async def delete_workspace(self, project_id: str, force: bool = False) -> bool:
"""
Delete a workspace.
Args:
project_id: Project identifier
force: Force delete even if locked
Returns:
True if deleted
"""
workspace_path = self._get_workspace_path(project_id)
if not workspace_path.exists():
return True
# Check lock
if not force:
workspace = await self.get_workspace(project_id)
if workspace and workspace.state == WorkspaceState.LOCKED:
raise WorkspaceLockedError(project_id, workspace.lock_holder)
try:
# Use shutil.rmtree for robust deletion
shutil.rmtree(workspace_path)
logger.info(f"Deleted workspace for project: {project_id}")
return True
except Exception as e:
logger.error(f"Failed to delete workspace {project_id}: {e}")
return False
async def lock_workspace(
self,
project_id: str,
holder: str,
timeout: int | None = None,
) -> bool:
"""
Acquire a lock on a workspace.
Args:
project_id: Project identifier
holder: Lock holder identifier (agent_id)
timeout: Lock timeout in seconds
Returns:
True if lock acquired
Raises:
WorkspaceNotFoundError: If workspace doesn't exist
WorkspaceLockedError: If already locked by another
"""
workspace = await self.get_workspace(project_id)
if workspace is None:
raise WorkspaceNotFoundError(project_id)
# Check if already locked by someone else
if workspace.state == WorkspaceState.LOCKED and workspace.lock_holder != holder:
# Check if lock expired
if workspace.lock_expires and workspace.lock_expires > datetime.now(UTC):
raise WorkspaceLockedError(project_id, workspace.lock_holder)
# Calculate lock expiry
lock_timeout = timeout or self.settings.workspace_lock_timeout
lock_expires = datetime.now(UTC) + timedelta(seconds=lock_timeout)
# Update metadata with lock info
await self._update_metadata(
project_id,
lock_holder=holder,
lock_expires=lock_expires.isoformat(),
)
logger.info(f"Workspace {project_id} locked by {holder}")
return True
async def unlock_workspace(
self,
project_id: str,
holder: str,
force: bool = False,
) -> bool:
"""
Release a lock on a workspace.
Args:
project_id: Project identifier
holder: Lock holder identifier
force: Force unlock regardless of holder
Returns:
True if unlocked
"""
workspace = await self.get_workspace(project_id)
if workspace is None:
raise WorkspaceNotFoundError(project_id)
# Verify holder
if not force and workspace.lock_holder and workspace.lock_holder != holder:
raise WorkspaceLockedError(project_id, workspace.lock_holder)
# Clear lock
await self._update_metadata(
project_id,
lock_holder=None,
lock_expires=None,
)
logger.info(f"Workspace {project_id} unlocked by {holder}")
return True
async def touch_workspace(self, project_id: str) -> None:
"""
Update last accessed time for a workspace.
Args:
project_id: Project identifier
"""
await self._update_metadata(
project_id,
last_accessed=datetime.now(UTC).isoformat(),
)
async def update_workspace_branch(
self,
project_id: str,
branch: str,
) -> None:
"""
Update the current branch in workspace metadata.
Args:
project_id: Project identifier
branch: Current branch name
"""
await self._update_metadata(
project_id,
current_branch=branch,
last_accessed=datetime.now(UTC).isoformat(),
)
async def check_size_limit(self, project_id: str) -> bool:
"""
Check if workspace exceeds size limit.
Args:
project_id: Project identifier
Returns:
True if within limits
Raises:
WorkspaceSizeExceededError: If size exceeds limit
"""
workspace_path = self._get_workspace_path(project_id)
if not workspace_path.exists():
return True
size_bytes = await self._calculate_size(workspace_path)
size_gb = size_bytes / (1024**3)
max_size_gb = self.settings.workspace_max_size_gb
if size_gb > max_size_gb:
raise WorkspaceSizeExceededError(project_id, size_gb, max_size_gb)
return True
async def list_workspaces(
self,
include_stale: bool = False,
) -> list[WorkspaceInfo]:
"""
List all workspaces.
Args:
include_stale: Include stale workspaces
Returns:
List of WorkspaceInfo
"""
workspaces: list[WorkspaceInfo] = []
if not self.base_path.exists():
return workspaces
for entry in self.base_path.iterdir():
if entry.is_dir() and not entry.name.startswith("."):
# Extract project_id from directory name
workspace = await self.get_workspace(entry.name)
if workspace:
if not include_stale and workspace.state == WorkspaceState.STALE:
continue
workspaces.append(workspace)
return workspaces
async def cleanup_stale_workspaces(self) -> int:
"""
Clean up stale workspaces.
Returns:
Number of workspaces cleaned up
"""
cleaned = 0
workspaces = await self.list_workspaces(include_stale=True)
for workspace in workspaces:
if workspace.state == WorkspaceState.STALE:
try:
await self.delete_workspace(workspace.project_id, force=True)
cleaned += 1
except Exception as e:
logger.error(
f"Failed to cleanup stale workspace {workspace.project_id}: {e}"
)
if cleaned > 0:
logger.info(f"Cleaned up {cleaned} stale workspaces")
return cleaned
async def get_total_size(self) -> int:
"""
Get total size of all workspaces.
Returns:
Total size in bytes
"""
return await self._calculate_size(self.base_path)
# Private methods
async def _load_metadata(self, project_id: str) -> dict[str, Any] | None:
"""Load workspace metadata from file."""
metadata_path = self._get_metadata_path(project_id)
if not metadata_path.exists():
return None
try:
async with aiofiles.open(metadata_path) as f:
content = await f.read()
return json.loads(content)
except Exception as e:
logger.warning(f"Failed to load metadata for {project_id}: {e}")
return None
async def _save_metadata(
self,
project_id: str,
metadata: dict[str, Any],
) -> None:
"""Save workspace metadata to file."""
metadata_path = self._get_metadata_path(project_id)
# Ensure parent directory exists
metadata_path.parent.mkdir(parents=True, exist_ok=True)
try:
async with aiofiles.open(metadata_path, "w") as f:
await f.write(json.dumps(metadata, indent=2))
except Exception as e:
logger.error(f"Failed to save metadata for {project_id}: {e}")
async def _update_metadata(
self,
project_id: str,
**updates: Any,
) -> None:
"""Update specific fields in workspace metadata."""
metadata = await self._load_metadata(project_id) or {}
# Handle None values (to clear fields)
for key, value in updates.items():
if value is None:
metadata.pop(key, None)
else:
metadata[key] = value
await self._save_metadata(project_id, metadata)
async def _calculate_size(self, path: Path) -> int:
"""Calculate total size of a directory."""
def _calc_size() -> int:
total = 0
try:
for entry in path.rglob("*"):
if entry.is_file():
try:
total += entry.stat().st_size
except OSError:
pass
except Exception:
pass
return total
# Run in executor for async compatibility
loop = asyncio.get_event_loop()
return await loop.run_in_executor(None, _calc_size)
class WorkspaceLock:
    """
    Async context manager for workspace locking.

    Acquires the workspace lock on entry and releases it on exit;
    release failures are logged rather than propagated.
    """

    def __init__(
        self,
        manager: WorkspaceManager,
        project_id: str,
        holder: str,
        timeout: int | None = None,
    ) -> None:
        """
        Initialize workspace lock.

        Args:
            manager: WorkspaceManager instance
            project_id: Project identifier
            holder: Lock holder identifier
            timeout: Lock timeout in seconds
        """
        self.manager = manager
        self.project_id = project_id
        self.holder = holder
        self.timeout = timeout
        self._acquired = False

    async def __aenter__(self) -> "WorkspaceLock":
        """Acquire the lock and mark it as held."""
        await self.manager.lock_workspace(self.project_id, self.holder, self.timeout)
        self._acquired = True
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Release the lock if it was acquired, swallowing release errors."""
        if not self._acquired:
            return
        try:
            await self.manager.unlock_workspace(self.project_id, self.holder)
        except Exception as e:
            logger.warning(f"Failed to release lock for {self.project_id}: {e}")
class FileLockManager:
    """
    File-based locking for single-instance deployments.

    Uses filelock for local locking when Redis is not available.
    Lock objects are cached per key so repeated calls reuse them.
    """

    def __init__(self, lock_dir: Path) -> None:
        """
        Initialize file lock manager.

        Args:
            lock_dir: Directory for lock files
        """
        self.lock_dir = lock_dir
        self.lock_dir.mkdir(parents=True, exist_ok=True)
        self._locks: dict[str, FileLock] = {}

    def _get_lock(self, key: str) -> FileLock:
        """Return the cached FileLock for a key, creating it on first use."""
        lock = self._locks.get(key)
        if lock is None:
            lock = FileLock(self.lock_dir / f"{key}.lock")
            self._locks[key] = lock
        return lock

    def acquire(
        self,
        key: str,
        timeout: float = 10.0,
    ) -> bool:
        """
        Acquire a lock.

        Args:
            key: Lock key
            timeout: Timeout in seconds

        Returns:
            True if acquired
        """
        try:
            self._get_lock(key).acquire(timeout=timeout)
        except Timeout:
            return False
        return True

    def release(self, key: str) -> bool:
        """
        Release a lock.

        Args:
            key: Lock key

        Returns:
            True if released
        """
        lock = self._locks.get(key)
        if lock is None:
            return False
        try:
            lock.release()
        except Exception:
            return False
        return True

    def is_locked(self, key: str) -> bool:
        """Check if a key is locked."""
        return self._get_lock(key).is_locked