# Forked from cardosofelipe/fast-next-template.
#
# Changelog:
# - Added `mcp-git-ops` service to `docker-compose.dev.yml` with health checks
#   and configurations.
# - Integrated SSRF protection in repository URL validation for enhanced security.
# - Expanded `pyproject.toml` mypy settings and adjusted code to meet stricter
#   type checking.
# - Improved workspace management and GitWrapper operations with error-handling
#   refinements.
# - Updated input validation, branching, and repository operations to align with
#   the new error structure.
# - Shut down the thread pool executor gracefully during server cleanup.
"""
|
|
Workspace management for Git Operations MCP Server.
|
|
|
|
Handles isolated workspaces for each project, including creation,
|
|
locking, cleanup, and size management.
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import shutil
|
|
from datetime import UTC, datetime, timedelta
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import aiofiles # type: ignore[import-untyped]
|
|
from filelock import FileLock, Timeout
|
|
|
|
from config import Settings, get_settings
|
|
from exceptions import (
|
|
WorkspaceLockedError,
|
|
WorkspaceNotFoundError,
|
|
WorkspaceSizeExceededError,
|
|
)
|
|
from models import WorkspaceInfo, WorkspaceState
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Metadata file name
|
|
WORKSPACE_METADATA_FILE = ".syndarix-workspace.json"
|
|
|
|
|
|
class WorkspaceManager:
|
|
"""
|
|
Manages git workspaces for projects.
|
|
|
|
Each project gets an isolated workspace directory for git operations.
|
|
Supports distributed locking via Redis or local file locks.
|
|
"""
|
|
|
|
def __init__(self, settings: Settings | None = None) -> None:
|
|
"""
|
|
Initialize WorkspaceManager.
|
|
|
|
Args:
|
|
settings: Optional settings override
|
|
"""
|
|
self.settings = settings or get_settings()
|
|
self.base_path = self.settings.workspace_base_path
|
|
self._ensure_base_path()
|
|
|
|
def _ensure_base_path(self) -> None:
|
|
"""Ensure the base workspace directory exists."""
|
|
self.base_path.mkdir(parents=True, exist_ok=True)
|
|
|
|
def _get_workspace_path(self, project_id: str) -> Path:
|
|
"""Get the path for a project workspace with path traversal protection."""
|
|
# Sanitize project ID for filesystem
|
|
safe_id = "".join(c if c.isalnum() or c in "-_" else "_" for c in project_id)
|
|
|
|
# Reject reserved names
|
|
reserved_names = {".", "..", "con", "prn", "aux", "nul"}
|
|
if safe_id.lower() in reserved_names:
|
|
raise ValueError(f"Invalid project ID: reserved name '{project_id}'")
|
|
|
|
# Construct path and verify it's within base_path (prevent path traversal)
|
|
workspace_path = (self.base_path / safe_id).resolve()
|
|
base_resolved = self.base_path.resolve()
|
|
|
|
if not workspace_path.is_relative_to(base_resolved):
|
|
raise ValueError(
|
|
f"Invalid project ID: path traversal detected '{project_id}'"
|
|
)
|
|
|
|
return workspace_path
|
|
|
|
def _get_lock_path(self, project_id: str) -> Path:
|
|
"""Get the lock file path for a workspace."""
|
|
return self._get_workspace_path(project_id) / ".lock"
|
|
|
|
def _get_metadata_path(self, project_id: str) -> Path:
|
|
"""Get the metadata file path for a workspace."""
|
|
return self._get_workspace_path(project_id) / WORKSPACE_METADATA_FILE
|
|
|
|
async def get_workspace(self, project_id: str) -> WorkspaceInfo | None:
|
|
"""
|
|
Get workspace info for a project.
|
|
|
|
Args:
|
|
project_id: Project identifier
|
|
|
|
Returns:
|
|
WorkspaceInfo or None if not found
|
|
"""
|
|
workspace_path = self._get_workspace_path(project_id)
|
|
|
|
if not workspace_path.exists():
|
|
return None
|
|
|
|
# Load metadata
|
|
metadata = await self._load_metadata(project_id)
|
|
|
|
# Calculate size
|
|
size_bytes = await self._calculate_size(workspace_path)
|
|
|
|
# Check lock status
|
|
lock_holder = None
|
|
lock_expires = None
|
|
if metadata:
|
|
lock_holder = metadata.get("lock_holder")
|
|
if metadata.get("lock_expires"):
|
|
lock_expires = datetime.fromisoformat(metadata["lock_expires"])
|
|
# Clear expired locks
|
|
if lock_expires < datetime.now(UTC):
|
|
lock_holder = None
|
|
lock_expires = None
|
|
|
|
# Determine state
|
|
state = WorkspaceState.READY
|
|
if lock_holder:
|
|
state = WorkspaceState.LOCKED
|
|
|
|
# Check if stale
|
|
last_accessed = datetime.now(UTC)
|
|
if metadata and metadata.get("last_accessed"):
|
|
last_accessed = datetime.fromisoformat(metadata["last_accessed"])
|
|
stale_threshold = datetime.now(UTC) - timedelta(
|
|
days=self.settings.workspace_stale_days
|
|
)
|
|
if last_accessed < stale_threshold:
|
|
state = WorkspaceState.STALE
|
|
|
|
return WorkspaceInfo(
|
|
project_id=project_id,
|
|
path=str(workspace_path),
|
|
state=state,
|
|
repo_url=metadata.get("repo_url") if metadata else None,
|
|
current_branch=metadata.get("current_branch") if metadata else None,
|
|
last_accessed=last_accessed,
|
|
size_bytes=size_bytes,
|
|
lock_holder=lock_holder,
|
|
lock_expires=lock_expires,
|
|
)
|
|
|
|
async def create_workspace(
|
|
self,
|
|
project_id: str,
|
|
repo_url: str | None = None,
|
|
) -> WorkspaceInfo:
|
|
"""
|
|
Create or get a workspace for a project.
|
|
|
|
Args:
|
|
project_id: Project identifier
|
|
repo_url: Optional repository URL
|
|
|
|
Returns:
|
|
WorkspaceInfo for the workspace
|
|
"""
|
|
workspace_path = self._get_workspace_path(project_id)
|
|
|
|
if workspace_path.exists():
|
|
# Workspace already exists, update metadata
|
|
await self._update_metadata(project_id, repo_url=repo_url)
|
|
workspace = await self.get_workspace(project_id)
|
|
if workspace:
|
|
return workspace
|
|
|
|
# Create workspace directory
|
|
workspace_path.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Create initial metadata
|
|
metadata = {
|
|
"project_id": project_id,
|
|
"repo_url": repo_url,
|
|
"created_at": datetime.now(UTC).isoformat(),
|
|
"last_accessed": datetime.now(UTC).isoformat(),
|
|
}
|
|
await self._save_metadata(project_id, metadata)
|
|
|
|
return WorkspaceInfo(
|
|
project_id=project_id,
|
|
path=str(workspace_path),
|
|
state=WorkspaceState.INITIALIZING,
|
|
repo_url=repo_url,
|
|
last_accessed=datetime.now(UTC),
|
|
size_bytes=0,
|
|
)
|
|
|
|
async def delete_workspace(self, project_id: str, force: bool = False) -> bool:
|
|
"""
|
|
Delete a workspace.
|
|
|
|
Args:
|
|
project_id: Project identifier
|
|
force: Force delete even if locked
|
|
|
|
Returns:
|
|
True if deleted
|
|
"""
|
|
workspace_path = self._get_workspace_path(project_id)
|
|
|
|
if not workspace_path.exists():
|
|
return True
|
|
|
|
# Check lock
|
|
if not force:
|
|
workspace = await self.get_workspace(project_id)
|
|
if workspace and workspace.state == WorkspaceState.LOCKED:
|
|
raise WorkspaceLockedError(project_id, workspace.lock_holder)
|
|
|
|
try:
|
|
# Use shutil.rmtree for robust deletion
|
|
shutil.rmtree(workspace_path)
|
|
logger.info(f"Deleted workspace for project: {project_id}")
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Failed to delete workspace {project_id}: {e}")
|
|
return False
|
|
|
|
async def lock_workspace(
|
|
self,
|
|
project_id: str,
|
|
holder: str,
|
|
timeout: int | None = None,
|
|
) -> bool:
|
|
"""
|
|
Acquire a lock on a workspace.
|
|
|
|
Args:
|
|
project_id: Project identifier
|
|
holder: Lock holder identifier (agent_id)
|
|
timeout: Lock timeout in seconds
|
|
|
|
Returns:
|
|
True if lock acquired
|
|
|
|
Raises:
|
|
WorkspaceNotFoundError: If workspace doesn't exist
|
|
WorkspaceLockedError: If already locked by another
|
|
"""
|
|
workspace = await self.get_workspace(project_id)
|
|
|
|
if workspace is None:
|
|
raise WorkspaceNotFoundError(project_id)
|
|
|
|
# Check if already locked by someone else
|
|
if workspace.state == WorkspaceState.LOCKED and workspace.lock_holder != holder:
|
|
# Check if lock expired
|
|
if workspace.lock_expires and workspace.lock_expires > datetime.now(UTC):
|
|
raise WorkspaceLockedError(project_id, workspace.lock_holder)
|
|
|
|
# Calculate lock expiry
|
|
lock_timeout = timeout or self.settings.workspace_lock_timeout
|
|
lock_expires = datetime.now(UTC) + timedelta(seconds=lock_timeout)
|
|
|
|
# Update metadata with lock info
|
|
await self._update_metadata(
|
|
project_id,
|
|
lock_holder=holder,
|
|
lock_expires=lock_expires.isoformat(),
|
|
)
|
|
|
|
logger.info(f"Workspace {project_id} locked by {holder}")
|
|
return True
|
|
|
|
async def unlock_workspace(
|
|
self,
|
|
project_id: str,
|
|
holder: str,
|
|
force: bool = False,
|
|
) -> bool:
|
|
"""
|
|
Release a lock on a workspace.
|
|
|
|
Args:
|
|
project_id: Project identifier
|
|
holder: Lock holder identifier
|
|
force: Force unlock regardless of holder
|
|
|
|
Returns:
|
|
True if unlocked
|
|
"""
|
|
workspace = await self.get_workspace(project_id)
|
|
|
|
if workspace is None:
|
|
raise WorkspaceNotFoundError(project_id)
|
|
|
|
# Verify holder
|
|
if not force and workspace.lock_holder and workspace.lock_holder != holder:
|
|
raise WorkspaceLockedError(project_id, workspace.lock_holder)
|
|
|
|
# Clear lock
|
|
await self._update_metadata(
|
|
project_id,
|
|
lock_holder=None,
|
|
lock_expires=None,
|
|
)
|
|
|
|
logger.info(f"Workspace {project_id} unlocked by {holder}")
|
|
return True
|
|
|
|
async def touch_workspace(self, project_id: str) -> None:
|
|
"""
|
|
Update last accessed time for a workspace.
|
|
|
|
Args:
|
|
project_id: Project identifier
|
|
"""
|
|
await self._update_metadata(
|
|
project_id,
|
|
last_accessed=datetime.now(UTC).isoformat(),
|
|
)
|
|
|
|
async def update_workspace_branch(
|
|
self,
|
|
project_id: str,
|
|
branch: str,
|
|
) -> None:
|
|
"""
|
|
Update the current branch in workspace metadata.
|
|
|
|
Args:
|
|
project_id: Project identifier
|
|
branch: Current branch name
|
|
"""
|
|
await self._update_metadata(
|
|
project_id,
|
|
current_branch=branch,
|
|
last_accessed=datetime.now(UTC).isoformat(),
|
|
)
|
|
|
|
async def check_size_limit(self, project_id: str) -> bool:
|
|
"""
|
|
Check if workspace exceeds size limit.
|
|
|
|
Args:
|
|
project_id: Project identifier
|
|
|
|
Returns:
|
|
True if within limits
|
|
|
|
Raises:
|
|
WorkspaceSizeExceededError: If size exceeds limit
|
|
"""
|
|
workspace_path = self._get_workspace_path(project_id)
|
|
|
|
if not workspace_path.exists():
|
|
return True
|
|
|
|
size_bytes = await self._calculate_size(workspace_path)
|
|
size_gb = size_bytes / (1024**3)
|
|
max_size_gb = self.settings.workspace_max_size_gb
|
|
|
|
if size_gb > max_size_gb:
|
|
raise WorkspaceSizeExceededError(project_id, size_gb, max_size_gb)
|
|
|
|
return True
|
|
|
|
async def list_workspaces(
|
|
self,
|
|
include_stale: bool = False,
|
|
) -> list[WorkspaceInfo]:
|
|
"""
|
|
List all workspaces.
|
|
|
|
Args:
|
|
include_stale: Include stale workspaces
|
|
|
|
Returns:
|
|
List of WorkspaceInfo
|
|
"""
|
|
workspaces: list[WorkspaceInfo] = []
|
|
|
|
if not self.base_path.exists():
|
|
return workspaces
|
|
|
|
for entry in self.base_path.iterdir():
|
|
if entry.is_dir() and not entry.name.startswith("."):
|
|
# Extract project_id from directory name
|
|
workspace = await self.get_workspace(entry.name)
|
|
if workspace:
|
|
if not include_stale and workspace.state == WorkspaceState.STALE:
|
|
continue
|
|
workspaces.append(workspace)
|
|
|
|
return workspaces
|
|
|
|
async def cleanup_stale_workspaces(self) -> int:
|
|
"""
|
|
Clean up stale workspaces.
|
|
|
|
Returns:
|
|
Number of workspaces cleaned up
|
|
"""
|
|
cleaned = 0
|
|
workspaces = await self.list_workspaces(include_stale=True)
|
|
|
|
for workspace in workspaces:
|
|
if workspace.state == WorkspaceState.STALE:
|
|
try:
|
|
await self.delete_workspace(workspace.project_id, force=True)
|
|
cleaned += 1
|
|
except Exception as e:
|
|
logger.error(
|
|
f"Failed to cleanup stale workspace {workspace.project_id}: {e}"
|
|
)
|
|
|
|
if cleaned > 0:
|
|
logger.info(f"Cleaned up {cleaned} stale workspaces")
|
|
|
|
return cleaned
|
|
|
|
async def get_total_size(self) -> int:
|
|
"""
|
|
Get total size of all workspaces.
|
|
|
|
Returns:
|
|
Total size in bytes
|
|
"""
|
|
return await self._calculate_size(self.base_path)
|
|
|
|
# Private methods
|
|
|
|
async def _load_metadata(self, project_id: str) -> dict[str, Any] | None:
|
|
"""Load workspace metadata from file."""
|
|
metadata_path = self._get_metadata_path(project_id)
|
|
|
|
if not metadata_path.exists():
|
|
return None
|
|
|
|
try:
|
|
async with aiofiles.open(metadata_path) as f:
|
|
content = await f.read()
|
|
return json.loads(content)
|
|
except Exception as e:
|
|
logger.warning(f"Failed to load metadata for {project_id}: {e}")
|
|
return None
|
|
|
|
async def _save_metadata(
|
|
self,
|
|
project_id: str,
|
|
metadata: dict[str, Any],
|
|
) -> None:
|
|
"""Save workspace metadata to file."""
|
|
metadata_path = self._get_metadata_path(project_id)
|
|
|
|
# Ensure parent directory exists
|
|
metadata_path.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
try:
|
|
async with aiofiles.open(metadata_path, "w") as f:
|
|
await f.write(json.dumps(metadata, indent=2))
|
|
except Exception as e:
|
|
logger.error(f"Failed to save metadata for {project_id}: {e}")
|
|
|
|
async def _update_metadata(
|
|
self,
|
|
project_id: str,
|
|
**updates: Any,
|
|
) -> None:
|
|
"""Update specific fields in workspace metadata."""
|
|
metadata = await self._load_metadata(project_id) or {}
|
|
|
|
# Handle None values (to clear fields)
|
|
for key, value in updates.items():
|
|
if value is None:
|
|
metadata.pop(key, None)
|
|
else:
|
|
metadata[key] = value
|
|
|
|
await self._save_metadata(project_id, metadata)
|
|
|
|
async def _calculate_size(self, path: Path) -> int:
|
|
"""Calculate total size of a directory."""
|
|
|
|
def _calc_size() -> int:
|
|
total = 0
|
|
try:
|
|
for entry in path.rglob("*"):
|
|
if entry.is_file():
|
|
try:
|
|
total += entry.stat().st_size
|
|
except OSError:
|
|
pass
|
|
except Exception:
|
|
pass
|
|
return total
|
|
|
|
# Run in executor for async compatibility
|
|
loop = asyncio.get_event_loop()
|
|
return await loop.run_in_executor(None, _calc_size)
|
|
|
|
|
|
class WorkspaceLock:
    """
    Async context manager that holds a workspace lock for the duration of a block.

    The lock is acquired on entry and released on exit; release failures are
    logged rather than raised so they never mask the body's own exception.
    """

    def __init__(
        self,
        manager: WorkspaceManager,
        project_id: str,
        holder: str,
        timeout: int | None = None,
    ) -> None:
        """
        Initialize workspace lock.

        Args:
            manager: WorkspaceManager instance
            project_id: Project identifier
            holder: Lock holder identifier
            timeout: Lock timeout in seconds
        """
        self.manager = manager
        self.project_id = project_id
        self.holder = holder
        self.timeout = timeout
        # Set only after a successful acquire, so __aexit__ never releases
        # a lock we failed to obtain.
        self._acquired = False

    async def __aenter__(self) -> "WorkspaceLock":
        """Acquire the workspace lock and return self."""
        await self.manager.lock_workspace(self.project_id, self.holder, self.timeout)
        self._acquired = True
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Release the workspace lock if it was acquired."""
        if not self._acquired:
            return
        try:
            await self.manager.unlock_workspace(self.project_id, self.holder)
        except Exception as e:
            logger.warning(f"Failed to release lock for {self.project_id}: {e}")
|
|
|
|
|
|
class FileLockManager:
    """
    File-based locking for single-instance deployments.

    Uses filelock for local locking when Redis is not available. One
    FileLock object is cached per key for the manager's lifetime.
    """

    def __init__(self, lock_dir: Path) -> None:
        """
        Initialize file lock manager.

        Args:
            lock_dir: Directory for lock files (created if missing)
        """
        self.lock_dir = lock_dir
        self.lock_dir.mkdir(parents=True, exist_ok=True)
        # Cache of per-key FileLock instances.
        self._locks: dict[str, FileLock] = {}

    def _get_lock(self, key: str) -> FileLock:
        """Return the cached FileLock for *key*, creating it on first use."""
        lock = self._locks.get(key)
        if lock is None:
            lock = FileLock(self.lock_dir / f"{key}.lock")
            self._locks[key] = lock
        return lock

    def acquire(
        self,
        key: str,
        timeout: float = 10.0,
    ) -> bool:
        """
        Acquire a lock.

        Args:
            key: Lock key
            timeout: Timeout in seconds

        Returns:
            True if acquired, False on timeout
        """
        try:
            self._get_lock(key).acquire(timeout=timeout)
        except Timeout:
            return False
        return True

    def release(self, key: str) -> bool:
        """
        Release a lock.

        Args:
            key: Lock key

        Returns:
            True if released, False if unknown key or release failed
        """
        lock = self._locks.get(key)
        if lock is None:
            return False
        try:
            lock.release()
        except Exception:
            return False
        return True

    def is_locked(self, key: str) -> bool:
        """Check whether the lock for *key* is currently held."""
        return self._get_lock(key).is_locked
|