import asyncio
import os
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import Dict, List, Optional

import paramiko
from fastapi import HTTPException

from app.core.config import settings
from app.models.sample import Sample


class SampleManager:
    def __init__(self):
        self.ssh_client: Optional[paramiko.SSHClient] = None
        self.sftp_client: Optional[paramiko.SFTPClient] = None
        self.cache_dir = "cache/samples"
        self.last_sync: Optional[datetime] = None
        self.file_index: Dict[str, datetime] = {}
        self.memory_cache: Dict[str, memoryview] = {}
        self.executor = ThreadPoolExecutor(max_workers=4)
        self._ensure_cache_dir()

    def _ensure_cache_dir(self):
        """Ensure the local cache directory exists."""
        os.makedirs(self.cache_dir, exist_ok=True)

    async def _connect_sftp(self):
        """Create an SFTP connection using an SSH key."""
        try:
            # Expand the key path (handles ~/)
            key_path = os.path.expanduser(settings.SFTP_KEY_PATH)

            # Create a new SSH client. Keep a reference on self so the
            # underlying transport is not garbage-collected, which would
            # silently close the SFTP channel.
            ssh = paramiko.SSHClient()
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

            # Connect using the SSH key. Note that paramiko's connect() is
            # blocking; a busy event loop could off-load it to the executor.
            ssh.connect(
                hostname=settings.SFTP_HOST,
                username=settings.SFTP_USER,
                port=settings.SFTP_PORT,
                key_filename=key_path,
            )

            # Create the SFTP client from the SSH client
            self.ssh_client = ssh
            self.sftp_client = ssh.open_sftp()
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"SFTP connection failed: {e}")

    def _disconnect_sftp(self):
        """Close the SFTP connection and its underlying SSH transport."""
        if self.sftp_client:
            self.sftp_client.close()
            self.sftp_client = None
        if self.ssh_client:
            self.ssh_client.close()
            self.ssh_client = None

    def _download_to_memory(self, remote_path: str) -> memoryview:
        """Download a remote file directly into memory."""
        try:
            with self.sftp_client.file(remote_path, "rb") as remote_file:
                # Read the entire file into memory
                data = remote_file.read()
                return memoryview(data)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Download failed: {e}")

    async def _sync_files(self):
        """Sync remote files into the in-memory cache."""
        if not self.sftp_client:
            await self._connect_sftp()

        try:
            # List remote files with attributes (includes st_mtime)
            remote_files = self.sftp_client.listdir_attr(settings.SFTP_PATH)

            # Update the file index and download new or changed files
            for attr in remote_files:
                remote_path = f"{settings.SFTP_PATH}/{attr.filename}"

                # Check whether the file is new or has been updated
                if (attr.filename not in self.file_index
                        or datetime.fromtimestamp(attr.st_mtime) > self.file_index[attr.filename]):
                    # Download the file to memory without blocking the event loop
                    loop = asyncio.get_running_loop()
                    self.memory_cache[attr.filename] = await loop.run_in_executor(
                        self.executor, self._download_to_memory, remote_path
                    )
                    self.file_index[attr.filename] = datetime.fromtimestamp(attr.st_mtime)

            self.last_sync = datetime.now()
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Sync failed: {e}")
        finally:
            self._disconnect_sftp()

    async def ensure_synced(self, max_age_seconds: int = 30):
        """Re-sync the memory cache if it is older than max_age_seconds."""
        if (not self.last_sync
                or (datetime.now() - self.last_sync).total_seconds() > max_age_seconds):
            await self._sync_files()

    async def list_samples(self, limit: int = 20, offset: int = 0) -> List[Sample]:
        """List sample images with pagination, newest first."""
        await self.ensure_synced()

        # Sort files by modification time, newest first
        files = sorted(
            self.file_index.items(),
            key=lambda x: x[1],
            reverse=True,
        )

        # Apply pagination
        files = files[offset:offset + limit]

        # Build Sample objects
        return [
            Sample(
                filename=filename,
                url=f"/api/v1/samples/image/{filename}",
                created_at=created_at,
            )
            for filename, created_at in files
        ]

    async def get_latest_samples(self, count: int = 5) -> List[Sample]:
        """Get the most recent samples."""
        return await self.list_samples(limit=count, offset=0)

    async def get_sample_data(self, filename: str) -> Optional[memoryview]:
        """Get image data from the memory cache."""
        await self.ensure_synced()
        if filename not in self.memory_cache:
            raise HTTPException(status_code=404, detail="Sample not found")
        return self.memory_cache[filename]

    def cleanup_old_files(self, max_files: int = 1000):
        """Evict the oldest files once the memory cache exceeds max_files."""
        if len(self.memory_cache) > max_files:
            # Sort files by date and keep only the newest max_files
            files = sorted(
                self.file_index.items(),
                key=lambda x: x[1],
                reverse=True,
            )
            files_to_keep = {f[0] for f in files[:max_files]}

            # Drop everything else from both the cache and the index
            for filename in list(self.memory_cache.keys()):
                if filename not in files_to_keep:
                    del self.memory_cache[filename]
                    del self.file_index[filename]
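

# Usage sketch: one plausible way to expose SampleManager through FastAPI
# routes. The module-level singleton, the router prefix, and the image media
# type are assumptions for illustration, not part of this module's contract;
# only the /api/v1/samples/image/{filename} path is implied by the url field
# built in list_samples above.
from fastapi import APIRouter, Response

router = APIRouter(prefix="/api/v1/samples")
manager = SampleManager()  # hypothetical shared instance for the app


@router.get("/latest")
async def latest_samples(count: int = 5):
    # Returns the newest samples; triggers a sync if the cache is stale.
    return await manager.get_latest_samples(count=count)


@router.get("/image/{filename}")
async def sample_image(filename: str):
    # get_sample_data raises a 404 HTTPException for unknown filenames.
    data = await manager.get_sample_data(filename)
    # Copy the memoryview into bytes for the response body; the media type
    # is an assumption, since the real file format is not specified here.
    return Response(content=bytes(data), media_type="image/png")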