# app/services/memory/procedural/memory.py
"""
Procedural Memory Implementation.

Provides storage and retrieval for learned procedures (skills)
derived from successful task execution patterns.
"""

import logging
import time
from datetime import UTC, datetime
from typing import Any
from uuid import UUID

from sqlalchemy import and_, desc, func, or_, select, update
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.memory.procedure import Procedure as ProcedureModel
from app.services.memory.config import get_memory_settings
from app.services.memory.types import Procedure, ProcedureCreate, RetrievalResult, Step

logger = logging.getLogger(__name__)

# Escape character used for LIKE/ILIKE patterns built from caller-supplied text.
_LIKE_ESCAPE = "/"


def _model_to_procedure(model: ProcedureModel) -> Procedure:
    """Convert SQLAlchemy model to Procedure dataclass."""
    return Procedure(
        id=model.id,  # type: ignore[arg-type]
        project_id=model.project_id,  # type: ignore[arg-type]
        agent_type_id=model.agent_type_id,  # type: ignore[arg-type]
        name=model.name,  # type: ignore[arg-type]
        trigger_pattern=model.trigger_pattern,  # type: ignore[arg-type]
        steps=model.steps or [],  # type: ignore[arg-type]
        success_count=model.success_count,  # type: ignore[arg-type]
        failure_count=model.failure_count,  # type: ignore[arg-type]
        last_used=model.last_used,  # type: ignore[arg-type]
        embedding=None,  # Don't expose raw embedding
        created_at=model.created_at,  # type: ignore[arg-type]
        updated_at=model.updated_at,  # type: ignore[arg-type]
    )


def _contains_pattern(term: str) -> str:
    """Build a ``%term%`` LIKE pattern in which ``term`` is matched literally.

    ``%`` and ``_`` inside ``term`` are escaped so that caller-supplied text
    cannot act as a wildcard; pair the result with ``escape=_LIKE_ESCAPE``.
    """
    escaped = (
        term.replace(_LIKE_ESCAPE, _LIKE_ESCAPE * 2)
        .replace("%", f"{_LIKE_ESCAPE}%")
        .replace("_", f"{_LIKE_ESCAPE}_")
    )
    return f"%{escaped}%"


def _text_match(term: str) -> Any:
    """Return a condition matching ``term`` in the trigger pattern or the name."""
    pattern = _contains_pattern(term)
    return or_(
        ProcedureModel.trigger_pattern.ilike(pattern, escape=_LIKE_ESCAPE),
        ProcedureModel.name.ilike(pattern, escape=_LIKE_ESCAPE),
    )


def _apply_scope(
    stmt: Any,
    project_id: UUID | None,
    agent_type_id: UUID | None,
) -> Any:
    """Restrict ``stmt`` to the given scope.

    For each identifier that is not None, rows whose column equals the
    identifier OR is NULL are kept. A None identifier adds no filter.
    """
    if project_id is not None:
        stmt = stmt.where(
            or_(
                ProcedureModel.project_id == project_id,
                ProcedureModel.project_id.is_(None),
            )
        )
    if agent_type_id is not None:
        stmt = stmt.where(
            or_(
                ProcedureModel.agent_type_id == agent_type_id,
                ProcedureModel.agent_type_id.is_(None),
            )
        )
    return stmt


def _ranking() -> tuple[Any, Any]:
    """Return the ORDER BY criteria: smoothed success ratio, then recency.

    The ``+ 1`` in the denominator avoids division by zero for unused rows.
    NOTE(review): if the counters are integer columns, ``/`` may render as
    integer division on SQLAlchemy versions before 2.0 - confirm the
    installed version yields a fractional result.
    """
    smoothed_rate = ProcedureModel.success_count / (
        ProcedureModel.success_count + ProcedureModel.failure_count + 1
    )
    return desc(smoothed_rate), desc(ProcedureModel.last_used)


class ProceduralMemory:
    """
    Procedural Memory Service.

    Provides procedure storage and retrieval:
    - Record procedures from successful task patterns
    - Find matching procedures by trigger pattern
    - Track success/failure rates
    - Get best procedure for a task type
    - Update procedure steps

    Performance target: <50ms P95 for matching
    """

    def __init__(
        self,
        session: AsyncSession,
        embedding_generator: Any | None = None,
    ) -> None:
        """
        Initialize procedural memory.

        Args:
            session: Database session
            embedding_generator: Optional embedding generator for semantic matching
        """
        self._session = session
        self._embedding_generator = embedding_generator
        self._settings = get_memory_settings()

    @classmethod
    async def create(
        cls,
        session: AsyncSession,
        embedding_generator: Any | None = None,
    ) -> "ProceduralMemory":
        """
        Factory method to create ProceduralMemory.

        Args:
            session: Database session
            embedding_generator: Optional embedding generator

        Returns:
            Configured ProceduralMemory instance
        """
        return cls(session=session, embedding_generator=embedding_generator)

    # =========================================================================
    # Procedure Recording
    # =========================================================================

    async def record_procedure(self, procedure: ProcedureCreate) -> Procedure:
        """
        Record a new procedure or update an existing one.

        If a procedure with the same name exists in the same scope,
        its steps will be updated and success count incremented.

        Args:
            procedure: Procedure data to record

        Returns:
            The created or updated procedure
        """
        # Check for existing procedure with same name
        existing = await self._find_existing_procedure(
            project_id=procedure.project_id,
            agent_type_id=procedure.agent_type_id,
            name=procedure.name,
        )

        if existing is not None:
            # Update existing procedure
            return await self._update_existing_procedure(
                existing=existing,
                new_steps=procedure.steps,
                new_trigger=procedure.trigger_pattern,
            )

        # Create new procedure
        now = datetime.now(UTC)

        # Generate embedding if possible
        embedding = None
        if self._embedding_generator is not None:
            embedding_text = self._create_embedding_text(procedure)
            embedding = await self._embedding_generator.generate(embedding_text)

        model = ProcedureModel(
            project_id=procedure.project_id,
            agent_type_id=procedure.agent_type_id,
            name=procedure.name,
            trigger_pattern=procedure.trigger_pattern,
            steps=procedure.steps,
            success_count=1,  # New procedures start with 1 success (they worked)
            failure_count=0,
            last_used=now,
            embedding=embedding,
        )

        self._session.add(model)
        await self._session.flush()
        await self._session.refresh(model)

        logger.info(
            "Recorded new procedure: %s with %s steps",
            procedure.name,
            len(procedure.steps),
        )

        return _model_to_procedure(model)

    async def _find_existing_procedure(
        self,
        project_id: UUID | None,
        agent_type_id: UUID | None,
        name: str,
    ) -> ProcedureModel | None:
        """Find an existing procedure with the same name in the same scope.

        Unlike the retrieval queries, scope matching here is exact: a None
        identifier matches only rows where the column IS NULL.
        """
        query = select(ProcedureModel).where(ProcedureModel.name == name)

        if project_id is not None:
            query = query.where(ProcedureModel.project_id == project_id)
        else:
            query = query.where(ProcedureModel.project_id.is_(None))

        if agent_type_id is not None:
            query = query.where(ProcedureModel.agent_type_id == agent_type_id)
        else:
            query = query.where(ProcedureModel.agent_type_id.is_(None))

        result = await self._session.execute(query)
        return result.scalar_one_or_none()

    async def _update_existing_procedure(
        self,
        existing: ProcedureModel,
        new_steps: list[dict[str, Any]],
        new_trigger: str,
    ) -> Procedure:
        """Update an existing procedure with new steps.

        Merges the steps, replaces the trigger pattern, increments the
        success count and refreshes ``last_used`` / ``updated_at``.
        """
        now = datetime.now(UTC)

        # Merge steps intelligently - keep existing order, add new steps
        merged_steps = self._merge_steps(
            existing.steps or [],  # type: ignore[arg-type]
            new_steps,
        )

        # NOTE(review): the stored embedding is not regenerated here even
        # though steps and trigger change - confirm this is intended.
        stmt = (
            update(ProcedureModel)
            .where(ProcedureModel.id == existing.id)
            .values(
                steps=merged_steps,
                trigger_pattern=new_trigger,
                success_count=ProcedureModel.success_count + 1,
                last_used=now,
                updated_at=now,
            )
            .returning(ProcedureModel)
        )

        result = await self._session.execute(stmt)
        updated_model = result.scalar_one()
        await self._session.flush()

        logger.info("Updated existing procedure: %s", existing.name)

        return _model_to_procedure(updated_model)

    def _merge_steps(
        self,
        existing_steps: list[dict[str, Any]],
        new_steps: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """Merge steps from a new execution with existing steps.

        - If either list is empty, the other one is returned unchanged.
        - If the lists differ in length, the new steps win outright.
        - Otherwise steps are merged pairwise, keys from the new step
          overriding keys from the existing step.
        """
        if not existing_steps:
            return new_steps
        if not new_steps:
            return existing_steps

        # For now, use the new steps if they differ significantly
        # In production, this could use more sophisticated merging
        if len(new_steps) != len(existing_steps):
            # If structure changed, prefer newer steps
            return new_steps

        # Lengths are equal here, so a plain pairwise merge covers every step.
        return [
            {**old_step, **new_step}
            for old_step, new_step in zip(existing_steps, new_steps, strict=True)
        ]

    def _create_embedding_text(self, procedure: ProcedureCreate) -> str:
        """Create text for embedding from procedure data."""
        steps_text = " ".join(step.get("action", "") for step in procedure.steps)
        return f"{procedure.name} {procedure.trigger_pattern} {steps_text}"

    # =========================================================================
    # Procedure Retrieval
    # =========================================================================

    async def find_matching(
        self,
        context: str,
        project_id: UUID | None = None,
        agent_type_id: UUID | None = None,
        limit: int = 5,
    ) -> list[Procedure]:
        """
        Find procedures matching the given context.

        Args:
            context: Context/trigger to match against
            project_id: Optional project to search within
            agent_type_id: Optional agent type filter
            limit: Maximum results

        Returns:
            List of matching procedures
        """
        result = await self._find_matching_with_metadata(
            context=context,
            project_id=project_id,
            agent_type_id=agent_type_id,
            limit=limit,
        )
        return result.items

    async def _find_matching_with_metadata(
        self,
        context: str,
        project_id: UUID | None = None,
        agent_type_id: UUID | None = None,
        limit: int = 5,
    ) -> RetrievalResult[Procedure]:
        """Find matching procedures with full result metadata.

        A procedure matches when any of the first five whitespace-separated
        terms of ``context`` occurs (case-insensitively) in its trigger
        pattern or name. An empty context applies no text filter.
        """
        start_time = time.perf_counter()

        stmt = _apply_scope(select(ProcedureModel), project_id, agent_type_id)

        # Text-based matching on trigger pattern and name
        # TODO: Implement proper vector similarity search when pgvector is integrated
        search_terms = context.lower().split()[:5]  # Limit to 5 terms
        if search_terms:
            stmt = stmt.where(or_(*(_text_match(term) for term in search_terms)))

        # Prioritize by success rate, then by most recent use
        stmt = stmt.order_by(*_ranking()).limit(limit)

        result = await self._session.execute(stmt)
        models = list(result.scalars().all())

        latency_ms = (time.perf_counter() - start_time) * 1000

        return RetrievalResult(
            items=[_model_to_procedure(m) for m in models],
            total_count=len(models),
            query=context,
            retrieval_type="procedural",
            latency_ms=latency_ms,
            metadata={"project_id": str(project_id) if project_id else None},
        )

    async def get_best_procedure(
        self,
        task_type: str,
        project_id: UUID | None = None,
        agent_type_id: UUID | None = None,
        min_success_rate: float = 0.5,
        min_uses: int = 1,
    ) -> Procedure | None:
        """
        Get the best procedure for a given task type.

        Returns the procedure with the highest success rate that
        meets the minimum thresholds.

        Args:
            task_type: Task type to find procedure for
            project_id: Optional project scope
            agent_type_id: Optional agent type scope
            min_success_rate: Minimum required success rate
            min_uses: Minimum number of uses required

        Returns:
            Best matching procedure or None
        """
        total_uses = ProcedureModel.success_count + ProcedureModel.failure_count

        # The success-rate threshold is applied in SQL, written as
        # ``success >= rate * total`` so no division is needed in WHERE.
        # Filtering a fixed-size window in Python instead could miss a
        # qualifying procedure that ranks below the window.
        stmt = select(ProcedureModel).where(
            and_(
                total_uses >= min_uses,
                total_uses > 0,
                ProcedureModel.success_count >= min_success_rate * total_uses,
                _text_match(task_type),
            )
        )
        stmt = _apply_scope(stmt, project_id, agent_type_id)
        stmt = stmt.order_by(*_ranking()).limit(1)

        result = await self._session.execute(stmt)
        model = result.scalars().first()
        if model is None:
            return None

        success = float(model.success_count)
        total = success + float(model.failure_count)
        logger.debug(
            "Found best procedure for '%s': %s (success_rate=%s)",
            task_type,
            model.name,
            f"{success / total:.2%}",
        )
        return _model_to_procedure(model)

    async def get_by_id(self, procedure_id: UUID) -> Procedure | None:
        """Get a procedure by ID."""
        query = select(ProcedureModel).where(ProcedureModel.id == procedure_id)
        result = await self._session.execute(query)
        model = result.scalar_one_or_none()
        return _model_to_procedure(model) if model else None

    # =========================================================================
    # Outcome Recording
    # =========================================================================

    async def record_outcome(
        self,
        procedure_id: UUID,
        success: bool,
    ) -> Procedure:
        """
        Record the outcome of using a procedure.

        Updates the success or failure count and last_used timestamp.

        Args:
            procedure_id: Procedure that was used
            success: Whether the procedure succeeded

        Returns:
            Updated procedure

        Raises:
            ValueError: If procedure not found
        """
        query = select(ProcedureModel).where(ProcedureModel.id == procedure_id)
        result = await self._session.execute(query)
        model = result.scalar_one_or_none()

        if model is None:
            raise ValueError(f"Procedure not found: {procedure_id}")

        now = datetime.now(UTC)

        # Only the counter being incremented differs between the outcomes.
        if success:
            counter_update = {"success_count": ProcedureModel.success_count + 1}
        else:
            counter_update = {"failure_count": ProcedureModel.failure_count + 1}

        stmt = (
            update(ProcedureModel)
            .where(ProcedureModel.id == procedure_id)
            .values(**counter_update, last_used=now, updated_at=now)
            .returning(ProcedureModel)
        )

        result = await self._session.execute(stmt)
        updated_model = result.scalar_one()
        await self._session.flush()

        outcome = "success" if success else "failure"
        logger.info(
            "Recorded %s for procedure %s: success_rate=%s",
            outcome,
            procedure_id,
            f"{updated_model.success_rate:.2%}",
        )

        return _model_to_procedure(updated_model)

    # =========================================================================
    # Step Management
    # =========================================================================

    async def update_steps(
        self,
        procedure_id: UUID,
        steps: list[Step],
    ) -> Procedure:
        """
        Update the steps of a procedure.

        Args:
            procedure_id: Procedure to update
            steps: New steps

        Returns:
            Updated procedure

        Raises:
            ValueError: If procedure not found
        """
        query = select(ProcedureModel).where(ProcedureModel.id == procedure_id)
        result = await self._session.execute(query)
        model = result.scalar_one_or_none()

        if model is None:
            raise ValueError(f"Procedure not found: {procedure_id}")

        # Convert Step objects to dictionaries
        steps_dict = [
            {
                "order": step.order,
                "action": step.action,
                "parameters": step.parameters,
                "expected_outcome": step.expected_outcome,
                "fallback_action": step.fallback_action,
            }
            for step in steps
        ]

        now = datetime.now(UTC)
        stmt = (
            update(ProcedureModel)
            .where(ProcedureModel.id == procedure_id)
            .values(
                steps=steps_dict,
                updated_at=now,
            )
            .returning(ProcedureModel)
        )

        result = await self._session.execute(stmt)
        updated_model = result.scalar_one()
        await self._session.flush()

        logger.info(
            "Updated steps for procedure %s: %s steps", procedure_id, len(steps)
        )

        return _model_to_procedure(updated_model)

    # =========================================================================
    # Statistics & Management
    # =========================================================================

    async def get_stats(
        self,
        project_id: UUID | None = None,
        agent_type_id: UUID | None = None,
    ) -> dict[str, Any]:
        """
        Get statistics about procedural memory.

        Args:
            project_id: Optional project to get stats for
            agent_type_id: Optional agent type filter

        Returns:
            Dictionary with statistics
        """
        query = _apply_scope(select(ProcedureModel), project_id, agent_type_id)

        result = await self._session.execute(query)
        models = list(result.scalars().all())

        if not models:
            return {
                "total_procedures": 0,
                "avg_success_rate": 0.0,
                "avg_steps_count": 0.0,
                "total_uses": 0,
                "high_success_count": 0,
                "low_success_count": 0,
            }

        success_rates = [m.success_rate for m in models]
        step_counts = [len(m.steps or []) for m in models]
        total_uses = sum(m.total_uses for m in models)

        return {
            "total_procedures": len(models),
            "avg_success_rate": sum(success_rates) / len(success_rates),
            "avg_steps_count": sum(step_counts) / len(step_counts),
            "total_uses": total_uses,
            "high_success_count": sum(1 for r in success_rates if r >= 0.8),
            "low_success_count": sum(1 for r in success_rates if r < 0.5),
        }

    async def count(
        self,
        project_id: UUID | None = None,
        agent_type_id: UUID | None = None,
    ) -> int:
        """
        Count procedures in scope.

        Args:
            project_id: Optional project to count for
            agent_type_id: Optional agent type filter

        Returns:
            Number of procedures
        """
        # Count in the database instead of loading every row into memory.
        query = _apply_scope(
            select(func.count()).select_from(ProcedureModel),
            project_id,
            agent_type_id,
        )

        result = await self._session.execute(query)
        return int(result.scalar_one())

    async def delete(self, procedure_id: UUID) -> bool:
        """
        Delete a procedure.

        Args:
            procedure_id: Procedure to delete

        Returns:
            True if deleted, False if not found
        """
        query = select(ProcedureModel).where(ProcedureModel.id == procedure_id)
        result = await self._session.execute(query)
        model = result.scalar_one_or_none()

        if model is None:
            return False

        await self._session.delete(model)
        await self._session.flush()

        logger.info("Deleted procedure %s", procedure_id)
        return True

    async def get_procedures_by_success_rate(
        self,
        min_rate: float = 0.0,
        max_rate: float = 1.0,
        project_id: UUID | None = None,
        limit: int = 20,
    ) -> list[Procedure]:
        """
        Get procedures within a success rate range.

        Args:
            min_rate: Minimum success rate
            max_rate: Maximum success rate
            project_id: Optional project scope
            limit: Maximum results

        Returns:
            List of procedures
        """
        # NOTE(review): only the ``limit * 2`` most recently used rows are
        # inspected, so fewer than ``limit`` results can come back even when
        # more procedures in range exist - confirm this is acceptable.
        query = (
            _apply_scope(select(ProcedureModel), project_id, None)
            .order_by(desc(ProcedureModel.last_used))
            .limit(limit * 2)  # Fetch more since we filter in Python
        )

        result = await self._session.execute(query)
        models = list(result.scalars().all())

        # Filter by success rate in Python
        filtered = [m for m in models if min_rate <= m.success_rate <= max_rate][:limit]

        return [_model_to_procedure(m) for m in filtered]