#!/usr/bin/env bash
# Apply power profile and system tuning for LLM inference workloads.
# Requires root. Settings are volatile — use the systemd service for persistence.
#
# Steps performed, in order:
#   1. Set APU power limits with ryzenadj (exits with status 1 if it is missing).
#   2. Apply VM sysctl values and persist them under /etc/sysctl.d.
#   3. Enable Transparent Huge Pages through sysfs.
#   4. Persist RADV_PERFTEST=nogttspill under /etc/environment.d.
#   5. Print a summary of what was applied.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# NOTE(review): log_*, is_cmd and require_root are not defined in this file;
# presumably they come from these two libraries — confirm.
source "$SCRIPT_DIR/../../lib/common.sh"
source "$SCRIPT_DIR/../../lib/format.sh"

require_root

# ── Power limits via ryzenadj ─────────────────────────────
# Values are passed to ryzenadj unchanged and logged with an "mW" suffix.
STAPM=85000
FAST=85000
SLOW=85000
APU_SLOW=85000

if is_cmd ryzenadj; then
  log_header "Power Profile (ryzenadj)"
  log_info "Setting STAPM=${STAPM}mW, Fast=${FAST}mW, Slow=${SLOW}mW, APU=${APU_SLOW}mW"
  # Best-effort: only the result lines are shown, and neither a ryzenadj
  # failure nor an empty grep result may abort the script.
  ryzenadj \
    --stapm-limit="$STAPM" \
    --fast-limit="$FAST" \
    --slow-limit="$SLOW" \
    --apu-slow-limit="$APU_SLOW" 2>&1 \
    | grep -E 'Successfully|Error|not supported' || true

  # Verify what actually took effect. This is informational only, so guard the
  # pipeline: with errexit + pipefail, a grep without matches (or a SIGPIPE
  # caused by head closing early) would otherwise terminate the script here.
  log_info "Verifying limits..."
  ryzenadj -i 2>&1 | grep -E 'LIMIT|VALUE' | head -8 || true
  echo ""
  log_warn "Note: HP firmware may cap PPT SLOW/APU at 70W regardless of setting"
else
  log_error "ryzenadj not found. Install: cd /tmp && git clone https://github.com/FlyGoat/RyzenAdj.git && cd RyzenAdj && mkdir build && cd build && cmake .. && make && sudo cp ryzenadj /usr/local/bin/"
  exit 1
fi

# ── VM sysctl tuning ──────────────────────────────────────
log_header "VM Sysctl Tuning"

# Explicit key order: associative arrays iterate in an unspecified order, and
# both the log output and the persisted file should be deterministic.
SYSCTL_KEYS=(
  vm.swappiness
  vm.dirty_ratio
  vm.dirty_background_ratio
  vm.max_map_count
  vm.zone_reclaim_mode
)
declare -A SYSCTLS=(
  [vm.swappiness]=1
  [vm.dirty_ratio]=40
  [vm.dirty_background_ratio]=10
  [vm.max_map_count]=500000
  [vm.zone_reclaim_mode]=0
)

# Number of keys that could not be written; reported in the summary.
SYSCTL_FAILURES=0

for KEY in "${SYSCTL_KEYS[@]}"; do
  VAL="${SYSCTLS[$KEY]}"
  CURRENT=$(sysctl -n "$KEY" 2>/dev/null || echo "?")
  if [[ "$CURRENT" == "$VAL" ]]; then
    log_success "$KEY = $VAL (already set)"
  elif sysctl -w "$KEY=$VAL" > /dev/null 2>&1; then
    log_success "$KEY = $VAL (was $CURRENT)"
  else
    # Report the failure instead of letting errexit kill the script with the
    # error output discarded, and keep going with the remaining keys.
    log_warn "$KEY could not be set to $VAL (still $CURRENT)"
    SYSCTL_FAILURES=$((SYSCTL_FAILURES + 1))
  fi
done

# Persist sysctl settings. The file is generated from the table above so the
# runtime and persisted values cannot drift apart. An existing file is left
# untouched.
SYSCTL_CONF="/etc/sysctl.d/99-llm-inference.conf"
if [[ ! -f "$SYSCTL_CONF" ]]; then
  log_info "Persisting to $SYSCTL_CONF"
  mkdir -p "$(dirname "$SYSCTL_CONF")"
  {
    printf '%s\n' '# LLM inference optimizations'
    for KEY in "${SYSCTL_KEYS[@]}"; do
      printf '%s = %s\n' "$KEY" "${SYSCTLS[$KEY]}"
    done
  } > "$SYSCTL_CONF"
  log_success "Sysctl config saved (persists across reboots)"
else
  log_info "Sysctl config already exists at $SYSCTL_CONF"
fi

# ── Transparent Huge Pages ────────────────────────────────
log_header "Transparent Huge Pages"

THP_DIR="/sys/kernel/mm/transparent_hugepage"
# 1 when THP ends up in the requested state, 0 otherwise; used by the summary.
THP_OK=1

THP_ENABLED=$(cat "$THP_DIR/enabled" 2>/dev/null || echo "unknown")
if [[ "$THP_ENABLED" == *"[always]"* ]]; then
  log_success "THP = always (already set)"
else
  # The writes are wrapped in a group so that 2>/dev/null also covers a
  # failure to open the sysfs file (redirections are processed left to right,
  # so a trailing 2>/dev/null on the echo itself would come too late).
  { echo always > "$THP_DIR/enabled"; } 2>/dev/null || true
  { echo defer+madvise > "$THP_DIR/defrag"; } 2>/dev/null || true

  # Read the state back: the kernel marks the active choice with [brackets].
  THP_NOW=$(cat "$THP_DIR/enabled" 2>/dev/null || echo "unknown")
  DEFRAG_NOW=$(cat "$THP_DIR/defrag" 2>/dev/null || echo "unknown")
  if [[ "$THP_NOW" == *"[always]"* && "$DEFRAG_NOW" == *"[defer+madvise]"* ]]; then
    log_success "THP = always, defrag = defer+madvise"
  else
    log_warn "THP could not be fully applied (enabled: $THP_NOW, defrag: $DEFRAG_NOW)"
    THP_OK=0
  fi
fi
log_info "For persistence, add to kernel cmdline: transparent_hugepage=always"

# ── RADV nogttspill ───────────────────────────────────────
log_header "Vulkan RADV Environment"

# An existing file is left untouched, whatever its content.
RADV_CONF="/etc/environment.d/radv-llm.conf"
if [[ ! -f "$RADV_CONF" ]]; then
  mkdir -p /etc/environment.d
  echo 'RADV_PERFTEST=nogttspill' > "$RADV_CONF"
  log_success "RADV_PERFTEST=nogttspill persisted to $RADV_CONF"
  log_info "Takes effect on next login. For this session: export RADV_PERFTEST=nogttspill"
else
  log_success "RADV config already exists at $RADV_CONF"
fi

# ── Summary ───────────────────────────────────────────────
log_header "Phase 2 Optimization Summary"
log_success "Power profile: ryzenadj limits applied (volatile — resets on reboot)"
if (( SYSCTL_FAILURES == 0 )); then
  log_success "VM tuning: sysctl applied and persisted"
else
  log_warn "VM tuning: $SYSCTL_FAILURES sysctl value(s) could not be applied (see above)"
fi
if (( THP_OK )); then
  log_success "THP: enabled (volatile — add to kernel cmdline for persistence)"
else
  log_warn "THP: not applied (see above)"
fi
log_success "RADV: nogttspill persisted"
echo ""
log_info "To persist ryzenadj across reboots:"
log_info "  sudo cp $SCRIPT_DIR/../../configs/ryzenadj-llm.service /etc/systemd/system/"
log_info "  sudo systemctl enable --now ryzenadj-llm.service"