#!/usr/bin/env bash
# Background metric collector — samples GPU and system stats to CSV.
#
# Usage: log-metrics.sh [--output FILE] [--interval SECS] [--duration SECS]
#   --output,   -o  CSV file to write (default: a timestamped file under
#                   "$(data_dir logs)")
#   --interval, -i  seconds to sleep between samples (default: 2)
#   --duration, -d  stop after this many seconds; 0 = run until killed
#
# Relies on helpers sourced from lib/common.sh and lib/detect.sh:
# log_info, log_warn, log_error, data_dir, timestamp.
# NOTE(review): GPU_SYSFS is not defined in this file — presumably it is set
# by lib/detect.sh; confirm.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../../lib/common.sh"
source "$SCRIPT_DIR/../../lib/detect.sh"

INTERVAL=2
OUTPUT=""
DURATION=0  # 0 = indefinite

# Abort with a clear message when option $1 was given without a value.
# $1 - option name as typed, $2 - number of arguments still to be parsed.
# Without this check, "set -u" kills the script with a bare
# "unbound variable" error when the value is missing.
require_value() {
  if (( $2 < 2 )); then
    log_error "$1 requires a value"
    exit 1
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --output|-o)
      require_value "$1" $#
      OUTPUT="$2"
      shift 2
      ;;
    --interval|-i)
      require_value "$1" $#
      INTERVAL="$2"
      shift 2
      ;;
    --duration|-d)
      require_value "$1" $#
      DURATION="$2"
      shift 2
      ;;
    --help|-h)
      echo "Usage: log-metrics.sh [--output FILE] [--interval SECS] [--duration SECS]"
      exit 0
      ;;
    *)
      log_warn "Unknown argument: $1"
      shift
      ;;
  esac
done

# Validate numeric args (digits only; 0 is accepted for both).
[[ "$INTERVAL" =~ ^[0-9]+$ ]] || { log_error "--interval must be a positive integer"; exit 1; }
[[ "$DURATION" =~ ^[0-9]+$ ]] || { log_error "--duration must be a positive integer"; exit 1; }

# Force base 10: a value such as "08" passes the regex above but is an invalid
# octal constant in shell arithmetic, which would make the duration check
# below error out on every iteration and never stop the logger.
INTERVAL=$(( 10#$INTERVAL ))
DURATION=$(( 10#$DURATION ))

if [[ -z "$OUTPUT" ]]; then
  OUTPUT="$(data_dir logs)/metrics-$(timestamp).csv"
fi
mkdir -p -- "$(dirname -- "$OUTPUT")"

# Cache sysfs paths once (avoid re-globbing every iteration).
SYSFS_GPU_BUSY="$GPU_SYSFS/gpu_busy_percent"
SYSFS_VRAM_USED="$GPU_SYSFS/mem_info_vram_used"
SYSFS_GTT_USED="$GPU_SYSFS/mem_info_gtt_used"
SYSFS_TEMP=""
SYSFS_POWER=""
# An unmatched glob stays literal, so the -f test also covers "no hwmon node".
for f in "$GPU_SYSFS"/hwmon/hwmon*/temp1_input; do
  if [[ -f "$f" ]]; then
    SYSFS_TEMP="$f"
    break
  fi
done
for f in "$GPU_SYSFS"/hwmon/hwmon*/power1_average; do
  if [[ -f "$f" ]]; then
    SYSFS_POWER="$f"
    break
  fi
done

# Write CSV header (truncates any existing file).
printf '%s\n' "timestamp,gpu_busy_pct,vram_used_mib,gtt_used_mib,gpu_temp_c,gpu_power_w,cpu_pct,ram_used_mib" > "$OUTPUT"

log_info "Logging metrics every ${INTERVAL}s → $OUTPUT"
if (( DURATION > 0 )); then
  log_info "Will stop after ${DURATION}s"
fi

start_time=$SECONDS
stopped=false

# EXIT handler: report how many samples were written. The "stopped" flag
# makes a second invocation a no-op.
cleanup() {
  if [[ "$stopped" == true ]]; then
    return
  fi
  stopped=true
  local lines
  lines=$(( $(wc -l < "$OUTPUT") - 1 ))  # minus the header row
  log_info "Metric logger stopped. $lines samples in $OUTPUT"
}
trap cleanup EXIT

# Read the first line of file $2 into the variable named by $1.
# Stores 0 when the path is empty, the file cannot be opened or read, or its
# content is not a plain non-negative integer.
read_sysfs_int() {
  local __value=""
  # The redirection must sit on the group: in `read < f 2>/dev/null` bash
  # opens f first, so a failed open is reported before stderr is silenced.
  { read -r __value < "$2"; } 2>/dev/null || true
  [[ "$__value" =~ ^[0-9]+$ ]] || __value=0
  printf -v "$1" '%d' "$(( 10#$__value ))"
}

# Read the aggregate "cpu" line of /proc/stat using bash builtins and set
# CPU_TOTAL (sum of user..steal jiffies) and CPU_IDLE (idle jiffies).
read_cpu_stat() {
  local _label user nice system idle iowait irq softirq steal _rest
  # "cpu user nice system idle iowait irq softirq steal ..."
  read -r _label user nice system idle iowait irq softirq steal _rest < /proc/stat
  CPU_TOTAL=$(( user + nice + system + idle + iowait + irq + softirq + ${steal:-0} ))
  CPU_IDLE=$idle
}

while true; do
  # Timestamp via the printf builtin, without forking a subshell.
  printf -v ts '%(%Y-%m-%d %H:%M:%S)T' -1

  # GPU metrics — direct reads; unavailable counters are logged as 0.
  read_sysfs_int gpu_busy   "$SYSFS_GPU_BUSY"
  read_sysfs_int vram_bytes "$SYSFS_VRAM_USED"
  read_sysfs_int gtt_bytes  "$SYSFS_GTT_USED"
  read_sysfs_int temp_mc    "$SYSFS_TEMP"
  read_sysfs_int power_uw   "$SYSFS_POWER"

  vram_mib=$(( vram_bytes / 1048576 ))
  gtt_mib=$(( gtt_bytes / 1048576 ))
  # Fixed-point formatting with one decimal place (milli-°C → °C, µW → W).
  gpu_temp_c="$(( temp_mc / 1000 )).$(( (temp_mc % 1000) / 100 ))"
  gpu_power_w="$(( power_uw / 1000000 )).$(( (power_uw % 1000000) / 100000 ))"

  # CPU usage: busy share of the jiffies elapsed over a 0.1 s window.
  read_cpu_stat
  prev_total=$CPU_TOTAL
  prev_idle=$CPU_IDLE
  sleep 0.1
  read_cpu_stat
  delta_total=$(( CPU_TOTAL - prev_total ))
  delta_idle=$(( CPU_IDLE - prev_idle ))
  if (( delta_total > 0 )); then
    # Tenths of a percent, printed as N/10 "." N%10 (e.g. 5 → 0.5).
    cpu_pct=$(( (delta_total - delta_idle) * 1000 / delta_total ))
    cpu_pct_fmt="$(( cpu_pct / 10 )).$(( cpu_pct % 10 ))"
  else
    cpu_pct_fmt="0.0"
  fi

  # RAM used = MemTotal - MemAvailable (kB → MiB), bash builtins only.
  local_mem_total=0
  local_mem_avail=0
  while IFS=': ' read -r key val _; do
    case "$key" in
      MemTotal) local_mem_total=$val ;;
      MemAvailable) local_mem_avail=$val; break ;;
    esac
  done < /proc/meminfo
  ram_used_mib=$(( (local_mem_total - local_mem_avail) / 1024 ))

  printf '%s\n' "$ts,$gpu_busy,$vram_mib,$gtt_mib,$gpu_temp_c,$gpu_power_w,$cpu_pct_fmt,$ram_used_mib" >> "$OUTPUT"

  # Stop once the requested duration has elapsed (checked after each sample).
  if (( DURATION > 0 && SECONDS - start_time >= DURATION )); then
    break
  fi

  sleep "$INTERVAL"
done