Initial commit

This commit is contained in:
Felipe Cardoso
2026-03-25 20:13:15 +01:00
commit c596e38e9e
26 changed files with 2345 additions and 0 deletions

View File

@@ -0,0 +1,127 @@
#!/usr/bin/env bash
# Background metric collector — samples GPU and system stats to CSV
#
# Usage: log-metrics.sh [--output FILE] [--interval SECS] [--duration SECS]
#   --output,   -o  CSV file to write (a default path is generated when omitted)
#   --interval, -i  seconds to sleep between samples (default 2)
#   --duration, -d  stop after this many seconds (default 0 = run indefinitely)
#
# The script relies on names it does not define itself: log_info, log_warn,
# log_error, data_dir, timestamp and the GPU_SYSFS variable. These are presumably
# provided by the two libraries sourced below — verify against lib/common.sh and
# lib/detect.sh.

# Strict mode: abort on unhandled errors, unset variables and failed pipeline stages.
set -euo pipefail
# Absolute directory of this script, so the libraries resolve regardless of the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../../lib/common.sh"
source "$SCRIPT_DIR/../../lib/detect.sh"
# Defaults; each one can be overridden on the command line.
INTERVAL=2   # seconds between samples
OUTPUT=""    # CSV path; empty means "pick a default later"
DURATION=0 # 0 = indefinite

#######################################
# Parse and validate the command-line options.
# Globals:   INTERVAL, OUTPUT, DURATION (written)
# Arguments: the script's arguments ("$@")
# Outputs:   usage text on stdout for --help; diagnostics via log_warn/log_error
# Returns:   0 on success. Exits 0 after --help, exits 1 when an option is
#            missing its value or a numeric option is not made of digits only.
#######################################
parse_metric_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --output|-o)
        # Without this guard a trailing "--output" dies on "$2: unbound variable".
        [[ $# -ge 2 ]] || { log_error "$1 requires a value"; exit 1; }
        OUTPUT="$2"
        shift 2
        ;;
      --interval|-i)
        [[ $# -ge 2 ]] || { log_error "$1 requires a value"; exit 1; }
        INTERVAL="$2"
        shift 2
        ;;
      --duration|-d)
        [[ $# -ge 2 ]] || { log_error "$1 requires a value"; exit 1; }
        DURATION="$2"
        shift 2
        ;;
      --help|-h)
        echo "Usage: log-metrics.sh [--output FILE] [--interval SECS] [--duration SECS]"
        exit 0
        ;;
      *)
        # Unknown arguments are reported and skipped rather than treated as fatal.
        log_warn "Unknown argument: $1"
        shift
        ;;
    esac
  done

  # Validate numeric args (digits only; note that 0 passes this check)
  [[ "$INTERVAL" =~ ^[0-9]+$ ]] || { log_error "--interval must be a positive integer"; exit 1; }
  [[ "$DURATION" =~ ^[0-9]+$ ]] || { log_error "--duration must be a positive integer"; exit 1; }

  # Force base 10: a value such as "08" or "010" would otherwise be read as
  # octal by later arithmetic (an error for 08, the number 8 for 010).
  INTERVAL=$(( 10#$INTERVAL ))
  DURATION=$(( 10#$DURATION ))
}

parse_metric_args "$@"
# No --output given: build a path from the data_dir and timestamp helpers.
[[ -n "$OUTPUT" ]] || OUTPUT="$(data_dir logs)/metrics-$(timestamp).csv"
# Make sure the directory that will hold the CSV exists before the first write.
mkdir -p "$(dirname "$OUTPUT")"
# Resolve every sysfs path a single time up front so the sampling loop never
# has to expand a glob.

# _pick_first_file VAR CANDIDATE...
# Assigns the first CANDIDATE that is a regular file to the variable named VAR.
# VAR is left untouched when none of the candidates qualifies.
_pick_first_file() {
  local target_var=$1 candidate
  shift
  for candidate in "$@"; do
    if [[ -f "$candidate" ]]; then
      printf -v "$target_var" '%s' "$candidate"
      return 0
    fi
  done
  return 0
}

SYSFS_GPU_BUSY="${GPU_SYSFS}/gpu_busy_percent"
SYSFS_VRAM_USED="${GPU_SYSFS}/mem_info_vram_used"
SYSFS_GTT_USED="${GPU_SYSFS}/mem_info_gtt_used"

# The hwmon index is not fixed, hence the glob; empty means "sensor not found".
SYSFS_TEMP=""
SYSFS_POWER=""
_pick_first_file SYSFS_TEMP "${GPU_SYSFS}"/hwmon/hwmon*/temp1_input
_pick_first_file SYSFS_POWER "${GPU_SYSFS}"/hwmon/hwmon*/power1_average
# Start a fresh CSV: truncate the target and emit the column names.
printf '%s\n' "timestamp,gpu_busy_pct,vram_used_mib,gtt_used_mib,gpu_temp_c,gpu_power_w,cpu_pct,ram_used_mib" > "$OUTPUT"

log_info "Logging metrics every ${INTERVAL}s → $OUTPUT"
if [[ $DURATION -gt 0 ]]; then
  log_info "Will stop after ${DURATION}s"
fi

# Reference point for the --duration check in the sampling loop.
start_time=$SECONDS
# Guard flag read by cleanup() so the summary is only reported once.
stopped=false
#######################################
# Report how many samples were written. Runs from the EXIT trap.
# Globals:   stopped (read/written) — makes repeated invocations a no-op
#            OUTPUT  (read)         — CSV file whose lines are counted
# Outputs:   one summary line via log_info
# Returns:   0
#######################################
cleanup() {
  # Compare the flag as a string instead of executing its contents as a command.
  if [[ "${stopped:-false}" == true ]]; then
    return 0
  fi
  stopped=true

  local lines=0
  # The file may have been removed or never written; report 0 rather than
  # letting wc fail or printing a negative count.
  if [[ -r "$OUTPUT" ]]; then
    lines=$(( $(wc -l < "$OUTPUT") - 1 )) # minus the CSV header line
    (( lines >= 0 )) || lines=0
  fi
  log_info "Metric logger stopped. $lines samples in $OUTPUT"
}
trap cleanup EXIT
#######################################
# Read the aggregate "cpu" line of a /proc/stat-style file using bash builtins.
# Globals:   CPU_TOTAL (written) — user+nice+system+idle+iowait+irq+softirq+steal
#            CPU_IDLE  (written) — the idle field only
# Arguments: $1 - file to read (optional, defaults to /proc/stat)
# Returns:   non-zero if the file cannot be read
#######################################
read_cpu_stat() {
  local stat_file="${1:-/proc/stat}"
  local label user nice system idle iowait irq softirq steal rest
  # "cpu user nice system idle iowait irq softirq steal ..."
  # IFS is pinned for this read only, so the split does not depend on whatever
  # IFS the caller or a sourced library has set, and nothing is glob-expanded.
  IFS=$' \t' read -r label user nice system idle iowait irq softirq steal rest < "$stat_file"
  # Fields missing from the line are set to empty by read and count as 0 here.
  CPU_TOTAL=$(( user + nice + system + idle + iowait + irq + softirq + steal ))
  CPU_IDLE=$(( idle + 0 ))
}
#######################################
# Store the unsigned integer on the first line of a file in a variable.
# Arguments: $1 - name of the variable to assign
#            $2 - file to read (may be empty when the sensor was not found)
# Result:    the variable is set to the value, or to 0 when the path is empty,
#            unreadable, the read fails, or the content is not digits only.
#######################################
read_sysfs_int() {
  local __raw=""
  if [[ -r "$2" ]]; then
    # stderr is silenced BEFORE the input redirection: redirections apply left
    # to right, so the reverse order would still print open errors.
    read -r __raw 2>/dev/null < "$2" || true
  fi
  # Anything non-numeric would abort the arithmetic below under set -e.
  [[ "$__raw" =~ ^[0-9]+$ ]] || __raw=0
  printf -v "$1" '%s' "$__raw"
}

# Sampling loop: one CSV row per iteration until --duration elapses (if set).
while true; do
  printf -v ts '%(%Y-%m-%d %H:%M:%S)T' -1

  # GPU metrics — direct reads, no subshells
  read_sysfs_int gpu_busy "$SYSFS_GPU_BUSY"
  read_sysfs_int vram_bytes "$SYSFS_VRAM_USED"
  read_sysfs_int gtt_bytes "$SYSFS_GTT_USED"
  read_sysfs_int temp_mc "$SYSFS_TEMP"
  read_sysfs_int power_uw "$SYSFS_POWER"

  vram_mib=$(( vram_bytes / 1048576 ))
  gtt_mib=$(( gtt_bytes / 1048576 ))
  # Integer-only formatting with one decimal place: raw/1000 and raw/1000000.
  gpu_temp_c=$(( temp_mc / 1000 )).$(( (temp_mc % 1000) / 100 ))
  gpu_power_w=$(( power_uw / 1000000 )).$(( (power_uw % 1000000) / 100000 ))

  # CPU usage (snapshot delta over a 0.1 s window)
  read_cpu_stat
  prev_total=$CPU_TOTAL
  prev_idle=$CPU_IDLE
  sleep 0.1
  read_cpu_stat
  delta_total=$(( CPU_TOTAL - prev_total ))
  delta_idle=$(( CPU_IDLE - prev_idle ))
  if (( delta_total > 0 )); then
    # Busy share in tenths of a percent, so one decimal survives integer math.
    cpu_pct=$(( (delta_total - delta_idle) * 1000 / delta_total ))
    # Format N as N/10 . N%10, handling single-digit values (e.g., 5 → 0.5)
    cpu_pct_fmt="$(( cpu_pct / 10 )).$(( cpu_pct % 10 ))"
  else
    # No counter movement between the two snapshots.
    cpu_pct_fmt="0.0"
  fi

  # RAM used (bash builtins only): MemTotal - MemAvailable from /proc/meminfo
  local_mem_total=0
  local_mem_avail=0
  while IFS=': ' read -r key val _; do
    case "$key" in
      MemTotal) local_mem_total=$val ;;
      MemAvailable) local_mem_avail=$val; break ;; # nothing further is needed
    esac
  done < /proc/meminfo
  ram_used_mib=$(( (local_mem_total - local_mem_avail) / 1024 ))

  printf '%s\n' "$ts,$gpu_busy,$vram_mib,$gtt_mib,$gpu_temp_c,$gpu_power_w,$cpu_pct_fmt,$ram_used_mib" >> "$OUTPUT"

  # Check duration. The sleep is capped at the time left so the run does not
  # overshoot --duration by up to a whole interval.
  if (( DURATION > 0 )); then
    remaining=$(( DURATION - (SECONDS - start_time) ))
    (( remaining > 0 )) || break
    sleep "$(( remaining < INTERVAL ? remaining : INTERVAL ))"
  else
    sleep "$INTERVAL"
  fi
done