#!/usr/bin/env bash
set -euo pipefail

# Runs llama-bench in RPC mode against remote toolbox environments.
# Customize REMOTE_* variables or export them before invoking the script.
#
# Environment overrides (defaults in parentheses):
#   RESULTDIR    directory that receives the per-run logs
#                (<script dir>/results-rpc); created if missing
#   REMOTE_HOST  ssh destination, optionally in user@host form (10.0.0.1)
#   REMOTE_PORT  port passed to `ssh -p` (22)
#   RPC_HOST     address the local host uses to reach the RPC server
#                (REMOTE_HOST with any leading "user@" stripped)
#   RPC_PORT     TCP port the remote rpc-server is started on (50052)

# Absolute, symlink-resolved directory containing this script.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
RESULTDIR="${RESULTDIR:-$SCRIPT_DIR/results-rpc}"
mkdir -p "$RESULTDIR"

REMOTE_TARGET="${REMOTE_HOST:-10.0.0.1}"
REMOTE_PORT="${REMOTE_PORT:-22}"
# Host part of the ssh destination: everything after the first "@", if any.
REMOTE_HOSTNAME="${REMOTE_TARGET#*@}"
RPC_HOST="${RPC_HOST:-$REMOTE_HOSTNAME}" # address the local host uses to reach the RPC server
RPC_PORT="${RPC_PORT:-50052}"

# Explicit list of models to test - edit as needed.
# Each entry is checked by resolve_model_path: it may be an absolute path,
# start with "~", or be relative (also looked up next to this script).
MODELS=(
  "/mnt/storage/MiniMax-M2-GGUF/UD-Q6_K_XL/MiniMax-M2-UD-Q6_K_XL-00001-of-00004.gguf"
)

# Fail fast when the list above was emptied by mistake.
if (( ${#MODELS[@]} == 0 )); then
  echo "[ERROR] MODELS list is empty - edit run_rpc_benchmarks.sh" >&2
  exit 1
fi

# Toolbox containers to exercise over RPC.
# Maps an environment key to the container name handed to `toolbox run -c`
# on the remote side when the rpc-server is started.
declare -A TOOLBOX_IMAGES=(
  [rocm6_4_4]="llama-rocm-6.4.4"
  [rocm-7_2]="llama-rocm-7.2"
  [rocm7-nightlies]="llama-rocm7-nightlies"
  [vulkan_amdvlk]="llama-vulkan-amdvlk"
  [vulkan_radv]="llama-vulkan-radv"
)

# Local (client side) llama-bench invocation per environment key.
# Each value is split on whitespace into a command prefix by
# run_llama_bench_rpc, so individual words must not contain spaces.
declare -A CLIENT_CMDS=(
  [rocm6_4_4]="toolbox run -c llama-rocm-6.4.4 -- /usr/local/bin/llama-bench"
  [rocm-7_2]="toolbox run -c llama-rocm-7.2 -- /usr/local/bin/llama-bench"
  [rocm7-nightlies]="toolbox run -c llama-rocm7-nightlies -- /usr/local/bin/llama-bench"
  [vulkan_amdvlk]="toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench"
  [vulkan_radv]="toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench"
)

# Environment keys to benchmark, in execution order. Each key is looked up
# in TOOLBOX_IMAGES (remote server) and CLIENT_CMDS (local client).
ENVIRONMENTS=(
  rocm6_4_4
  rocm-7_2
  rocm7-nightlies
  vulkan_amdvlk
  vulkan_radv
)

# Bookkeeping for the remote rpc-server currently in use; both values are
# read by cleanup_remote when the script exits.
CURRENT_REMOTE_PID=""
CURRENT_REMOTE_ENV=""
# Model paths that were found on disk (filled by ensure_models_exist).
RESOLVED_MODELS=()

#######################################
# EXIT handler: stops the remote rpc-server that is still recorded as running.
# Globals:   CURRENT_REMOTE_PID, CURRENT_REMOTE_ENV (read)
# Returns:   always 0; a failing stop_remote_rpc is ignored (best effort).
#######################################
cleanup_remote() {
  # Nothing to do unless both the PID and the environment were recorded.
  if [[ -n "${CURRENT_REMOTE_PID:-}" && -n "${CURRENT_REMOTE_ENV:-}" ]]; then
    stop_remote_rpc "${CURRENT_REMOTE_ENV}" "${CURRENT_REMOTE_PID}" || true
  fi
}
trap cleanup_remote EXIT

#######################################
# Resolves a model path from the MODELS list to an existing regular file.
# Globals:   HOME, SCRIPT_DIR (read)
# Arguments: $1 - path as written in MODELS; may be absolute, relative,
#                 or start with "~" / "~/"
# Outputs:   the first existing candidate on stdout
# Returns:   0 when a file was found, 1 otherwise
#######################################
resolve_model_path() {
  local raw="$1"
  local expanded="$raw"
  local candidate

  # Expand only a bare "~" or a leading "~/". "~user/..." is left untouched
  # instead of being turned into "$HOME" + "user/...".
  if [[ "$expanded" == "~" || "$expanded" == "~/"* ]]; then
    expanded="${HOME}${expanded:1}"
  fi

  # Relative paths are tried as given first, then next to the script.
  local -a candidates=("$expanded")
  if [[ "$expanded" != /* ]]; then
    candidates+=("$SCRIPT_DIR/$expanded")
  fi

  for candidate in "${candidates[@]}"; do
    if [[ -f "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done

  return 1
}

#######################################
# Rebuilds RESOLVED_MODELS from MODELS, keeping only entries that resolve to
# an existing file, and prints the resulting list.
# Globals:   MODELS (read), RESOLVED_MODELS (rewritten)
# Outputs:   "Models to bench:" list on stdout; a warning per missing model
#            and the fatal error on stderr
# Returns:   0 on success; exits the script with status 1 when no model exists
#######################################
ensure_models_exist() {
  local m resolved

  RESOLVED_MODELS=()
  for m in "${MODELS[@]}"; do
    if resolved="$(resolve_model_path "$m")"; then
      RESOLVED_MODELS+=("$resolved")
    else
      echo "[WARN] Missing model file: $m" >&2
    fi
  done

  if (( ${#RESOLVED_MODELS[@]} == 0 )); then
    echo "[ERROR] None of the listed models exist - adjust MODELS array." >&2
    exit 1
  fi

  echo "Models to bench:"
  for resolved in "${RESOLVED_MODELS[@]}"; do
    echo " - $resolved"
  done
}

#######################################
# Tells whether any benchmark log for an environment is still missing.
# Uses the same log naming scheme as run_llama_bench_rpc.
# Globals:   RESOLVED_MODELS, RESULTDIR (read)
# Arguments: $1 - environment key
#            $2 - suffix appended to the environment in log names (may be "")
# Returns:   0 when at least one log is missing or empty (work to do),
#            1 when every expected log exists and is non-empty
#######################################
has_pending_runs() {
  local env="$1"
  local suffix="$2"
  local model_path model_name ctx ctx_suffix log_file

  for model_path in "${RESOLVED_MODELS[@]}"; do
    model_name="$(basename -- "${model_path}" .gguf)"
    for ctx in default longctx32768 longctx65536; do
      # The default context adds nothing; long-context runs add "__<ctx>".
      ctx_suffix=""
      if [[ "$ctx" != default ]]; then
        ctx_suffix="__${ctx}"
      fi

      log_file="$RESULTDIR/${model_name}__${env}${suffix}${ctx_suffix}__rpc.log"
      if [[ ! -s "$log_file" ]]; then
        return 0 # still work to do
      fi
    done
  done

  return 1 # all logs already exist
}

#######################################
# Starts an rpc-server inside a toolbox container on the remote machine.
# Any process whose command line matches "rpc-server" is killed there first.
# Globals:   REMOTE_PORT, REMOTE_TARGET, RPC_PORT (read)
# Arguments: $1 - environment key (used for the remote log file name)
#            $2 - toolbox container name
#            $3 - suffix appended to the environment in the log name
# Outputs:   PID of the backgrounded `nohup toolbox run ...` job, as reported
#            by the remote shell, on stdout
# Returns:   exit status of ssh
#######################################
start_remote_rpc() {
  local env="$1"
  local image="$2"
  local suffix="$3"
  local remote_log="/tmp/rpc-server-${env}${suffix}.log"
  local q_image q_port q_log

  # The script below is expanded locally and interpreted by the remote bash,
  # so shell-quote every interpolated value. Plain names pass through as-is.
  printf -v q_image '%q' "$image"
  printf -v q_port '%q' "$RPC_PORT"
  printf -v q_log '%q' "$remote_log"

  ssh -p "$REMOTE_PORT" "$REMOTE_TARGET" 'bash -s' <<EOF
set -euo pipefail
pkill -9 -f rpc-server || true
nohup toolbox run -c ${q_image} -- rpc-server -H 0.0.0.0 -p ${q_port} -c >${q_log} 2>&1 < /dev/null &
echo \$!
EOF
}

#######################################
# Stops the rpc-server on the remote machine: kills the given PID when it is
# still alive, then kills every process whose command line matches
# "rpc-server".
# Globals:   REMOTE_PORT, REMOTE_TARGET (read)
# Arguments: $1 - environment key (not used; kept for the callers' signature)
#            $2 - remote PID previously returned by start_remote_rpc
# Returns:   exit status of ssh
#######################################
stop_remote_rpc() {
  # shellcheck disable=SC2034 # accepted for interface symmetry, not used
  local env="$1"
  local pid="$2"

  # The PID is pasted into a script run by the remote shell; anything that is
  # not a plain number is dropped so only the pkill fallback runs.
  if [[ ! "$pid" =~ ^[0-9]+$ ]]; then
    pid=""
  fi

  ssh -p "$REMOTE_PORT" "$REMOTE_TARGET" 'bash -s' <<EOF
set -euo pipefail
if [[ -n "${pid}" && -e "/proc/${pid}" ]]; then
  kill -9 ${pid} || true
fi
pkill -9 -f rpc-server || true
EOF
}

#######################################
# Polls a TCP endpoint until a connection can be opened.
# Arguments: $1 - host
#            $2 - port
#            $3 - maximum number of attempts (default 30)
#            $4 - delay between attempts, passed to sleep (default 1)
# Returns:   0 as soon as a connection succeeds, 1 after all attempts failed
#######################################
wait_for_rpc() {
  local host="$1"
  local port="$2"
  local retries="${3:-30}"
  local delay="${4:-1}"
  local attempt

  for ((attempt = 1; attempt <= retries; attempt++)); do
    # Probe inside a subshell: the descriptor is closed when the subshell
    # ends, and the stderr redirection applies to the probe only. A bare
    # `exec 3<>... 2>/dev/null` in the main shell would permanently send the
    # script's stderr to /dev/null after the first successful connect.
    if (exec 3<>"/dev/tcp/${host}/${port}") 2>/dev/null; then
      return 0
    fi
    sleep "$delay"
  done

  return 1
}

#######################################
# Force-kills (SIGKILL) every local process whose command line matches
# "llama-bench" and, when something was signalled, waits one second.
# Returns:   always 0; pkill reporting no match is not an error here.
#######################################
kill_local_llamabench() {
  if pkill -9 -f llama-bench 2>/dev/null; then
    sleep 1
  fi
}

#######################################
# Runs the client-side llama-bench for one model and one environment against
# the RPC server, once per context configuration (default, longctx32768,
# longctx65536). Configurations whose log already exists and is non-empty are
# skipped. A failing run is reported but does not abort the remaining ones.
# Globals:   CLIENT_CMDS, RESULTDIR, RPC_HOST, RPC_PORT (read)
# Arguments: $1 - path to the model file
#            $2 - environment key (index into CLIENT_CMDS)
#            $3 - suffix appended to the environment in log names (may be "")
# Outputs:   progress messages on stdout; llama-bench output goes to
#            $RESULTDIR/<model>__<env><suffix>[__<ctx>]__rpc.log
# Returns:   0
#######################################
run_llama_bench_rpc() {
  local model_path="$1"
  local env="$2"
  local suffix="$3"
  local model_name
  model_name="$(basename -- "${model_path}" .gguf)"
  local client_cmd="${CLIENT_CMDS[$env]:-}"
  local ctx ctx_suffix ctx_reps ubatch log_file
  local -a client_cmd_ary ctx_args cmd

  if [[ ! -f "$model_path" ]]; then
    echo "[SKIP] ${model_path} does not exist."
    return
  fi

  if [[ -z "$client_cmd" ]]; then
    echo "[WARN] No client llama-bench command defined for ${env} - skipping."
    return
  fi

  # Split the configured command on whitespace. `read -a` splits like an
  # unquoted expansion would, but without pathname expansion.
  read -r -a client_cmd_ary <<<"$client_cmd"

  # Long-context runs pass -ub 512 for vulkan environments, -ub 2048 otherwise.
  ubatch=2048
  if [[ "$env" == *vulkan* ]]; then
    ubatch=512
  fi

  for ctx in default longctx32768 longctx65536; do
    ctx_suffix=""
    ctx_reps=3
    ctx_args=()
    if [[ "$ctx" == longctx* ]]; then
      # "longctx<N>": a single repetition with -d N.
      ctx_suffix="__${ctx}"
      ctx_reps=1
      ctx_args=( -p 2048 -n 32 -d "${ctx#longctx}" -ub "$ubatch" )
    fi

    log_file="$RESULTDIR/${model_name}__${env}${suffix}${ctx_suffix}__rpc.log"
    if [[ -s "$log_file" ]]; then
      echo "[SKIP] ${log_file} already exists."
      continue
    fi

    kill_local_llamabench

    echo
    echo "> [${env}${suffix}] ${model_name} (${ctx})"
    echo " -> log: ${log_file}"

    # ${arr[@]+...} keeps an empty ctx_args from tripping `set -u` on
    # bash releases older than 4.4.
    cmd=(
      "${client_cmd_ary[@]}"
      -mmp 0
      -m "$model_path"
      -fa 1
      ${ctx_args[@]+"${ctx_args[@]}"}
      -r "$ctx_reps"
      --rpc "${RPC_HOST}:${RPC_PORT}"
    )

    printf " -> cmd: %s\n" "${cmd[*]}"
    if "${cmd[@]}" >"$log_file" 2>&1; then
      echo " [OK] Completed"
    else
      echo "[ERROR] llama-bench failed for ${env} / ${model_name} (see ${log_file})"
    fi
  done
}

#######################################
# Benchmarks every resolved model in every configured environment: starts the
# remote rpc-server for the environment, waits until it accepts connections,
# runs the client benchmarks, then stops the server again. Environments that
# have no toolbox mapping, no pending logs, or whose server cannot be started
# or reached are skipped.
# Globals:   ENVIRONMENTS, TOOLBOX_IMAGES, RESOLVED_MODELS, RPC_HOST,
#            RPC_PORT (read); CURRENT_REMOTE_ENV, CURRENT_REMOTE_PID (written
#            so the EXIT handler can stop a server that is still running)
# Outputs:   progress and error messages on stdout
#######################################
run_all() {
  local env image suffix remote_pid model

  ensure_models_exist

  for env in "${ENVIRONMENTS[@]}"; do
    image="${TOOLBOX_IMAGES[$env]:-}"
    if [[ -z "${image}" ]]; then
      echo "[WARN] No toolbox mapping defined for ${env} - skipping."
      continue
    fi

    suffix=""

    echo
    echo "==== ${env}${suffix} -> ${image} ===="

    if ! has_pending_runs "$env" "$suffix"; then
      echo "[SKIP] ${env}${suffix} already has logs for all models - moving on."
      continue
    fi

    CURRENT_REMOTE_ENV="${env}${suffix}"
    # With errexit and pipefail active, a failing ssh would otherwise abort
    # the whole script right here; treat it as "no PID" so the error branch
    # below runs and the next environment is still attempted.
    remote_pid="$(start_remote_rpc "$env" "$image" "$suffix" | tr -d '\r')" \
      || remote_pid=""

    if [[ -z "$remote_pid" ]]; then
      echo "[ERROR] Failed to start RPC server for ${env}${suffix}"
      CURRENT_REMOTE_ENV=""
      continue
    fi

    CURRENT_REMOTE_PID="$remote_pid"
    echo " Remote rpc-server PID: ${remote_pid}"

    if ! wait_for_rpc "$RPC_HOST" "$RPC_PORT"; then
      echo "[ERROR] RPC server on ${RPC_HOST}:${RPC_PORT} did not become ready."
      stop_remote_rpc "$env" "$remote_pid" || true
      CURRENT_REMOTE_PID=""
      CURRENT_REMOTE_ENV=""
      continue
    fi

    for model in "${RESOLVED_MODELS[@]}"; do
      run_llama_bench_rpc "$model" "$env" "$suffix"
    done

    stop_remote_rpc "$env" "$remote_pid" || true
    CURRENT_REMOTE_PID=""
    CURRENT_REMOTE_ENV=""
  done
}

# Entry point: benchmark every configured environment.
run_all