Adding new benchmarks
This commit is contained in:
@@ -1,108 +1,118 @@
|
||||
#!/usr/bin/env python3
|
||||
import re, glob, os, argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# --- Log-parsing patterns ---
# Each of PP_RE / TG_RE matches a pipe-delimited table row: five leading
# cells, a cell naming the test (pp512 / tg128), then a "<a> ± <b>" cell.
# Group 1 captures the number before "±", group 2 the number after it.
PP_RE = re.compile(r"\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|\s*pp512\s*\|\s*([\d.]+)\s*±\s*([\d.]+)")
TG_RE = re.compile(r"\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|\s*tg128\s*\|\s*([\d.]+)\s*±\s*([\d.]+)")

# Failure signatures searched for in raw log text (all case-insensitive).
LOAD_ERR = re.compile(r"failed to load model|Device memory allocation.*failed", re.IGNORECASE)
HANG_ERR = re.compile(r"GPU Hang|HW Exception", re.IGNORECASE)
GEN_ERR = re.compile(r"error:|exit \d+", re.IGNORECASE)

# --- Config ---
RESULTS_JSON = Path("../docs/results.json")

# Environment identifiers in table-column order. Defined exactly once so the
# "rocm6_4_2-rocwmma" column is not lost to a later reassignment.
ENV_ORDER = [
    "vulkan_amdvlk",
    "vulkan_radv",
    "rocm6_4_2",
    "rocm6_4_2-rocwmma",
    "rocm7_beta",
    "rocm7_rc",
]

# Column header shown for each environment.
COL_NAMES = {
    "vulkan_amdvlk": "Vulkan (AMDVLK)",
    "vulkan_radv": "Vulkan (RADV)",
    "rocm6_4_2": "ROCm 6.4.2",
    "rocm6_4_2-rocwmma": "ROCm 6.4.2 + ROCWMMA",
    "rocm7_beta": "ROCm 7.0 Beta",
    "rocm7_rc": "ROCm 7.0 RC",
}

# Short label used in the "Best PP" / "Best TG" columns.
WINNER_LABELS = {
    "vulkan_amdvlk": "AMDVLK",
    "vulkan_radv": "RADV",
    "rocm6_4_2": "ROCm6.4.2",
    "rocm6_4_2-rocwmma": "ROCm6.4.2+ROCWMMA",
    "rocm7_beta": "ROCm7 Beta",
    "rocm7_rc": "ROCm7 RC",
}

# Earlier name for the same mapping; kept so code that still looks up
# WINNER[...] keeps working.
WINNER = WINNER_LABELS

# (display name, model-name prefix) pairs, one table row each, in row order.
DEFAULT_MODELS = [
    ("Gemma3 12B Q8_0", "gemma-3-12b-it-UD-Q8_K_XL"),
    ("Gemma3 27B BF16", "gemma-3-27b-it-BF16"),
    ("Llama-4-Scout 17B Q8_0", "Llama-4-Scout-17B-16E-Instruct-Q8_0"),
    ("Llama-4-Scout 17B Q4_K XL", "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL"),
    ("Qwen3 30B BF16", "Qwen3-30B-A3B-BF16"),
    ("Qwen3-235B Q3_K XL", "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL"),
    ("GLM-4.5-Air-Q4_K_XL", "GLM-4.5-Air-UD-Q4_K_XL"),
    ("GLM-4.5-Air-Q6_K_XL", "GLM-4.5-Air-UD-Q6_K_XL"),
    ("gpt-oss-120b-mxfp4", "gpt-oss-120b-mxfp4"),
    ("gpt-oss-20b-mxfp4", "gpt-oss-20b-mxfp4"),
]

# Strips a "-000N-of-000M" shard suffix from a model file name.
CLEAN = lambda s: re.sub(r"-000\d+-of-000\d+", "", s)

# Table-cell text for each recognised error type.
ERROR_LABELS = {
    "load": "⚠️ Load Error",
    "hang": "⚠️ GPU Hang",
    "runtime": "⚠️ Runtime Error",
}
|
||||
|
||||
def parse_logs():
    """Collect pp512/tg128 results from every ``results/*.log`` file.

    Log files are expected to be named ``<model>__<env>.log``; files whose
    base name has no ``__`` separator are skipped. The model part is passed
    through ``CLEAN`` before being used as the result key.

    Returns:
        A dict of the form
        ``{model: {"pp512": {env: entry}, "tg128": {env: entry}}}`` where each
        entry is ``{"mean": float | None, "error": bool, "etype": str | None}``.
        ``etype`` is ``"load"``, ``"hang"``, ``"runtime"`` or ``None``; the
        mean is ``None`` whenever an error type was detected or the
        corresponding table row was not found.
    """
    data = {}
    for p in glob.glob(os.path.join("results", "*.log")):
        base = os.path.basename(p)[:-4]  # drop the ".log" suffix
        if "__" not in base:
            continue
        model_raw, env = base.split("__", 1)
        key = CLEAN(model_raw)

        # Read via a context manager so the handle is closed promptly, and
        # decode as UTF-8 so the literal "±" in PP_RE/TG_RE can match
        # regardless of the platform's default encoding.
        with open(p, encoding="utf-8", errors="ignore") as fh:
            t = fh.read()

        pp = PP_RE.search(t)
        tg = TG_RE.search(t)

        # Classification priority: load failure, then GPU hang; a generic
        # error only counts when at least one of the two results is missing.
        et = None
        if LOAD_ERR.search(t):
            et = "load"
        elif HANG_ERR.search(t):
            et = "hang"
        elif GEN_ERR.search(t) and not (pp and tg):
            et = "runtime"

        data.setdefault(key, {"pp512": {}, "tg128": {}})
        data[key]["pp512"][env] = {
            "mean": float(pp.group(1)) if (pp and et is None) else None,
            "error": et is not None,
            "etype": et,
        }
        data[key]["tg128"][env] = {
            "mean": float(tg.group(1)) if (tg and et is None) else None,
            "error": et is not None,
            "etype": et,
        }
    return data
|
||||
# --- Helpers ---
|
||||
def load_results():
    """Return the value stored under the top-level ``"runs"`` key of RESULTS_JSON.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default text encoding.

    Raises:
        OSError: if the results file cannot be read.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if the document has no ``"runs"`` key.
    """
    text = Path(RESULTS_JSON).read_text(encoding="utf-8")
    return json.loads(text)["runs"]
|
||||
|
||||
def best(env_data):
    """Return the environment key with the highest usable mean, or None.

    ``env_data`` maps an environment key to a dict with ``"mean"`` and
    ``"error"`` items. Entries flagged as errors or lacking a mean are
    ignored. On a tie the entry that appears first wins.
    """
    winner = None
    top = None
    for env, entry in env_data.items():
        if entry["error"] or entry["mean"] is None:
            continue
        # Strict comparison keeps the earliest entry when means are equal.
        if top is None or entry["mean"] > top:
            winner, top = env, entry["mean"]
    return winner
|
||||
|
||||
def cell(pp, tg):
    """Render one table cell from a pp512 entry and a tg128 entry.

    Args:
        pp: entry dict with ``"mean"``, ``"error"`` and ``"etype"`` items, or
            None when there is no result for this environment.
        tg: same shape as ``pp``, for the tg128 test.

    Returns:
        ``"—"`` when either entry is missing or has no mean, an error label
        when either entry is flagged as an error, otherwise
        ``"<pp> pp / <tg> tg"`` with pp rounded to an integer and tg shown
        with one decimal.
    """
    if (pp is None) or (tg is None):
        return "—"
    if pp["error"] or tg["error"]:
        m = pp["etype"] or tg["etype"] or "runtime"
        return {"load":"⚠️ Load Error","hang":"⚠️ GPU Hang","runtime":"⚠️ Runtime Error"}.get(m, "⚠️ Error")
    # An entry can be error-free and still have no mean (the result row was
    # simply absent); show a dash instead of failing in round(None).
    if pp["mean"] is None or tg["mean"] is None:
        return "—"
    return f"{int(round(pp['mean']))} pp / {tg['mean']:.1f} tg"
|
||||
|
||||
def find_key(keys, prefix):
    """Return the first item of ``keys`` that starts with ``prefix``, else None."""
    return next((candidate for candidate in keys if candidate.startswith(prefix)), None)
|
||||
def filter_runs(runs, model_prefix, env):
    """Return the first run for the given model prefix and environment.

    A run matches when its ``"model_clean"`` value starts with
    ``model_prefix`` and its ``"env"`` value equals ``env``. Returns None
    when nothing matches.
    """
    matching = (
        run
        for run in runs
        if run["model_clean"].startswith(model_prefix) and run["env"] == env
    )
    return next(matching, None)
|
||||
|
||||
def format_cell(pp_run, tg_run):
    """Build the table-cell text for one model/environment pair.

    ``pp_run`` and ``tg_run`` are run dicts with ``"error"``,
    ``"error_type"`` and ``"tps_mean"`` items, or a falsy value when the run
    is absent. The result is a dash when a run or a throughput value is
    missing, an error label when either run is flagged as an error, and
    otherwise ``"<pp> pp / <tg> tg"``.
    """
    if not (pp_run and tg_run):
        return "—"

    if pp_run["error"] or tg_run["error"]:
        kind = pp_run["error_type"] or tg_run["error_type"]
        return ERROR_LABELS.get(kind, "⚠️ Error")

    pp_tps = pp_run["tps_mean"]
    if pp_tps is None:
        return "—"
    tg_tps = tg_run["tps_mean"]
    if tg_tps is None:
        return "—"

    return f"{int(round(pp_tps))} pp / {tg_tps:.1f} tg"
|
||||
|
||||
def find_winner(runs, model_prefix, bench_type):
    """Return the environment with the highest throughput for one test.

    Args:
        runs: iterable of run dicts with ``"model_clean"``, ``"env"``,
            ``"test"``, ``"error"`` and ``"tps_mean"`` items.
        model_prefix: prefix the run's ``"model_clean"`` must start with.
        bench_type: value the run's ``"test"`` item must equal
            (e.g. ``"pp512"``).

    Returns:
        The key from ENV_ORDER whose matching run has the largest
        ``"tps_mean"``, or None when no environment has a usable run. On a
        tie the environment listed first in ENV_ORDER wins.
    """
    vals = {}
    for env in ENV_ORDER:
        # The test type must be part of the lookup itself: picking the first
        # run for (model, env) and checking its test afterwards drops the
        # environment whenever a run of the other test happens to come first.
        r = next(
            (
                run
                for run in runs
                if run["model_clean"].startswith(model_prefix)
                and run["env"] == env
                and run["test"] == bench_type
            ),
            None,
        )
        if r and not r["error"] and r["tps_mean"] is not None:
            vals[env] = r["tps_mean"]
    if not vals:
        return None
    return max(vals, key=vals.get)
|
||||
|
||||
# --- Main ---
|
||||
def main():
    """Print a Markdown comparison table built from the results JSON.

    One row is printed per model and one column per entry of ENV_ORDER,
    followed by the best environment for pp512 and for tg128. Model prefixes
    given on the command line replace DEFAULT_MODELS; each prefix is then
    also used as the row's display name.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("models", nargs="*", help="Optional model prefixes to include")
    args = ap.parse_args()
    want = [(m, m) for m in args.models] if args.models else DEFAULT_MODELS

    runs = load_results()

    header = ["Model"] + [COL_NAMES[e] for e in ENV_ORDER] + ["🏆 Best PP", "🏆 Best TG"]
    print("| " + " | ".join(header) + " |")
    print("|" + "|".join(["---"] * len(header)) + "|")

    for disp_name, model_prefix in want:
        row = [f"**{disp_name}**"]
        for env in ENV_ORDER:
            # match pp and tg runs by env
            pp_env_run = next((r for r in runs if r["model_clean"].startswith(model_prefix) and r["env"] == env and r["test"] == "pp512"), None)
            tg_env_run = next((r for r in runs if r["model_clean"].startswith(model_prefix) and r["env"] == env and r["test"] == "tg128"), None)
            row.append(format_cell(pp_env_run, tg_env_run))

        bpp = find_winner(runs, model_prefix, "pp512")
        btg = find_winner(runs, model_prefix, "tg128")
        row.append(f"🏆 **{WINNER_LABELS[bpp]}**" if bpp else "—")
        row.append(f"🏆 **{WINNER_LABELS[btg]}**" if btg else "—")

        print("| " + " | ".join(row) + " |")

    print("\nFull interactive results: [Live Benchmark Viewer](https://your-live-results-url)")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python3
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from statistics import mean
|
||||
|
||||
# CONFIG
# Scales the best entry's standard deviation when deciding whether a
# contender still counts as "within best".
TOLERANCE_MULTIPLIER = 1.0


def within_tolerance(best_mean, best_std, contender_mean, contender_std):
    """Report whether a contender's mean is close enough to the best mean.

    The cutoff is ``best_mean - TOLERANCE_MULTIPLIER * best_std``; a
    contender at or above that value qualifies. ``contender_std`` is accepted
    but does not take part in the comparison.
    """
    cutoff = best_mean - TOLERANCE_MULTIPLIER * best_std
    return contender_mean >= cutoff
|
||||
|
||||
# --- Load data ---
with open("../docs/results.json", encoding="utf-8") as f:
    data = json.load(f)

runs = data["runs"]

# --- Group by benchmark type ---
# Runs flagged as errors are dropped; only the two known tests are kept.
benchmarks = defaultdict(list)
for r in runs:
    if r["error"]:
        continue
    if r["test"] in ("pp512", "tg128"):
        benchmarks[r["test"]].append(r)

# Per test: winner counts, average throughput per backend, number of models.
summary = {}

for bench_type, results in benchmarks.items():
    winners_count = defaultdict(int)
    backend_perf = defaultdict(list)

    # Group results by model
    models = defaultdict(list)
    for r in results:
        models[r["model_clean"]].append(r)

    for entries in models.values():
        # A run can be error-free yet carry no throughput value. Such runs
        # cannot be ranked, and leaving them in would make max() compare
        # None with a number and raise TypeError.
        scored = [e for e in entries if e["tps_mean"] is not None]
        if not scored:
            continue

        # Find the best mean
        best_entry = max(scored, key=lambda x: x["tps_mean"])
        best_mean = best_entry["tps_mean"]
        best_std = best_entry["tps_std"] or 0

        for e in scored:
            label = f"{e['env']}{' (FA on)' if e['fa'] else ' (FA off)'}"
            # Collect performance data for average TPS
            backend_perf[label].append(e["tps_mean"])
            # Every entry within tolerance of the best counts as a winner
            if within_tolerance(best_mean, best_std, e["tps_mean"], e["tps_std"] or 0):
                winners_count[label] += 1

    # Store summary
    summary[bench_type] = {
        "winners": dict(sorted(winners_count.items(), key=lambda x: -x[1])),
        "avg_perf": {k: round(mean(v), 2) for k, v in backend_perf.items()},
        "total_models": len(models),
    }

# --- Print human-readable analysis ---
for bench_type in ("pp512", "tg128"):
    if bench_type not in summary:
        continue
    print(f"\n=== {bench_type.upper()} ===")
    print(f"Models tested: {summary[bench_type]['total_models']}")
    print("Winner counts (within tolerance):")
    for backend, count in summary[bench_type]["winners"].items():
        print(f"  {backend}: {count} models")
    print("Average throughput (tokens/sec):")
    for backend, avg in sorted(summary[bench_type]["avg_perf"].items(), key=lambda x: -x[1]):
        print(f"  {backend}: {avg}")
|
||||
|
||||
@@ -1,147 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script to remove host-related entries from log files and delete host files.
|
||||
"""
|
||||
|
||||
import os
|
||||
import glob
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def remove_host_entries_from_log(log_file):
    """
    Strip every '▶ [host]' entry out of the given log file, in place.

    An entry runs from its '▶ [host]' line up to and including the next
    empty line (or the end of the file). A copy of the untouched file is
    saved as '<log_file>.backup' before anything is rewritten.

    Returns True when the file was processed, False when it does not exist.
    """
    if not os.path.exists(log_file):
        print(f"Log file {log_file} not found!")
        return False

    # Create backup
    backup_file = f"{log_file}.backup"
    shutil.copy2(log_file, backup_file)
    print(f"Created backup: {backup_file}")

    with open(log_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    kept = []
    inside_host_entry = False
    for raw in lines:
        if inside_host_entry:
            # The blank line that ends an entry is dropped together with it.
            if raw.strip() == '':
                inside_host_entry = False
            continue
        if raw.strip().startswith('▶ [host]'):
            inside_host_entry = True
            continue
        kept.append(raw)

    # Write the filtered content back
    with open(log_file, 'w', encoding='utf-8') as f:
        f.writelines(kept)

    print(f"Removed host entries from {log_file}")
    return True
|
||||
|
||||
|
||||
def remove_host_files():
    """Delete every entry in the current directory whose name contains 'host'.

    The matches are listed first and then removed one by one; a failed
    removal is reported and does not stop the remaining ones.
    """
    matches = glob.glob('*host*')

    if not matches:
        print("No files with 'host' in filename found.")
        return

    print("Files to be removed:")
    for name in matches:
        print(f"  - {name}")

    for name in matches:
        try:
            os.remove(name)
        except OSError as e:
            print(f"Error removing {name}: {e}")
        else:
            print(f"Removed: {name}")
|
||||
|
||||
|
||||
def preview_host_entries(log_file):
    """Print each '▶ [host]' entry found in the log file, plus a total.

    Nothing is modified. An entry is printed from its '▶ [host]' line up to
    the next empty line or the end of the file.
    """
    if not os.path.exists(log_file):
        print(f"Log file {log_file} not found!")
        return

    with open(log_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    print("Host entries that would be removed:")
    print("-" * 50)

    entry_count = 0
    inside_host_entry = False
    for raw in lines:
        if inside_host_entry:
            if raw.strip() == '':
                print()  # Add empty line after entry
                inside_host_entry = False
            else:
                print(raw.rstrip())
        elif raw.strip().startswith('▶ [host]'):
            entry_count += 1
            print(f"Entry {entry_count}:")
            print(raw.rstrip())
            inside_host_entry = True

    # An entry that runs to the end of the file still gets its trailing blank.
    if inside_host_entry:
        print()

    print(f"Total host entries found: {entry_count}")
|
||||
|
||||
|
||||
def main():
    """Preview the host cleanup, ask for confirmation, then carry it out.

    The preview lists the host entries in the log file and the files whose
    names contain 'host'. Only an answer of 'y' or 'yes' (case-insensitive)
    proceeds; any other answer aborts without changing anything.
    """
    log_file = "run_benchmarks.log"  # Change this to your actual log file name

    print("Host Entry and File Removal Script")
    print("=" * 40)

    # Preview what would be removed
    preview_host_entries(log_file)

    # Show files that would be removed
    candidates = glob.glob('*host*')
    if candidates:
        print(f"\nFiles with 'host' in filename ({len(candidates)} found):")
        for name in candidates:
            print(f"  - {name}")

    print("\nThis script will:")
    print(f"1. Remove host entries from log file: {log_file}")
    print("2. Remove all files with 'host' in the filename")

    answer = input("\nContinue? (y/N): ").strip().lower()
    if answer not in ('y', 'yes'):
        print("Aborted.")
        return

    # Remove host entries from log
    if remove_host_entries_from_log(log_file):
        print("✓ Host entries removed from log file")

    # Remove host files
    remove_host_files()
    print("✓ Host files removed")

    print("\nDone!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user