Files
amd-strix-halo-toolboxes/benchmark/generate_results_json.py
T

297 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
import re, glob, os, json, time
from pathlib import Path
RESULT_SOURCES = [
("results", False), # regular single-node runs
("results-rpc", True), # distributed RPC runs across two servers
]
OUT_JSON = "../docs/results.json"
# --- Regexes ---------------------------------------------------------------
# Table headers come in two shapes (with or without "fa" column)
HEADER_RE = re.compile(r"^\|\s*model\s*\|", re.IGNORECASE)
SEP_RE = re.compile(r"^\|\s*-+")
# Build line, e.g. "build: cd6983d5 (6119)"
BUILD_RE = re.compile(r"build:\s*([0-9a-f]{7,})\s*\((\d+)\)", re.IGNORECASE)
# Error classifiers (same spirit as your table script)
LOAD_ERR = re.compile(r"failed to load model|Device memory allocation.*failed|⚠️\s*Fail", re.IGNORECASE)
HANG_ERR = re.compile(r"GPU Hang|HW Exception", re.IGNORECASE)
GENERIC_ERR= re.compile(r"error:|exit \d+|runtime error|⚠️\s*Runtime Error", re.IGNORECASE)
# Extract numeric ± numeric from the last column
TS_RE = re.compile(r"([\d.]+)\s*±\s*([\d.]+)")
# Quantization from model name
QUANT_RE = re.compile(r"(Q\d+_[A-Z0-9_]+|BF16|F16|F32|mxfp\d+)", re.IGNORECASE)
PARAMS_RE = re.compile(r"([\d.,]+)\s*B", re.IGNORECASE)
GIB_RE = re.compile(r"([\d.,]+)\s*GiB", re.IGNORECASE)
# "30B", "235B" from model name
NAME_B_RE = re.compile(r"(\d+(?:\.\d+)?)B")
# Shard suffix in filenames
SHARD_RE = re.compile(r"-000\d+-of-000\d+", re.IGNORECASE)
# Long-context suffix in filenames (e.g., __longctx32768)
LONGCTX_RE = re.compile(r"longctx(\d+)", re.IGNORECASE)
# --- Helpers ---------------------------------------------------------------
ENV_CANON = {
"rocm7_1_1": "rocm7.1.1",
"rocm7_alpha": "rocm7-nightlies",
"rocm-7alpha": "rocm7-nightlies",
}
def clean_model_name(raw):
base = SHARD_RE.sub("", raw)
return base
def canonicalize_env(env):
if not env:
return env
for raw, canon in ENV_CANON.items():
prefix = f"{raw}-"
if env == raw:
return canon
if env.startswith(prefix):
return canon + env[len(raw):]
return env
def parse_env_flags(basename):
"""
pattern: <model>__<env>[__fa1][__hblt0][__longctx32768][__rpc]
Returns (env, fa, context_tag, context_tokens, rpc_flag)
"""
parts = basename.split("__")
if len(parts) < 2:
return None, False, "default", None, False
env = parts[1]
fa = False
context_tag = "default"
context_tokens = None
rpc_flag = False
for raw_suffix in parts[2:]:
suffix = raw_suffix.lower()
if suffix == "fa1":
fa = True
elif suffix == "hblt0":
env = f"{env}-hblt0"
elif suffix.startswith("longctx"):
context_tag = suffix
m = LONGCTX_RE.search(suffix)
if m:
try:
context_tokens = int(m.group(1))
except ValueError:
context_tokens = None
elif suffix == "rpc":
rpc_flag = True
return env, fa, context_tag, context_tokens, rpc_flag
def env_base_and_variant(env):
# e.g. "rocm6_4_2-rocwmma" -> ("rocm6_4_2", "rocwmma")
if "-" in env:
base, variant = env.split("-", 1)
return base, variant
return env, None
def detect_error(text):
if LOAD_ERR.search(text):
return True, "load"
if HANG_ERR.search(text):
return True, "hang"
if GENERIC_ERR.search(text):
return True, "runtime"
return False, None
def parse_table(text):
"""
Returns list of rows parsed from the markdown-like table.
Each row is a dict of the parsed columns, normalized by header names.
Handles presence/absence of the 'fa' column.
"""
lines = text.splitlines()
rows = []
header = None
col_idx = {}
for i, line in enumerate(lines):
if HEADER_RE.search(line):
# header line
header = [c.strip().lower() for c in line.strip().strip("|").split("|")]
# next line should be the separator; skip it
# build index map
for idx, name in enumerate(header):
col_idx[name] = idx
continue
if header and (SEP_RE.search(line) or not line.strip()):
# skip separators / blanks after header
continue
if header and line.startswith("|"):
parts = [c.strip() for c in line.strip().strip("|").split("|")]
# guard for short lines
if len(parts) < len(header):
continue
row = {}
for name, idx in col_idx.items():
row[name] = parts[idx]
rows.append(row)
# stop parsing block when a blank line after some rows appears
if header and line.strip() == "" and rows:
break
return rows
def coerce_float(m, default=None):
try:
return float(m)
except:
return default
def extract_quant(model_name):
m = QUANT_RE.search(model_name)
return (m.group(1).upper() if m else None)
def b_from_name(model_name):
m = NAME_B_RE.search(model_name)
return coerce_float(m.group(1)) if m else None
# --- Main scan -------------------------------------------------------------
runs = []
builds = set()
envs = set()
for results_dir, is_rpc_source in RESULT_SOURCES:
glob_pattern = os.path.join(results_dir, "*.log")
for path in sorted(glob.glob(glob_pattern)):
base = os.path.basename(path).rsplit(".log", 1)[0]
if "__" not in base:
continue
model_raw, _rest = base.split("__", 1)
env, fa_from_name, context_tag, context_tokens, rpc_flag = parse_env_flags(base)
env = canonicalize_env(env)
if env:
envs.add(env)
model_clean = clean_model_name(model_raw)
with open(path, errors="ignore") as f:
text = f.read()
# build info (take the last match in file if many)
build_hash, build_num = None, None
for m in BUILD_RE.finditer(text):
build_hash, build_num = m.group(1), m.group(2)
if build_hash:
builds.add((build_hash, build_num))
# detect error (if there is no valid table rows)
table_rows = parse_table(text)
# If table rows exist, well still mark errors only if no perf found
has_pp = any(r.get("test","").lower()=="pp512" for r in table_rows)
has_tg = any(r.get("test","").lower()=="tg128" for r in table_rows)
error, etype = (False, None)
if not (has_pp or has_tg):
error, etype = detect_error(text)
# Determine FA flag:
# prefer explicit column "fa" if present, else fallback to filename "__fa1"
fa_in_table = None
for r in table_rows:
if "fa" in r:
try:
fa_in_table = int(r["fa"]) == 1
except:
fa_in_table = None
break
fa_enabled = fa_in_table if fa_in_table is not None else fa_from_name
# Normalize env base / variant (e.g., rocwmma)
env_base, env_variant = env_base_and_variant(env)
# Emit one run per row (pp512 / tg128)
for r in table_rows or [{}]:
test = r.get("test", "").lower() if table_rows else None
tps_mean, tps_std = None, None
if table_rows:
ts_field = r.get("t/s", "")
m = TS_RE.search(ts_field)
if m:
tps_mean = coerce_float(m.group(1))
tps_std = coerce_float(m.group(2))
# parse numeric helpers from row (if present)
params_b = None
file_size_gib = None
if "params" in r:
pm = PARAMS_RE.search(r["params"])
if pm:
params_b = coerce_float(pm.group(1).replace(",", ""))
if "size" in r:
sm = GIB_RE.search(r["size"])
if sm:
file_size_gib = coerce_float(sm.group(1).replace(",", ""))
# quant from model name (unchanged)
quant = extract_quant(model_clean)
# name_params_b: prefer table value; else fall back to B in model name
name_params_b = params_b if params_b is not None else b_from_name(model_clean)
backend = r.get("backend")
ngl = r.get("ngl")
mmap = r.get("mmap")
run = {
"model": model_raw,
"model_clean": model_clean,
"env": env,
"env_base": env_base,
"env_variant": env_variant, # e.g. "rocwmma"
"fa": bool(fa_enabled),
"context": context_tag or "default",
"context_tokens": context_tokens,
"test": test, # "pp512" | "tg128" | None (if error)
"tps_mean": tps_mean,
"tps_std": tps_std,
"error": bool(error),
"error_type": etype, # "load" | "hang" | "runtime" | None
"backend": backend,
"ngl": (int(ngl) if (ngl and ngl.isdigit()) else None),
"mmap": (int(mmap) if (mmap and mmap.isdigit()) else None),
"params_b": params_b, # from table, if available
"file_size_gib": file_size_gib, # from table, if available
"name_params_b": name_params_b, # parsed from model name (e.g., 30B -> 30.0)
"quant": quant,
"log": path,
"rpc": bool(is_rpc_source or rpc_flag),
"build": {"hash": build_hash, "number": build_num} if build_hash else None,
}
runs.append(run)
# Meta
meta = {
"generated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
"os_kernel": "Fedora 42 — Linux 6.15.9-201.fc42.x86_64 (Sat Aug 2 11:37:34 UTC 2025)",
"llamacpp_builds": [{"hash": h, "number": n} for (h, n) in sorted(builds)],
"environments": sorted(envs),
"notes": "pp512 = prompt processing; tg128 = text generation; t/s = tokens/second",
}
out = {"meta": meta, "runs": runs}
Path(OUT_JSON).write_text(json.dumps(out, indent=2))
print(f"Wrote {OUT_JSON} with {len(runs)} rows.")