adding raw benchmark results

This commit is contained in:
Donato Capitella
2025-08-09 10:44:09 +01:00
parent 0dd1f8d047
commit 8972ef01ff
320 changed files with 3580 additions and 10779 deletions
+239
View File
@@ -0,0 +1,239 @@
#!/usr/bin/env python3
import re, glob, os, json, time
from pathlib import Path
# Directory scanned for "*.log" benchmark logs, and the JSON file produced.
RESULTS_DIR = "results"
OUT_JSON = "results.json"
# --- Regexes ---------------------------------------------------------------
# Table headers come in two shapes (with or without "fa" column)
HEADER_RE = re.compile(r"^\|\s*model\s*\|", re.IGNORECASE)
# Separator row that follows the header, e.g. "| ---- | ---: |"
SEP_RE = re.compile(r"^\|\s*-+")
# Build line, e.g. "build: cd6983d5 (6119)"
BUILD_RE = re.compile(r"build:\s*([0-9a-f]{7,})\s*\((\d+)\)", re.IGNORECASE)
# Error classifiers (same spirit as your table script)
LOAD_ERR = re.compile(r"failed to load model|Device memory allocation.*failed|⚠️\s*Fail", re.IGNORECASE)
HANG_ERR = re.compile(r"GPU Hang|HW Exception", re.IGNORECASE)
GENERIC_ERR= re.compile(r"error:|exit \d+|runtime error|⚠️\s*Runtime Error", re.IGNORECASE)
# Extract numeric ± numeric from the last column
TS_RE = re.compile(r"([\d.]+)\s*±\s*([\d.]+)")
# Quantization from model name, e.g. "Q4_K_XL", "Q8_0", "IQ4_XS", "BF16", "mxfp4".
# The suffix class includes digits so that tags such as "Q8_0" / "Q4_1" match,
# and the optional leading "I" keeps i-quant tags ("IQ4_XS") intact instead of
# truncating them to "Q4_XS".
QUANT_RE = re.compile(r"(I?Q\d+_[A-Z0-9_]+|BF16|F16|F32|mxfp\d+)", re.IGNORECASE)
# Params like "235.09 B" from the table
PARAMS_RE = re.compile(r"([\d.]+)\s*B", re.IGNORECASE)
# File size like "96.99 GiB" from the table
GIB_RE = re.compile(r"([\d.]+)\s*GiB", re.IGNORECASE)
# "30B", "235B" from model name (case-sensitive: a lowercase "b" does not match)
NAME_B_RE = re.compile(r"(\d+(?:\.\d+)?)B")
# Shard suffix in filenames, e.g. "-00001-of-00003"
SHARD_RE = re.compile(r"-000\d+-of-000\d+", re.IGNORECASE)
# --- Helpers ---------------------------------------------------------------
def clean_model_name(raw):
    """Return *raw* with every shard suffix matched by SHARD_RE removed."""
    return SHARD_RE.sub("", raw)
def parse_env_and_fa(basename):
    """
    Split a log basename of the form "<model>__<env>[__fa1]".

    Returns (env, fa): env is the second "__"-separated field, and fa is True
    only when a third field exists and equals "fa1" (case-insensitive).
    Returns (None, False) when the name has no "__" separator.
    """
    pieces = basename.split("__")
    if len(pieces) < 2:
        return None, False
    has_fa = len(pieces) > 2 and pieces[2].lower() == "fa1"
    return pieces[1], has_fa
def env_base_and_variant(env):
    """
    Split an environment name at its first "-" into (base, variant).

    e.g. "rocm6_4_2-rocwmma" -> ("rocm6_4_2", "rocwmma"); a name without a
    "-" is returned unchanged with a variant of None.
    """
    if "-" not in env:
        return env, None
    head, _, tail = env.partition("-")
    return head, tail
def detect_error(text):
    """
    Classify a log's text against the error regexes.

    Returns (True, kind) for the first classifier that matches, checked in
    the order "load", "hang", "runtime"; returns (False, None) if none match.
    """
    classifiers = (
        (LOAD_ERR, "load"),
        (HANG_ERR, "hang"),
        (GENERIC_ERR, "runtime"),
    )
    for pattern, label in classifiers:
        if pattern.search(text):
            return True, label
    return False, None
def parse_table(text):
    """
    Returns list of rows parsed from the markdown-like table(s) in *text*.

    A header is any line matching HEADER_RE (starts with "| model |"); its
    cells, stripped and lower-cased, become the keys of every following row.
    Each row is a dict mapping those header names to the stripped cell text.
    Handles presence/absence of the 'fa' column.

    Separator lines, blank lines and lines that do not start with "|" are
    ignored, and rows with fewer cells than the current header are dropped.
    Blank lines do not end parsing: if the text contains several tables, the
    rows of all of them are returned, each keyed by its own header.
    Returns an empty list when no header is found.
    """
    rows = []
    header = None
    col_idx = {}
    for line in text.splitlines():
        if HEADER_RE.search(line):
            header = [c.strip().lower() for c in line.strip().strip("|").split("|")]
            # Rebuild the index map from scratch for every header so that a
            # column present only in an earlier table (e.g. "fa") cannot leak
            # a stale index into the rows of this one.
            col_idx = {name: idx for idx, name in enumerate(header)}
            continue
        if header is None:
            # nothing to parse until the first header has been seen
            continue
        if SEP_RE.search(line) or not line.strip():
            # skip separators / blanks after header
            continue
        if line.startswith("|"):
            parts = [c.strip() for c in line.strip().strip("|").split("|")]
            # guard for short lines
            if len(parts) < len(header):
                continue
            rows.append({name: parts[idx] for name, idx in col_idx.items()})
    return rows
def coerce_float(m, default=None):
    """
    Convert *m* to float, returning *default* when it cannot be converted.

    Only the conversion failures raised by float() are handled (wrong type,
    malformed text, out-of-range integer); anything else, such as
    KeyboardInterrupt, propagates to the caller.
    """
    try:
        return float(m)
    except (TypeError, ValueError, OverflowError):
        return default
def extract_quant(model_name):
    """Return the first QUANT_RE match in *model_name*, upper-cased, or None."""
    found = QUANT_RE.search(model_name)
    if not found:
        return None
    return found.group(1).upper()
def b_from_name(model_name):
    """
    Return the number in front of the first "<number>B" token of *model_name*
    as a float (e.g. "30B" -> 30.0), or None when NAME_B_RE finds no token.
    """
    hit = NAME_B_RE.search(model_name)
    if not hit:
        return None
    return coerce_float(hit.group(1))
# --- Main scan -------------------------------------------------------------
# Walk every "<model>__<env>[__fa1].log" under RESULTS_DIR and flatten it into
# one record per parsed table row. A log without any table rows still yields
# a single placeholder record so that failed runs show up in the output.
runs = []        # one dict per (log, table row)
builds = set()   # distinct (hash, number) pairs seen in "build:" lines
envs = set()     # distinct environment names taken from the filenames
for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.log"))):
    base = os.path.splitext(os.path.basename(path))[0]
    if "__" not in base:
        # not a "<model>__<env>" log; ignore
        continue
    model_raw = base.split("__", 1)[0]
    env, fa_from_name = parse_env_and_fa(base)
    envs.add(env)
    model_clean = clean_model_name(model_raw)
    # These depend only on the filename, so compute them once per log
    quant = extract_quant(model_clean)
    name_params_b = b_from_name(model_clean)

    # The parsers rely on non-ASCII markers in the logs ("±", "⚠️"), so decode
    # as UTF-8 explicitly instead of using the locale's default encoding;
    # undecodable bytes are dropped.
    with open(path, encoding="utf-8", errors="ignore") as f:
        text = f.read()

    # build info (take the last match in file if many)
    build_hash, build_num = None, None
    for m in BUILD_RE.finditer(text):
        build_hash, build_num = m.group(1), m.group(2)
    if build_hash:
        builds.add((build_hash, build_num))

    table_rows = parse_table(text)
    # Even if table rows exist, we only look for errors when no pp512/tg128
    # result was found among them.
    has_perf = any(
        r.get("test", "").lower() in ("pp512", "tg128") for r in table_rows
    )
    error, etype = (False, None)
    if not has_perf:
        error, etype = detect_error(text)

    # Determine FA flag:
    # prefer explicit column "fa" if present, else fallback to filename "__fa1".
    # Only the first row that carries an "fa" cell is consulted.
    fa_in_table = None
    fa_cell = next((r["fa"] for r in table_rows if "fa" in r), None)
    if fa_cell is not None:
        try:
            fa_in_table = int(fa_cell) == 1
        except ValueError:
            # non-numeric cell: fall back to the filename flag
            fa_in_table = None
    fa_enabled = fa_in_table if fa_in_table is not None else fa_from_name

    # Normalize env base / variant (e.g., rocwmma)
    env_base, env_variant = env_base_and_variant(env)

    # Emit one run per row (pp512 / tg128); "[{}]" is the placeholder row used
    # when the log has no table at all.
    for r in table_rows or [{}]:
        test = r.get("test", "").lower() if table_rows else None

        # throughput "mean ± std" from the "t/s" column
        tps_mean, tps_std = None, None
        m = TS_RE.search(r.get("t/s", ""))
        if m:
            tps_mean = coerce_float(m.group(1))
            tps_std = coerce_float(m.group(2))

        # parse numeric helpers from row (if present)
        params_b = None
        file_size_gib = None
        if "params" in r:
            pm = PARAMS_RE.search(r["params"])
            if pm:
                params_b = coerce_float(pm.group(1))
        if "size" in r:
            sm = GIB_RE.search(r["size"])
            if sm:
                file_size_gib = coerce_float(sm.group(1))

        backend = r.get("backend")
        ngl = r.get("ngl")
        mmap = r.get("mmap")

        run = {
            "model": model_raw,
            "model_clean": model_clean,
            "env": env,
            "env_base": env_base,
            "env_variant": env_variant,  # e.g. "rocwmma"
            "fa": bool(fa_enabled),
            "test": test,  # "pp512" | "tg128" | None (if error)
            "tps_mean": tps_mean,
            "tps_std": tps_std,
            "error": bool(error),
            "error_type": etype,  # "load" | "hang" | "runtime" | None
            "backend": backend,
            "ngl": (int(ngl) if (ngl and ngl.isdigit()) else None),
            "mmap": (int(mmap) if (mmap and mmap.isdigit()) else None),
            "params_b": params_b,  # from table, if available
            "file_size_gib": file_size_gib,  # from table, if available
            "name_params_b": name_params_b,  # parsed from model name (e.g., 30B -> 30.0)
            "quant": quant,
            "log": path,
            "build": {"hash": build_hash, "number": build_num} if build_hash else None,
        }
        runs.append(run)
# Meta: run-wide information stored next to the per-row records.
generated_at = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
build_entries = [dict(hash=h, number=n) for h, n in sorted(builds)]
meta = dict(
    generated_at=generated_at,
    os_kernel="Fedora 42 — Linux fedora 6.16.0-264.vanilla.fc42.x86_64 (2025-07-28)",
    llamacpp_builds=build_entries,
    environments=sorted(envs),
    notes="pp512 = prompt processing; tg128 = text generation; t/s = tokens/second",
)
out = dict(meta=meta, runs=runs)
# Serialize everything as indented JSON into OUT_JSON and report the row count.
serialized = json.dumps(out, indent=2)
Path(OUT_JSON).write_text(serialized)
print(f"Wrote {OUT_JSON} with {len(runs)} rows.")