Clean up of legacy toolboxes, removal of rocwmma and renamed rocm7-alpha to rocm-7nightlies. Added new benchmarks.

2026-01-10 10:31:04 +00:00
parent f0e9bc8865
commit 783998589e
1155 changed files with 20997 additions and 27513 deletions
@@ -0,0 +1,296 @@
+#!/usr/bin/env python3
+import re, glob, os, json, time
+from pathlib import Path
+
+# Directories scanned for "*.log" files. The boolean marks every run found in
+# that directory as an RPC run, regardless of the log's file name.
+RESULT_SOURCES = [
+    ("results", False),       # regular single-node runs
+    ("results-rpc", True),    # distributed RPC runs across two servers
+]
+# Path the generated JSON document is written to (a relative path).
+OUT_JSON = "../docs/results.json"
+
+# --- Regexes ---------------------------------------------------------------
+
+# Table headers come in two shapes (with or without "fa" column).
+# HEADER_RE recognises a header line by its leading "model" column;
+# SEP_RE recognises the dashed separator line that follows a header.
+HEADER_RE = re.compile(r"^\|\s*model\s*\|", re.IGNORECASE)
+SEP_RE    = re.compile(r"^\|\s*-+")
+
+# Build line, e.g. "build: cd6983d5 (6119)"
+# group(1) = hex hash (7+ characters), group(2) = number in parentheses
+BUILD_RE  = re.compile(r"build:\s*([0-9a-f]{7,})\s*\((\d+)\)", re.IGNORECASE)
+
+# Error classifiers, searched in the raw log text by detect_error() in this
+# order: load failures, hangs, then generic runtime errors.
+LOAD_ERR   = re.compile(r"failed to load model|Device memory allocation.*failed|⚠️\s*Fail", re.IGNORECASE)
+HANG_ERR   = re.compile(r"GPU Hang|HW Exception", re.IGNORECASE)
+GENERIC_ERR= re.compile(r"error:|exit \d+|runtime error|⚠️\s*Runtime Error", re.IGNORECASE)
+
+# Extract "mean ± std" from the "t/s" cell of a table row
+# group(1) = mean, group(2) = standard deviation
+TS_RE      = re.compile(r"([\d.]+)\s*±\s*([\d.]+)")
+
+# Quantization from model name (e.g. Q4_K_M, BF16, F16, F32, mxfp4)
+QUANT_RE = re.compile(r"(Q\d+_[A-Z0-9_]+|BF16|F16|F32|mxfp\d+)", re.IGNORECASE)
+
+# Numeric part of the table's "params" cell ("<number> B") and "size" cell
+# ("<number> GiB"); the number may contain "." and "," characters.
+PARAMS_RE = re.compile(r"([\d.,]+)\s*B", re.IGNORECASE)
+GIB_RE    = re.compile(r"([\d.,]+)\s*GiB", re.IGNORECASE)
+
+# "30B", "235B" from model name (case-sensitive: only an upper-case "B")
+NAME_B_RE = re.compile(r"(\d+(?:\.\d+)?)B")
+
+# Shard suffix in filenames (e.g. "-00001-of-00003")
+SHARD_RE = re.compile(r"-000\d+-of-000\d+", re.IGNORECASE)
+
+# Long-context suffix in filenames (e.g., __longctx32768)
+# group(1) = the digits following "longctx"
+LONGCTX_RE = re.compile(r"longctx(\d+)", re.IGNORECASE)
+
+# --- Helpers ---------------------------------------------------------------
+
+# Legacy environment names mapped to the canonical name used in the output.
+# canonicalize_env() applies this to exact matches and to names of the form
+# "<legacy>-<variant>".
+ENV_CANON = {
+    "rocm7_1_1": "rocm7.1.1",
+    "rocm7_alpha": "rocm7-nightlies",
+    "rocm-7alpha": "rocm7-nightlies",
+}
+
+def clean_model_name(raw):
+    """Return ``raw`` with every SHARD_RE match (shard suffix) removed."""
+    return SHARD_RE.sub("", raw)
+
+def canonicalize_env(env):
+    """
+    Translate a legacy environment name into its canonical form.
+
+    A name equal to an ENV_CANON key, or made of such a key followed by
+    "-<variant>", has the key replaced by the canonical name while the
+    "-<variant>" tail is kept. Falsy values (None, "") and names without a
+    matching key are returned unchanged.
+    """
+    if not env:
+        return env
+    for legacy, canonical in ENV_CANON.items():
+        if not env.startswith(legacy):
+            continue
+        tail = env[len(legacy):]
+        # Only an exact match or a "-"-separated variant counts; a longer name
+        # that merely shares the prefix is left alone.
+        if not tail or tail.startswith("-"):
+            return canonical + tail
+    return env
+
+def parse_env_flags(basename):
+    """
+    Decode the "__"-separated fields of a log base name.
+
+    pattern: <model>__<env>[__fa1][__hblt0][__longctx32768][__rpc]
+
+    Suffixes are compared case-insensitively; unknown suffixes are ignored.
+    "hblt0" is folded into the environment name as "<env>-hblt0".
+
+    Returns (env, fa, context_tag, context_tokens, rpc_flag). When the name
+    has no "__" separator the result is (None, False, "default", None, False).
+    """
+    fields = basename.split("__")
+    if len(fields) < 2:
+        return None, False, "default", None, False
+
+    env = fields[1]
+    fa_on = False
+    rpc_on = False
+    ctx_tag, ctx_tokens = "default", None
+
+    for flag in (field.lower() for field in fields[2:]):
+        if flag == "fa1":
+            fa_on = True
+            continue
+        if flag == "hblt0":
+            env = f"{env}-hblt0"
+            continue
+        if flag == "rpc":
+            rpc_on = True
+            continue
+        if not flag.startswith("longctx"):
+            continue
+
+        # The (lower-cased) suffix itself becomes the context tag; the token
+        # count is only filled in when digits follow "longctx".
+        ctx_tag = flag
+        found = LONGCTX_RE.search(flag)
+        if found:
+            try:
+                ctx_tokens = int(found.group(1))
+            except ValueError:
+                ctx_tokens = None
+
+    return env, fa_on, ctx_tag, ctx_tokens, rpc_on
+
+def env_base_and_variant(env):
+    """
+    Split an environment name at its first "-" into (base, variant).
+
+    e.g. "rocm6_4_2-rocwmma" -> ("rocm6_4_2", "rocwmma"). A name without
+    "-" is returned as (env, None).
+    """
+    base, dash, variant = env.partition("-")
+    return (base, variant) if dash else (env, None)
+
+def detect_error(text):
+    """
+    Classify the failure recorded in a log.
+
+    The classifiers are tried in order (load, hang, runtime) and the first
+    one that matches anywhere in ``text`` wins.
+
+    Returns (True, "load" | "hang" | "runtime") on a match, else (False, None).
+    """
+    classifiers = (
+        (LOAD_ERR, "load"),
+        (HANG_ERR, "hang"),
+        (GENERIC_ERR, "runtime"),
+    )
+    for pattern, label in classifiers:
+        if pattern.search(text):
+            return True, label
+    return False, None
+
+def parse_table(text):
+    """
+    Parse the markdown-like result table(s) contained in ``text``.
+
+    A line matching HEADER_RE starts a table and defines its column names
+    (stripped and lower-cased). Every following line that starts with "|"
+    and is not a separator becomes a dict mapping column name -> cell text.
+    Separator and blank lines are skipped, rows with fewer cells than the
+    header are dropped, and anything before the first header is ignored.
+    Handles presence/absence of the 'fa' column.
+
+    If the text holds several tables, the rows of all of them are returned
+    in order, each keyed by the header of its own table.
+
+    Returns a list of row dicts (empty when no header line is found).
+    """
+    rows = []
+    header = None
+    col_idx = {}
+
+    for line in text.splitlines():
+        if HEADER_RE.search(line):
+            header = [c.strip().lower() for c in line.strip().strip("|").split("|")]
+            # Rebuild the index map from scratch so that columns of an earlier
+            # table (e.g. "fa") cannot leak into a table that lacks them.
+            col_idx = {name: idx for idx, name in enumerate(header)}
+            continue
+        if header is None:
+            # Still in the preamble before the first table.
+            continue
+        if SEP_RE.search(line) or not line.strip():
+            # Separators and blank lines carry no data.
+            continue
+        if not line.startswith("|"):
+            continue
+
+        cells = [c.strip() for c in line.strip().strip("|").split("|")]
+        # Guard for short lines: every header column must have a cell.
+        if len(cells) < len(header):
+            continue
+        rows.append({name: cells[idx] for name, idx in col_idx.items()})
+
+    return rows
+
+def coerce_float(m, default=None):
+    """
+    Convert ``m`` to a float.
+
+    Returns ``default`` when ``m`` cannot be converted (wrong type, a string
+    that is not a number, or an integer too large for a float). Only these
+    conversion failures are absorbed; unrelated exceptions such as
+    KeyboardInterrupt propagate to the caller.
+    """
+    try:
+        return float(m)
+    except (TypeError, ValueError, OverflowError):
+        return default
+
+def extract_quant(model_name):
+    """Return the first QUANT_RE match in ``model_name`` upper-cased, or None."""
+    found = QUANT_RE.search(model_name)
+    if found is None:
+        return None
+    return found.group(1).upper()
+
+def b_from_name(model_name):
+    """Return the number in front of the first "<number>B" in ``model_name`` as a float, or None."""
+    found = NAME_B_RE.search(model_name)
+    if not found:
+        return None
+    return coerce_float(found.group(1))
+
+# --- Main scan -------------------------------------------------------------
+
+# Accumulators filled while scanning the logs:
+#   runs   - one dict per parsed table row, or a single placeholder dict for
+#            a log that yielded no table rows
+#   builds - distinct (hash, number) pairs taken from "build:" lines
+#   envs   - distinct canonical environment names
+runs = []
+builds = set()
+envs  = set()
+
+for results_dir, is_rpc_source in RESULT_SOURCES:
+    glob_pattern = os.path.join(results_dir, "*.log")
+    for path in sorted(glob.glob(glob_pattern)):
+        base = os.path.basename(path).rsplit(".log", 1)[0]
+        if "__" not in base:
+            # Not named <model>__<env>...; skip the file.
+            continue
+
+        model_raw = base.split("__", 1)[0]
+        env, fa_from_name, context_tag, context_tokens, rpc_flag = parse_env_flags(base)
+        env = canonicalize_env(env)
+        if env:
+            envs.add(env)
+
+        model_clean = clean_model_name(model_raw)
+        # Quantization depends only on the file name, so derive it once per log.
+        quant = extract_quant(model_clean)
+
+        # The regexes rely on non-ASCII markers ("±", "⚠️"), so decode explicitly
+        # instead of depending on the locale default (logs assumed to be UTF-8).
+        # Undecodable bytes are dropped.
+        with open(path, encoding="utf-8", errors="ignore") as f:
+            text = f.read()
+
+        # build info (take the last match in file if many)
+        build_hash, build_num = None, None
+        for m in BUILD_RE.finditer(text):
+            build_hash, build_num = m.group(1), m.group(2)
+        if build_hash:
+            builds.add((build_hash, build_num))
+
+        table_rows = parse_table(text)
+
+        # Errors are only reported when the log has no pp512 / tg128 result row;
+        # a log with performance numbers is never flagged.
+        has_pp = any(r.get("test","").lower()=="pp512" for r in table_rows)
+        has_tg = any(r.get("test","").lower()=="tg128" for r in table_rows)
+        error, etype = (False, None)
+        if not (has_pp or has_tg):
+            error, etype = detect_error(text)
+
+        # Determine FA flag:
+        #   prefer explicit column "fa" if present, else fallback to filename "__fa1"
+        # Only the first row that has an "fa" cell is consulted.
+        fa_in_table = None
+        for r in table_rows:
+            if "fa" in r:
+                try:
+                    fa_in_table = int(r["fa"]) == 1
+                except ValueError:
+                    # Non-numeric cell: fall back to the file name.
+                    fa_in_table = None
+                break
+        fa_enabled = fa_in_table if fa_in_table is not None else fa_from_name
+
+        # Normalize env base / variant (e.g., rocwmma)
+        env_base, env_variant = env_base_and_variant(env)
+
+        # Emit one run per row (pp512 / tg128); a log without rows still emits
+        # one placeholder run so that its error state is recorded.
+        for r in table_rows or [{}]:
+            test = r.get("test", "").lower() if table_rows else None
+            tps_mean, tps_std = None, None
+            if table_rows:
+                ts_field = r.get("t/s", "")
+                m = TS_RE.search(ts_field)
+                if m:
+                    tps_mean = coerce_float(m.group(1))
+                    tps_std  = coerce_float(m.group(2))
+
+            # parse numeric helpers from row (if present)
+            params_b = None
+            file_size_gib = None
+            if "params" in r:
+                pm = PARAMS_RE.search(r["params"])
+                if pm:
+                    params_b = coerce_float(pm.group(1).replace(",", ""))
+            if "size" in r:
+                sm = GIB_RE.search(r["size"])
+                if sm:
+                    file_size_gib = coerce_float(sm.group(1).replace(",", ""))
+
+            # name_params_b: prefer table value; else fall back to B in model name
+            name_params_b = params_b if params_b is not None else b_from_name(model_clean)
+
+            backend = r.get("backend")
+            ngl = r.get("ngl")
+            mmap = r.get("mmap")
+
+            run = {
+                "model": model_raw,
+                "model_clean": model_clean,
+                "env": env,
+                "env_base": env_base,
+                "env_variant": env_variant,         # e.g. "rocwmma"
+                "fa": bool(fa_enabled),
+                "context": context_tag or "default",
+                "context_tokens": context_tokens,
+                "test": test,                       # "pp512" | "tg128" | None (if error)
+                "tps_mean": tps_mean,
+                "tps_std": tps_std,
+                "error": bool(error),
+                "error_type": etype,                # "load" | "hang" | "runtime" | None
+                "backend": backend,
+                "ngl": (int(ngl) if (ngl and ngl.isdigit()) else None),
+                "mmap": (int(mmap) if (mmap and mmap.isdigit()) else None),
+                "params_b": params_b,               # from table, if available
+                "file_size_gib": file_size_gib,     # from table, if available
+                "name_params_b": name_params_b,     # table value, else parsed from model name (e.g., 30B -> 30.0)
+                "quant": quant,
+                "log": path,
+                "rpc": bool(is_rpc_source or rpc_flag),
+                "build": {"hash": build_hash, "number": build_num} if build_hash else None,
+            }
+            runs.append(run)
+
+# Meta: generation timestamp (UTC), the distinct builds and environments seen
+# during the scan, and a short legend for the test names.
+meta = {
+    "generated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
+    "os_kernel": "Fedora 42 — Linux 6.15.9-201.fc42.x86_64 (Sat Aug  2 11:37:34 UTC 2025)",
+    "llamacpp_builds": [dict(zip(("hash", "number"), pair)) for pair in sorted(builds)],
+    "environments": sorted(envs),
+    "notes": "pp512 = prompt processing; tg128 = text generation; t/s = tokens/second",
+}
+
+# The document is the metadata plus the flat list of runs.
+out = dict(meta=meta, runs=runs)
+
+Path(OUT_JSON).write_text(json.dumps(out, indent=2))
+print(f"Wrote {OUT_JSON} with {len(runs)} rows.")