120 lines
4.2 KiB
Python
120 lines
4.2 KiB
Python
#!/usr/bin/env python3
|
|
import re, glob, os
|
|
|
|
# This script parses llama-bench logs in 'results/' to produce
# Markdown tables for pp512 (prompt processing) and tg128 (text generation).
|
|
|
|
# Regex patterns to extract tokens/sec rows.
# Each pattern skips five leading table cells, then requires the test-name
# cell (pp512 / tg128) and captures the "mean ± stddev" pair that follows it:
# group(1) is the mean, group(2) the standard deviation (both as strings).
PP_RE = re.compile(r"\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|\s*pp512\s*\|\s*([\d.]+)\s*±\s*([\d.]+)")
TG_RE = re.compile(r"\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|\s*tg128\s*\|\s*([\d.]+)\s*±\s*([\d.]+)")

# Patterns to classify errors (all case-insensitive).
LOAD_ERR = re.compile(r"failed to load model|Device memory allocation.*failed", re.IGNORECASE)
HANG_ERR = re.compile(r"GPU Hang|HW Exception", re.IGNORECASE)
GENERIC_ERR = re.compile(r"error:|exit \d+", re.IGNORECASE)

# Env ordering: the column order used when rendering the result tables.
ENV_ORDER = ["vulkan_radv", "vulkan_amdvlk", "rocm6_4_2", "rocm7_beta", "rocm7_rc"]

# Parsed results, filled in by the log scan below:
# data[model][test_label][env] -> {'mean', 'std', 'error', 'etype'}
data = {}
|
|
|
|
# Utility to clean model names
def clean_name(raw):
    """Return *raw* with any multi-part shard suffix removed.

    Strips every ``-<index>-of-<count>`` fragment such as ``-00001-of-00003``.
    Both numbers are matched as runs of four or more digits instead of
    requiring a literal ``000`` prefix, so models split into 100 or more
    parts (``-00001-of-00120``) are cleaned as well.

    Args:
        raw: Model name as taken from the log file name.

    Returns:
        The name without the shard fragment; unchanged if none is present.
    """
    return re.sub(r"-\d{4,}-of-\d{4,}", "", raw)
|
|
|
|
# Scan logs: one file per run, named "<model>__<env>.log" under results/.
glob_pattern = os.path.join("results", "*.log")
for path in sorted(glob.glob(glob_pattern)):
    base = os.path.basename(path).rsplit('.log', 1)[0]
    if '__' not in base:
        # File name does not follow the "<model>__<env>" convention; skip it.
        continue
    model_raw, env = base.split('__', 1)
    model = clean_name(model_raw)

    # Decode explicitly as UTF-8: the result regexes look for a literal "±",
    # which a non-UTF-8 locale default would mis-decode. The context manager
    # closes the handle instead of leaving that to garbage collection.
    with open(path, encoding='utf-8', errors='ignore') as log_file:
        text = log_file.read()

    pp_found = PP_RE.search(text)
    tg_found = TG_RE.search(text)

    # Determine error type. Load errors and hangs always win; a generic
    # error marker only counts when at least one result row is missing.
    if LOAD_ERR.search(text):
        err_type = 'load'
    elif HANG_ERR.search(text):
        err_type = 'hang'
    elif GENERIC_ERR.search(text) and not (pp_found and tg_found):
        err_type = 'runtime'
    else:
        err_type = None

    # Extract performance only when the run was not classified as failed
    # (any error type discards both results for this log).
    pp_match = pp_found if err_type is None else None
    tg_match = tg_found if err_type is None else None

    for key, match in [('pp512', pp_match), ('tg128', tg_match)]:
        cell = {
            'mean': match.group(1) if match else None,
            'std': match.group(2) if match else None,
            'error': err_type is not None,
            'etype': err_type,
        }
        data.setdefault(model, {}).setdefault(key, {})[env] = cell
|
|
|
|
# Select winner
def pick_winner(env_data):
    """Return the Markdown "winner" cell for one model/test row.

    Args:
        env_data: Mapping of environment name to a cell dict with at least
            the keys ``'mean'`` (numeric string or None) and ``'error'``
            (bool).

    Returns:
        ``'—'`` when no environment has a usable score; otherwise
        ``'🏆 **<env>**'`` for the highest mean, followed by
        ``' (+N%)'`` giving its lead over the second-best score when there
        is one and that score is positive.
    """
    scores = {
        env: float(cell['mean'])
        for env, cell in env_data.items()
        if not cell['error'] and cell['mean']
    }
    if not scores:
        return '—'

    # max() keeps the first environment on ties (insertion order).
    best = max(scores, key=scores.get)
    tag = f"🏆 **{best}**"

    others = [score for env, score in scores.items() if env != best]
    if others:
        runner_up = max(others)
        # A runner-up of 0 tokens/s has no meaningful relative gain and
        # would otherwise raise ZeroDivisionError; omit the suffix.
        if runner_up > 0:
            gain = (scores[best] / runner_up - 1) * 100
            tag += f" (+{gain:.0f}%)"
    return tag
|
|
|
|
# Render table with distinct error messages
def render_table(test_label, display_name):
    """Print a Markdown table of tokens/second for one benchmark test.

    One row is printed per model in ``data`` (sorted case-insensitively),
    with one column per entry of ``ENV_ORDER`` and a final "Winner" column
    produced by ``pick_winner``.

    Args:
        test_label: Key of the test inside ``data[model]`` (e.g. ``'pp512'``).
        display_name: Human-readable title used in the section heading.
    """
    # Error types with a dedicated label; anything else is a runtime error.
    error_cells = {
        'load': '⚠️ Load Error',
        'hang': '⚠️ GPU Hang',
    }

    print(f"### {display_name} — tokens/second\n")
    header = ['Model'] + [e.replace('_', ' ').title() for e in ENV_ORDER] + ['Winner']
    print("| " + " | ".join(header) + " |")
    print("|" + "|".join(['---'] * len(header)) + "|")

    for model in sorted(data, key=lambda s: s.lower()):
        row = [f"**{model}**"]
        env_data = data[model].get(test_label, {})
        for env in ENV_ORDER:
            d = env_data.get(env)
            if not d:
                # No log for this model/environment pair.
                cell = '—'
            elif d['error']:
                cell = error_cells.get(d['etype'], '⚠️ Runtime Error')
            elif d['mean'] is None or d['std'] is None:
                # Log had no classified error but also no row for this test;
                # float(None) would raise TypeError, so show it as missing.
                cell = '—'
            else:
                cell = f"{float(d['mean']):.2f} ± {float(d['std']):.2f}"
            row.append(cell)
        row.append(pick_winner(env_data))
        print("| " + " | ".join(row) + " |")
    # Blank line terminates the Markdown table.
    print()
|
|
|
|
# Output tables
render_table('pp512', 'Prompt Processing (pp512)')
render_table('tg128', 'Text Generation (tg128)')

# Summary of failures by type.
# Built once here rather than on every loop iteration; error types not
# listed (including a missing type) fall back to the generic 'error'.
FAILURE_DESCRIPTIONS = {
    'load': 'failed to load',
    'hang': 'GPU hang',
    'runtime': 'runtime error',
}

fail_lines = []
for model in sorted(data, key=lambda s: s.lower()):
    for test_label, envs in data[model].items():
        for env, d in envs.items():
            if not d['error']:
                continue
            desc = FAILURE_DESCRIPTIONS.get(d['etype'], 'error')
            fail_lines.append(f"- **{model}** [{test_label}] on *{env}*: {desc}")

# Only emit the section when at least one run failed.
if fail_lines:
    print("## Failed Runs\n")
    print("\n".join(fail_lines))