Updated benchmarks
This commit is contained in:
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
|
||||
RESULTS_DIR_DEFAULT = "results"
|
||||
|
||||
# Same detection logic as your extractor
|
||||
HEADER_RE = re.compile(r"^\|\s*model\s*\|", re.IGNORECASE)
|
||||
SEP_RE = re.compile(r"^\|\s*-+")
|
||||
|
||||
LOAD_ERR = re.compile(r"failed to load model|Device memory allocation.*failed|⚠️\s*Fail", re.IGNORECASE)
|
||||
HANG_ERR = re.compile(r"GPU Hang|HW Exception", re.IGNORECASE)
|
||||
GENERIC_ERR = re.compile(r"error:|exit \d+|runtime error|⚠️\s*Runtime Error", re.IGNORECASE)
|
||||
|
||||
|
||||
def parse_table(text):
|
||||
lines = text.splitlines()
|
||||
rows = []
|
||||
header = None
|
||||
col_idx = {}
|
||||
|
||||
for line in lines:
|
||||
if HEADER_RE.search(line):
|
||||
header = [c.strip().lower() for c in line.strip().strip("|").split("|")]
|
||||
for idx, name in enumerate(header):
|
||||
col_idx[name] = idx
|
||||
continue
|
||||
|
||||
if header and (SEP_RE.search(line) or not line.strip()):
|
||||
continue
|
||||
|
||||
if header and line.startswith("|"):
|
||||
parts = [c.strip() for c in line.strip().strip("|").split("|")]
|
||||
if len(parts) < len(header):
|
||||
continue
|
||||
row = {}
|
||||
for name, idx in col_idx.items():
|
||||
row[name] = parts[idx]
|
||||
rows.append(row)
|
||||
|
||||
if header and line.strip() == "" and rows:
|
||||
break
|
||||
|
||||
return rows
|
||||
|
||||
|
||||
def detect_error(text):
|
||||
if LOAD_ERR.search(text):
|
||||
return True
|
||||
if HANG_ERR.search(text):
|
||||
return True
|
||||
if GENERIC_ERR.search(text):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def is_non_transient_vram_issue(text):
|
||||
# Do NOT delete logs with this kind of Vulkan OOM
|
||||
return (
|
||||
"ggml_vulkan: Device memory allocation of size" in text
|
||||
and "Requested buffer size exceeds device buffer size limit" in text
|
||||
)
|
||||
|
||||
|
||||
def is_failed_run(text):
|
||||
table_rows = parse_table(text)
|
||||
|
||||
has_pp = any(r.get("test", "").lower() == "pp512" for r in table_rows)
|
||||
has_tg = any(r.get("test", "").lower() == "tg128" for r in table_rows)
|
||||
|
||||
if has_pp or has_tg:
|
||||
return False
|
||||
|
||||
return detect_error(text)
|
||||
|
||||
|
||||
def main():
|
||||
ap = argparse.ArgumentParser(
|
||||
description="Delete transient-failure benchmark logs in results/"
|
||||
)
|
||||
ap.add_argument(
|
||||
"--results-dir",
|
||||
default=RESULTS_DIR_DEFAULT,
|
||||
help="Directory containing *.log files (default: results)",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="Only print what would be deleted",
|
||||
)
|
||||
args = ap.parse_args()
|
||||
|
||||
results_dir = args.results_dir
|
||||
pattern = os.path.join(results_dir, "*.log")
|
||||
|
||||
to_delete = []
|
||||
skipped_non_transient = []
|
||||
|
||||
for path in sorted(glob.glob(pattern)):
|
||||
try:
|
||||
with open(path, errors="ignore") as f:
|
||||
text = f.read()
|
||||
except OSError as e:
|
||||
print(f"Could not read {path}: {e}")
|
||||
continue
|
||||
|
||||
if not is_failed_run(text):
|
||||
continue
|
||||
|
||||
if is_non_transient_vram_issue(text):
|
||||
skipped_non_transient.append(path)
|
||||
continue
|
||||
|
||||
to_delete.append(path)
|
||||
|
||||
if not to_delete and not skipped_non_transient:
|
||||
print("No failed logs found.")
|
||||
return
|
||||
|
||||
if skipped_non_transient:
|
||||
print("Keeping logs with non transient VRAM issues:")
|
||||
for p in skipped_non_transient:
|
||||
print(f" KEEP {p}")
|
||||
|
||||
if to_delete:
|
||||
print("Deleting logs with transient failures:")
|
||||
for p in to_delete:
|
||||
print(f" DELETE {p}")
|
||||
if not args.dry_run:
|
||||
try:
|
||||
os.remove(p)
|
||||
except OSError as e:
|
||||
print(f" Failed to delete {p}: {e}")
|
||||
else:
|
||||
print("No logs to delete.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user