feat: add pre-flight cleanup and port conflict resolution to benchmark runner
This commit is contained in:
@@ -137,7 +137,60 @@ def capture_system_info(results_dir: Path):
|
|||||||
print(f"📋 Captured system info → {path}")
|
print(f"📋 Captured system info → {path}")
|
||||||
|
|
||||||
|
|
||||||
# ── Container lifecycle ──────────────────────────────────────────────────────
|
# ── Cleanup & container lifecycle ────────────────────────────────────────────
|
||||||
|
|
||||||
|
def check_port_free(port: int) -> bool:
    """Return True if TCP ``port`` can be bound on 127.0.0.1, False otherwise.

    This only probes the port; it does not identify or report the process
    that holds it.

    The probe socket sets SO_REUSEADDR so that connections lingering in
    TIME_WAIT (e.g. right after the previous holder was killed) do not make
    the port look busy. A socket that is actively listening on the port
    still causes the bind to fail.
    """
    import socket

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # Without this, bind() fails with EADDRINUSE while old connections
        # on the port are still in TIME_WAIT, even though a server could
        # start on it.
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        try:
            s.bind(("127.0.0.1", port))
        except OSError:
            return False
        return True
|
||||||
|
|
||||||
|
|
||||||
|
def kill_port_holder(port: int):
    """Find and SIGKILL whatever process is holding TCP ``port`` (best effort).

    ``fuser`` is used to look up the holder's PID(s). If it reports none (or
    cannot be run), ``ss`` is used as a fallback purely to show what is
    listening on the port; nothing is killed in that case.

    Returns None. Missing tools and already-exited processes are tolerated;
    the caller is expected to re-check the port afterwards.
    """
    import os
    import signal

    try:
        result = subprocess.run(
            ["fuser", f"{port}/tcp"],
            capture_output=True, text=True,
        )
        fuser_out = result.stdout
    except OSError:
        # fuser is not installed / not executable: fall through to ss.
        fuser_out = ""

    # Keep only numeric tokens so stray output is never treated as a PID.
    pids = [tok for tok in fuser_out.split() if tok.isdigit()]
    if pids:
        print(f" ⚠️ Port {port} held by PID(s): {', '.join(pids)} — killing...")
        for pid in pids:
            try:
                os.kill(int(pid), signal.SIGKILL)
            except ProcessLookupError:
                # The process exited between lookup and kill.
                pass
            except PermissionError:
                print(f" ⚠️ Not permitted to kill PID {pid}")
        # Pause before returning so the caller's re-check happens later.
        time.sleep(2)
        return

    # fuser didn't find it, try ss as fallback
    try:
        result = subprocess.run(
            ["ss", "-tlnp", f"sport = :{port}"],
            capture_output=True, text=True,
        )
    except OSError:
        # ss is unavailable too; nothing more can be reported.
        return

    listing = result.stdout.strip()
    # ss always prints a column-header line first; only the rows after it
    # indicate an actual listener.
    rows = [row for row in listing.splitlines()[1:] if row.strip()]
    if rows:
        print(f" ⚠️ Port {port} is in use (could not identify PID):")
        print(f" {listing}")
|
||||||
|
|
||||||
|
|
||||||
|
def cleanup(port: int):
    """Stop any stale benchmark container and make sure ``port`` is free.

    Returns True when the port is free (either immediately or after one
    attempt to kill its holder), and False when the port is still occupied
    after that attempt.
    """
    # Remove a leftover benchmark container first.
    stop_container()

    # Nothing else to do when the port is already available.
    if check_port_free(port):
        return True

    print(f" ⚠️ Port {port} is already in use — attempting cleanup...")
    kill_port_holder(port)
    # Brief pause before probing the port again.
    time.sleep(1)

    if check_port_free(port):
        print(f" ✅ Port {port} is now free.")
        return True

    print(f" ❌ Port {port} is still in use after cleanup. Aborting.")
    return False
|
||||||
|
|
||||||
|
|
||||||
def stop_container():
|
def stop_container():
|
||||||
"""Stop and remove the benchmark container if it exists."""
|
"""Stop and remove the benchmark container if it exists."""
|
||||||
@@ -150,7 +203,8 @@ def stop_container():
|
|||||||
def start_server(toolbox: dict, gguf: str, models_dir: Path,
|
def start_server(toolbox: dict, gguf: str, models_dir: Path,
|
||||||
spec_flags: list[str], port: int) -> bool:
|
spec_flags: list[str], port: int) -> bool:
|
||||||
"""Start llama-server in a podman container. Returns True on success."""
|
"""Start llama-server in a podman container. Returns True on success."""
|
||||||
stop_container()
|
if not cleanup(port):
|
||||||
|
return False
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
"podman", "run", "--rm", "-d",
|
"podman", "run", "--rm", "-d",
|
||||||
@@ -375,6 +429,13 @@ def main():
|
|||||||
# Capture system info
|
# Capture system info
|
||||||
capture_system_info(results_dir)
|
capture_system_info(results_dir)
|
||||||
|
|
||||||
|
# Pre-flight cleanup
|
||||||
|
print("🧹 Pre-flight cleanup...")
|
||||||
|
if not cleanup(args.port):
|
||||||
|
print("❌ Cannot free port — exiting.")
|
||||||
|
sys.exit(1)
|
||||||
|
print("✅ Environment clean.\n")
|
||||||
|
|
||||||
# Run benchmarks
|
# Run benchmarks
|
||||||
run_count = 0
|
run_count = 0
|
||||||
for tb_name, tb_config in toolboxes.items():
|
for tb_name, tb_config in toolboxes.items():
|
||||||
|
|||||||
Reference in New Issue
Block a user