Files
amd-strix-halo-toolboxes/docs/mtp-summary.json
T

1430 lines
36 KiB
JSON

[
{
"model": "Qwen3.6-27B-UD-Q8_K_XL",
"toolbox": "rocm-7.2.3",
"mode": "baseline",
"avg_prompt_tok_s": 104.9,
"avg_tok_s": 6.4,
"accept_rate": null,
"wall_s_total": 274.48,
"results": [
{
"name": "code_python",
"wall_s": 30.306,
"prompt_n": 30,
"prompt_ms": 509.43,
"prompt_per_second": 58.89,
"predicted_n": 192,
"predicted_per_second": 6.45,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "code_cpp",
"wall_s": 30.249,
"prompt_n": 40,
"prompt_ms": 428.45,
"prompt_per_second": 93.36,
"predicted_n": 192,
"predicted_per_second": 6.45,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "explain_concept",
"wall_s": 30.3,
"prompt_n": 27,
"prompt_ms": 469.46,
"prompt_per_second": 57.51,
"predicted_n": 192,
"predicted_per_second": 6.44,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "summarize",
"wall_s": 30.292,
"prompt_n": 62,
"prompt_ms": 472.42,
"prompt_per_second": 131.24,
"predicted_n": 192,
"predicted_per_second": 6.45,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "qa_factual",
"wall_s": 30.31,
"prompt_n": 24,
"prompt_ms": 467.84,
"prompt_per_second": 51.3,
"predicted_n": 192,
"predicted_per_second": 6.44,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "translation",
"wall_s": 30.299,
"prompt_n": 25,
"prompt_ms": 467.72,
"prompt_per_second": 53.45,
"predicted_n": 192,
"predicted_per_second": 6.44,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "creative_short",
"wall_s": 30.312,
"prompt_n": 21,
"prompt_ms": 476.88,
"prompt_per_second": 44.04,
"predicted_n": 192,
"predicted_per_second": 6.44,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "stepwise_math",
"wall_s": 30.29,
"prompt_n": 60,
"prompt_ms": 469.75,
"prompt_per_second": 127.73,
"predicted_n": 192,
"predicted_per_second": 6.45,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "long_code_review",
"wall_s": 32.122,
"prompt_n": 731,
"prompt_ms": 2238.33,
"prompt_per_second": 326.58,
"predicted_n": 192,
"predicted_per_second": 6.43,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
}
]
},
{
"model": "Qwen3.6-27B-UD-Q8_K_XL",
"toolbox": "rocm-7.2.3",
"mode": "mtp-2",
"avg_prompt_tok_s": 97.38,
"avg_tok_s": 12.6,
"accept_rate": 0.7971,
"wall_s_total": 144.29,
"results": [
{
"name": "code_python",
"wall_s": 15.171,
"prompt_n": 30,
"prompt_ms": 527.23,
"prompt_per_second": 56.9,
"predicted_n": 192,
"predicted_per_second": 13.12,
"draft_n": 140,
"draft_n_accepted": 120,
"accept_rate": 0.8571
},
{
"name": "code_cpp",
"wall_s": 15.787,
"prompt_n": 40,
"prompt_ms": 461.35,
"prompt_per_second": 86.7,
"predicted_n": 192,
"predicted_per_second": 12.56,
"draft_n": 147,
"draft_n_accepted": 117,
"accept_rate": 0.7959
},
{
"name": "explain_concept",
"wall_s": 15.203,
"prompt_n": 27,
"prompt_ms": 495.97,
"prompt_per_second": 54.44,
"predicted_n": 192,
"predicted_per_second": 13.09,
"draft_n": 141,
"draft_n_accepted": 120,
"accept_rate": 0.8511
},
{
"name": "summarize",
"wall_s": 14.803,
"prompt_n": 62,
"prompt_ms": 508.33,
"prompt_per_second": 121.97,
"predicted_n": 192,
"predicted_per_second": 13.47,
"draft_n": 137,
"draft_n_accepted": 122,
"accept_rate": 0.8905
},
{
"name": "qa_factual",
"wall_s": 15.822,
"prompt_n": 24,
"prompt_ms": 495.12,
"prompt_per_second": 48.47,
"predicted_n": 192,
"predicted_per_second": 12.56,
"draft_n": 148,
"draft_n_accepted": 117,
"accept_rate": 0.7905
},
{
"name": "translation",
"wall_s": 16.442,
"prompt_n": 25,
"prompt_ms": 496.05,
"prompt_per_second": 50.4,
"predicted_n": 192,
"predicted_per_second": 12.07,
"draft_n": 153,
"draft_n_accepted": 114,
"accept_rate": 0.7451
},
{
"name": "creative_short",
"wall_s": 17.224,
"prompt_n": 21,
"prompt_ms": 492.99,
"prompt_per_second": 42.6,
"predicted_n": 192,
"predicted_per_second": 11.5,
"draft_n": 160,
"draft_n_accepted": 110,
"accept_rate": 0.6875
},
{
"name": "stepwise_math",
"wall_s": 15.005,
"prompt_n": 60,
"prompt_ms": 503.13,
"prompt_per_second": 119.25,
"predicted_n": 192,
"predicted_per_second": 13.28,
"draft_n": 139,
"draft_n_accepted": 121,
"accept_rate": 0.8705
},
{
"name": "long_code_review",
"wall_s": 18.833,
"prompt_n": 731,
"prompt_ms": 2471.97,
"prompt_per_second": 295.72,
"predicted_n": 192,
"predicted_per_second": 11.76,
"draft_n": 156,
"draft_n_accepted": 112,
"accept_rate": 0.7179
}
]
},
{
"model": "Qwen3.6-27B-UD-Q8_K_XL",
"toolbox": "rocm-7.2.3",
"mode": "mtp-3",
"avg_prompt_tok_s": 95.5,
"avg_tok_s": 13.7,
"accept_rate": 0.744,
"wall_s_total": 133.44,
"results": [
{
"name": "code_python",
"wall_s": 13.55,
"prompt_n": 30,
"prompt_ms": 542.34,
"prompt_per_second": 55.32,
"predicted_n": 192,
"predicted_per_second": 14.78,
"draft_n": 163,
"draft_n_accepted": 136,
"accept_rate": 0.8344
},
{
"name": "code_cpp",
"wall_s": 14.924,
"prompt_n": 40,
"prompt_ms": 473.5,
"prompt_per_second": 84.48,
"predicted_n": 192,
"predicted_per_second": 13.32,
"draft_n": 181,
"draft_n_accepted": 130,
"accept_rate": 0.7182
},
{
"name": "explain_concept",
"wall_s": 14.489,
"prompt_n": 27,
"prompt_ms": 510.02,
"prompt_per_second": 52.94,
"predicted_n": 192,
"predicted_per_second": 13.77,
"draft_n": 175,
"draft_n_accepted": 132,
"accept_rate": 0.7543
},
{
"name": "summarize",
"wall_s": 13.973,
"prompt_n": 62,
"prompt_ms": 522.38,
"prompt_per_second": 118.69,
"predicted_n": 192,
"predicted_per_second": 14.32,
"draft_n": 168,
"draft_n_accepted": 134,
"accept_rate": 0.7976
},
{
"name": "qa_factual",
"wall_s": 14.491,
"prompt_n": 24,
"prompt_ms": 508.76,
"prompt_per_second": 47.17,
"predicted_n": 192,
"predicted_per_second": 13.77,
"draft_n": 176,
"draft_n_accepted": 132,
"accept_rate": 0.75
},
{
"name": "translation",
"wall_s": 15.448,
"prompt_n": 25,
"prompt_ms": 508.67,
"prompt_per_second": 49.15,
"predicted_n": 192,
"predicted_per_second": 12.88,
"draft_n": 189,
"draft_n_accepted": 128,
"accept_rate": 0.6772
},
{
"name": "creative_short",
"wall_s": 15.432,
"prompt_n": 21,
"prompt_ms": 505.51,
"prompt_per_second": 41.54,
"predicted_n": 192,
"predicted_per_second": 12.9,
"draft_n": 187,
"draft_n_accepted": 128,
"accept_rate": 0.6845
},
{
"name": "stepwise_math",
"wall_s": 13.969,
"prompt_n": 60,
"prompt_ms": 516.53,
"prompt_per_second": 116.16,
"predicted_n": 192,
"predicted_per_second": 14.31,
"draft_n": 168,
"draft_n_accepted": 134,
"accept_rate": 0.7976
},
{
"name": "long_code_review",
"wall_s": 17.16,
"prompt_n": 731,
"prompt_ms": 2486.03,
"prompt_per_second": 294.04,
"predicted_n": 192,
"predicted_per_second": 13.12,
"draft_n": 183,
"draft_n_accepted": 129,
"accept_rate": 0.7049
}
]
},
{
"model": "Qwen3.6-27B-UD-Q8_K_XL",
"toolbox": "vulkan-radv",
"mode": "baseline",
"avg_prompt_tok_s": 97.03,
"avg_tok_s": 6.3,
"accept_rate": null,
"wall_s_total": 280.34,
"results": [
{
"name": "code_python",
"wall_s": 30.889,
"prompt_n": 30,
"prompt_ms": 497.72,
"prompt_per_second": 60.27,
"predicted_n": 192,
"predicted_per_second": 6.32,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "code_cpp",
"wall_s": 30.922,
"prompt_n": 40,
"prompt_ms": 513.86,
"prompt_per_second": 77.84,
"predicted_n": 192,
"predicted_per_second": 6.32,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "explain_concept",
"wall_s": 30.807,
"prompt_n": 27,
"prompt_ms": 404.26,
"prompt_per_second": 66.79,
"predicted_n": 192,
"predicted_per_second": 6.33,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "summarize",
"wall_s": 30.907,
"prompt_n": 62,
"prompt_ms": 490.66,
"prompt_per_second": 126.36,
"predicted_n": 192,
"predicted_per_second": 6.32,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "qa_factual",
"wall_s": 30.809,
"prompt_n": 24,
"prompt_ms": 400.38,
"prompt_per_second": 59.94,
"predicted_n": 192,
"predicted_per_second": 6.32,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "translation",
"wall_s": 30.815,
"prompt_n": 25,
"prompt_ms": 401.5,
"prompt_per_second": 62.27,
"predicted_n": 192,
"predicted_per_second": 6.32,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "creative_short",
"wall_s": 30.809,
"prompt_n": 21,
"prompt_ms": 397.41,
"prompt_per_second": 52.84,
"predicted_n": 192,
"predicted_per_second": 6.32,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "stepwise_math",
"wall_s": 30.9,
"prompt_n": 60,
"prompt_ms": 488.22,
"prompt_per_second": 122.9,
"predicted_n": 192,
"predicted_per_second": 6.32,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "long_code_review",
"wall_s": 33.483,
"prompt_n": 731,
"prompt_ms": 2995.01,
"prompt_per_second": 244.07,
"predicted_n": 192,
"predicted_per_second": 6.31,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
}
]
},
{
"model": "Qwen3.6-27B-UD-Q8_K_XL",
"toolbox": "vulkan-radv",
"mode": "mtp-2",
"avg_prompt_tok_s": 88.14,
"avg_tok_s": 11.7,
"accept_rate": 0.8024,
"wall_s_total": 156.11,
"results": [
{
"name": "code_python",
"wall_s": 16.264,
"prompt_n": 30,
"prompt_ms": 538.04,
"prompt_per_second": 55.76,
"predicted_n": 192,
"predicted_per_second": 12.22,
"draft_n": 140,
"draft_n_accepted": 121,
"accept_rate": 0.8643
},
{
"name": "code_cpp",
"wall_s": 16.926,
"prompt_n": 40,
"prompt_ms": 564.77,
"prompt_per_second": 70.83,
"predicted_n": 192,
"predicted_per_second": 11.76,
"draft_n": 146,
"draft_n_accepted": 118,
"accept_rate": 0.8082
},
{
"name": "explain_concept",
"wall_s": 17.671,
"prompt_n": 27,
"prompt_ms": 438.41,
"prompt_per_second": 61.59,
"predicted_n": 192,
"predicted_per_second": 11.16,
"draft_n": 152,
"draft_n_accepted": 114,
"accept_rate": 0.75
},
{
"name": "summarize",
"wall_s": 15.168,
"prompt_n": 62,
"prompt_ms": 541.89,
"prompt_per_second": 114.41,
"predicted_n": 192,
"predicted_per_second": 13.17,
"draft_n": 130,
"draft_n_accepted": 126,
"accept_rate": 0.9692
},
{
"name": "qa_factual",
"wall_s": 16.558,
"prompt_n": 24,
"prompt_ms": 438.01,
"prompt_per_second": 54.79,
"predicted_n": 192,
"predicted_per_second": 11.95,
"draft_n": 142,
"draft_n_accepted": 119,
"accept_rate": 0.838
},
{
"name": "translation",
"wall_s": 17.52,
"prompt_n": 25,
"prompt_ms": 437.99,
"prompt_per_second": 57.08,
"predicted_n": 192,
"predicted_per_second": 11.27,
"draft_n": 151,
"draft_n_accepted": 115,
"accept_rate": 0.7616
},
{
"name": "creative_short",
"wall_s": 18.359,
"prompt_n": 21,
"prompt_ms": 433.87,
"prompt_per_second": 48.4,
"predicted_n": 192,
"predicted_per_second": 10.74,
"draft_n": 157,
"draft_n_accepted": 111,
"accept_rate": 0.707
},
{
"name": "stepwise_math",
"wall_s": 16.929,
"prompt_n": 60,
"prompt_ms": 530.6,
"prompt_per_second": 113.08,
"predicted_n": 192,
"predicted_per_second": 11.74,
"draft_n": 145,
"draft_n_accepted": 118,
"accept_rate": 0.8138
},
{
"name": "long_code_review",
"wall_s": 20.716,
"prompt_n": 731,
"prompt_ms": 3363.15,
"prompt_per_second": 217.36,
"predicted_n": 192,
"predicted_per_second": 11.1,
"draft_n": 153,
"draft_n_accepted": 114,
"accept_rate": 0.7451
}
]
},
{
"model": "Qwen3.6-27B-UD-Q8_K_XL",
"toolbox": "vulkan-radv",
"mode": "mtp-3",
"avg_prompt_tok_s": 86.42,
"avg_tok_s": 13.3,
"accept_rate": 0.7301,
"wall_s_total": 137.86,
"results": [
{
"name": "code_python",
"wall_s": 13.782,
"prompt_n": 30,
"prompt_ms": 550.68,
"prompt_per_second": 54.48,
"predicted_n": 192,
"predicted_per_second": 14.53,
"draft_n": 163,
"draft_n_accepted": 136,
"accept_rate": 0.8344
},
{
"name": "code_cpp",
"wall_s": 14.755,
"prompt_n": 40,
"prompt_ms": 578.77,
"prompt_per_second": 69.11,
"predicted_n": 192,
"predicted_per_second": 13.58,
"draft_n": 175,
"draft_n_accepted": 132,
"accept_rate": 0.7543
},
{
"name": "explain_concept",
"wall_s": 16.075,
"prompt_n": 27,
"prompt_ms": 452.06,
"prompt_per_second": 59.73,
"predicted_n": 192,
"predicted_per_second": 12.32,
"draft_n": 195,
"draft_n_accepted": 126,
"accept_rate": 0.6462
},
{
"name": "summarize",
"wall_s": 14.065,
"prompt_n": 62,
"prompt_ms": 553.79,
"prompt_per_second": 111.96,
"predicted_n": 192,
"predicted_per_second": 14.26,
"draft_n": 167,
"draft_n_accepted": 135,
"accept_rate": 0.8084
},
{
"name": "qa_factual",
"wall_s": 14.198,
"prompt_n": 24,
"prompt_ms": 450.37,
"prompt_per_second": 53.29,
"predicted_n": 192,
"predicted_per_second": 14.02,
"draft_n": 171,
"draft_n_accepted": 134,
"accept_rate": 0.7836
},
{
"name": "translation",
"wall_s": 14.908,
"prompt_n": 25,
"prompt_ms": 452.28,
"prompt_per_second": 55.28,
"predicted_n": 192,
"predicted_per_second": 13.33,
"draft_n": 179,
"draft_n_accepted": 131,
"accept_rate": 0.7318
},
{
"name": "creative_short",
"wall_s": 16.099,
"prompt_n": 21,
"prompt_ms": 448.93,
"prompt_per_second": 46.78,
"predicted_n": 192,
"predicted_per_second": 12.31,
"draft_n": 192,
"draft_n_accepted": 126,
"accept_rate": 0.6562
},
{
"name": "stepwise_math",
"wall_s": 15.415,
"prompt_n": 60,
"prompt_ms": 543.5,
"prompt_per_second": 110.4,
"predicted_n": 192,
"predicted_per_second": 12.95,
"draft_n": 183,
"draft_n_accepted": 129,
"accept_rate": 0.7049
},
{
"name": "long_code_review",
"wall_s": 18.562,
"prompt_n": 731,
"prompt_ms": 3372.45,
"prompt_per_second": 216.76,
"predicted_n": 192,
"predicted_per_second": 12.68,
"draft_n": 187,
"draft_n_accepted": 128,
"accept_rate": 0.6845
}
]
},
{
"model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
"toolbox": "rocm-7.2.3",
"mode": "baseline",
"avg_prompt_tok_s": 352.21,
"avg_tok_s": 51.1,
"accept_rate": null,
"wall_s_total": 35.79,
"results": [
{
"name": "code_python",
"wall_s": 3.904,
"prompt_n": 30,
"prompt_ms": 136.88,
"prompt_per_second": 219.17,
"predicted_n": 192,
"predicted_per_second": 51.14,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "code_cpp",
"wall_s": 3.901,
"prompt_n": 40,
"prompt_ms": 126.79,
"prompt_per_second": 315.49,
"predicted_n": 192,
"predicted_per_second": 51.15,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "explain_concept",
"wall_s": 3.883,
"prompt_n": 27,
"prompt_ms": 109.03,
"prompt_per_second": 247.65,
"predicted_n": 192,
"predicted_per_second": 51.16,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "summarize",
"wall_s": 3.931,
"prompt_n": 62,
"prompt_ms": 155.31,
"prompt_per_second": 399.2,
"predicted_n": 192,
"predicted_per_second": 51.14,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "qa_factual",
"wall_s": 3.88,
"prompt_n": 24,
"prompt_ms": 104.26,
"prompt_per_second": 230.19,
"predicted_n": 192,
"predicted_per_second": 51.15,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "translation",
"wall_s": 3.879,
"prompt_n": 25,
"prompt_ms": 104.91,
"prompt_per_second": 238.3,
"predicted_n": 192,
"predicted_per_second": 51.15,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "creative_short",
"wall_s": 3.877,
"prompt_n": 21,
"prompt_ms": 101.75,
"prompt_per_second": 206.38,
"predicted_n": 192,
"predicted_per_second": 51.15,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "stepwise_math",
"wall_s": 3.925,
"prompt_n": 60,
"prompt_ms": 148.79,
"prompt_per_second": 403.25,
"predicted_n": 192,
"predicted_per_second": 51.14,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "long_code_review",
"wall_s": 4.606,
"prompt_n": 731,
"prompt_ms": 803.04,
"prompt_per_second": 910.29,
"predicted_n": 192,
"predicted_per_second": 50.8,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
}
]
},
{
"model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
"toolbox": "rocm-7.2.3",
"mode": "mtp-2",
"avg_prompt_tok_s": 309.53,
"avg_tok_s": 67.5,
"accept_rate": 0.8183,
"wall_s_total": 27.94,
"results": [
{
"name": "code_python",
"wall_s": 2.844,
"prompt_n": 30,
"prompt_ms": 168.12,
"prompt_per_second": 178.44,
"predicted_n": 192,
"predicted_per_second": 72.12,
"draft_n": 133,
"draft_n_accepted": 123,
"accept_rate": 0.9248
},
{
"name": "code_cpp",
"wall_s": 2.906,
"prompt_n": 40,
"prompt_ms": 141.09,
"prompt_per_second": 283.51,
"predicted_n": 192,
"predicted_per_second": 69.99,
"draft_n": 139,
"draft_n_accepted": 121,
"accept_rate": 0.8705
},
{
"name": "explain_concept",
"wall_s": 3.202,
"prompt_n": 27,
"prompt_ms": 120.53,
"prompt_per_second": 224.02,
"predicted_n": 192,
"predicted_per_second": 62.77,
"draft_n": 156,
"draft_n_accepted": 113,
"accept_rate": 0.7244
},
{
"name": "summarize",
"wall_s": 2.894,
"prompt_n": 62,
"prompt_ms": 173.04,
"prompt_per_second": 358.31,
"predicted_n": 192,
"predicted_per_second": 71.13,
"draft_n": 136,
"draft_n_accepted": 122,
"accept_rate": 0.8971
},
{
"name": "qa_factual",
"wall_s": 2.88,
"prompt_n": 24,
"prompt_ms": 115.19,
"prompt_per_second": 208.36,
"predicted_n": 192,
"predicted_per_second": 70.01,
"draft_n": 139,
"draft_n_accepted": 121,
"accept_rate": 0.8705
},
{
"name": "translation",
"wall_s": 2.995,
"prompt_n": 25,
"prompt_ms": 116.14,
"prompt_per_second": 215.25,
"predicted_n": 192,
"predicted_per_second": 67.18,
"draft_n": 145,
"draft_n_accepted": 118,
"accept_rate": 0.8138
},
{
"name": "creative_short",
"wall_s": 3.171,
"prompt_n": 21,
"prompt_ms": 112.34,
"prompt_per_second": 186.93,
"predicted_n": 192,
"predicted_per_second": 63.23,
"draft_n": 154,
"draft_n_accepted": 113,
"accept_rate": 0.7338
},
{
"name": "stepwise_math",
"wall_s": 3.037,
"prompt_n": 60,
"prompt_ms": 161.11,
"prompt_per_second": 372.42,
"predicted_n": 192,
"predicted_per_second": 67.25,
"draft_n": 145,
"draft_n_accepted": 118,
"accept_rate": 0.8138
},
{
"name": "long_code_review",
"wall_s": 4.009,
"prompt_n": 731,
"prompt_ms": 963.68,
"prompt_per_second": 758.55,
"predicted_n": 192,
"predicted_per_second": 63.52,
"draft_n": 152,
"draft_n_accepted": 114,
"accept_rate": 0.75
}
]
},
{
"model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
"toolbox": "rocm-7.2.3",
"mode": "mtp-3",
"avg_prompt_tok_s": 302.26,
"avg_tok_s": 70.3,
"accept_rate": 0.7386,
"wall_s_total": 27.0,
"results": [
{
"name": "code_python",
"wall_s": 2.789,
"prompt_n": 30,
"prompt_ms": 165.27,
"prompt_per_second": 181.52,
"predicted_n": 192,
"predicted_per_second": 73.57,
"draft_n": 168,
"draft_n_accepted": 134,
"accept_rate": 0.7976
},
{
"name": "code_cpp",
"wall_s": 2.714,
"prompt_n": 40,
"prompt_ms": 146.13,
"prompt_per_second": 273.74,
"predicted_n": 192,
"predicted_per_second": 75.39,
"draft_n": 165,
"draft_n_accepted": 135,
"accept_rate": 0.8182
},
{
"name": "explain_concept",
"wall_s": 3.246,
"prompt_n": 27,
"prompt_ms": 126.13,
"prompt_per_second": 214.07,
"predicted_n": 192,
"predicted_per_second": 62.01,
"draft_n": 201,
"draft_n_accepted": 123,
"accept_rate": 0.6119
},
{
"name": "summarize",
"wall_s": 2.664,
"prompt_n": 62,
"prompt_ms": 178.24,
"prompt_per_second": 347.84,
"predicted_n": 192,
"predicted_per_second": 77.93,
"draft_n": 157,
"draft_n_accepted": 137,
"accept_rate": 0.8726
},
{
"name": "qa_factual",
"wall_s": 2.762,
"prompt_n": 24,
"prompt_ms": 119.88,
"prompt_per_second": 200.2,
"predicted_n": 192,
"predicted_per_second": 73.32,
"draft_n": 169,
"draft_n_accepted": 134,
"accept_rate": 0.7929
},
{
"name": "translation",
"wall_s": 2.875,
"prompt_n": 25,
"prompt_ms": 121.36,
"prompt_per_second": 206.0,
"predicted_n": 192,
"predicted_per_second": 70.29,
"draft_n": 177,
"draft_n_accepted": 131,
"accept_rate": 0.7401
},
{
"name": "creative_short",
"wall_s": 3.157,
"prompt_n": 21,
"prompt_ms": 116.34,
"prompt_per_second": 180.5,
"predicted_n": 192,
"predicted_per_second": 63.6,
"draft_n": 197,
"draft_n_accepted": 125,
"accept_rate": 0.6345
},
{
"name": "stepwise_math",
"wall_s": 2.912,
"prompt_n": 60,
"prompt_ms": 165.66,
"prompt_per_second": 362.18,
"predicted_n": 192,
"predicted_per_second": 70.46,
"draft_n": 177,
"draft_n_accepted": 131,
"accept_rate": 0.7401
},
{
"name": "long_code_review",
"wall_s": 3.882,
"prompt_n": 731,
"prompt_ms": 969.08,
"prompt_per_second": 754.32,
"predicted_n": 192,
"predicted_per_second": 66.48,
"draft_n": 184,
"draft_n_accepted": 128,
"accept_rate": 0.6957
}
]
},
{
"model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
"toolbox": "vulkan-radv",
"mode": "baseline",
"avg_prompt_tok_s": 302.53,
"avg_tok_s": 59.4,
"accept_rate": null,
"wall_s_total": 31.46,
"results": [
{
"name": "code_python",
"wall_s": 3.594,
"prompt_n": 30,
"prompt_ms": 314.93,
"prompt_per_second": 95.26,
"predicted_n": 192,
"predicted_per_second": 58.8,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "code_cpp",
"wall_s": 3.475,
"prompt_n": 40,
"prompt_ms": 227.75,
"prompt_per_second": 175.63,
"predicted_n": 192,
"predicted_per_second": 59.52,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "explain_concept",
"wall_s": 3.363,
"prompt_n": 27,
"prompt_ms": 119.62,
"prompt_per_second": 225.72,
"predicted_n": 192,
"predicted_per_second": 59.6,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "summarize",
"wall_s": 3.429,
"prompt_n": 62,
"prompt_ms": 175.54,
"prompt_per_second": 353.2,
"predicted_n": 192,
"predicted_per_second": 59.39,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "qa_factual",
"wall_s": 3.41,
"prompt_n": 24,
"prompt_ms": 108.41,
"prompt_per_second": 221.39,
"predicted_n": 192,
"predicted_per_second": 58.66,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "translation",
"wall_s": 3.347,
"prompt_n": 25,
"prompt_ms": 112.61,
"prompt_per_second": 222.01,
"predicted_n": 192,
"predicted_per_second": 59.87,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "creative_short",
"wall_s": 3.355,
"prompt_n": 21,
"prompt_ms": 106.77,
"prompt_per_second": 196.69,
"predicted_n": 192,
"predicted_per_second": 59.62,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "stepwise_math",
"wall_s": 3.399,
"prompt_n": 60,
"prompt_ms": 167.04,
"prompt_per_second": 359.19,
"predicted_n": 192,
"predicted_per_second": 59.93,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
},
{
"name": "long_code_review",
"wall_s": 4.093,
"prompt_n": 731,
"prompt_ms": 836.67,
"prompt_per_second": 873.7,
"predicted_n": 192,
"predicted_per_second": 59.5,
"draft_n": 0,
"draft_n_accepted": 0,
"accept_rate": null
}
]
},
{
"model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
"toolbox": "vulkan-radv",
"mode": "mtp-2",
"avg_prompt_tok_s": 255.09,
"avg_tok_s": 74.6,
"accept_rate": 0.7907,
"wall_s_total": 26.13,
"results": [
{
"name": "code_python",
"wall_s": 2.855,
"prompt_n": 30,
"prompt_ms": 373.02,
"prompt_per_second": 80.42,
"predicted_n": 192,
"predicted_per_second": 77.78,
"draft_n": 135,
"draft_n_accepted": 123,
"accept_rate": 0.9111
},
{
"name": "code_cpp",
"wall_s": 2.922,
"prompt_n": 40,
"prompt_ms": 267.27,
"prompt_per_second": 149.66,
"predicted_n": 192,
"predicted_per_second": 72.95,
"draft_n": 149,
"draft_n_accepted": 116,
"accept_rate": 0.7785
},
{
"name": "explain_concept",
"wall_s": 2.881,
"prompt_n": 27,
"prompt_ms": 133.95,
"prompt_per_second": 201.57,
"predicted_n": 192,
"predicted_per_second": 70.44,
"draft_n": 155,
"draft_n_accepted": 113,
"accept_rate": 0.729
},
{
"name": "summarize",
"wall_s": 2.546,
"prompt_n": 62,
"prompt_ms": 202.66,
"prompt_per_second": 305.94,
"predicted_n": 192,
"predicted_per_second": 82.7,
"draft_n": 134,
"draft_n_accepted": 124,
"accept_rate": 0.9254
},
{
"name": "qa_factual",
"wall_s": 2.516,
"prompt_n": 24,
"prompt_ms": 123.14,
"prompt_per_second": 194.89,
"predicted_n": 192,
"predicted_per_second": 80.95,
"draft_n": 137,
"draft_n_accepted": 122,
"accept_rate": 0.8905
},
{
"name": "translation",
"wall_s": 3.02,
"prompt_n": 25,
"prompt_ms": 126.43,
"prompt_per_second": 197.73,
"predicted_n": 192,
"predicted_per_second": 67.01,
"draft_n": 165,
"draft_n_accepted": 107,
"accept_rate": 0.6485
},
{
"name": "creative_short",
"wall_s": 2.989,
"prompt_n": 21,
"prompt_ms": 121.75,
"prompt_per_second": 172.48,
"predicted_n": 192,
"predicted_per_second": 67.62,
"draft_n": 166,
"draft_n_accepted": 108,
"accept_rate": 0.6506
},
{
"name": "stepwise_math",
"wall_s": 2.602,
"prompt_n": 60,
"prompt_ms": 186.44,
"prompt_per_second": 321.81,
"predicted_n": 192,
"predicted_per_second": 80.45,
"draft_n": 137,
"draft_n_accepted": 122,
"accept_rate": 0.8905
},
{
"name": "long_code_review",
"wall_s": 3.8,
"prompt_n": 731,
"prompt_ms": 1088.89,
"prompt_per_second": 671.32,
"predicted_n": 192,
"predicted_per_second": 71.64,
"draft_n": 150,
"draft_n_accepted": 115,
"accept_rate": 0.7667
}
]
},
{
"model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
"toolbox": "vulkan-radv",
"mode": "mtp-3",
"avg_prompt_tok_s": 248.42,
"avg_tok_s": 75.7,
"accept_rate": 0.7374,
"wall_s_total": 25.87,
"results": [
{
"name": "code_python",
"wall_s": 2.738,
"prompt_n": 30,
"prompt_ms": 375.67,
"prompt_per_second": 79.86,
"predicted_n": 192,
"predicted_per_second": 81.75,
"draft_n": 165,
"draft_n_accepted": 136,
"accept_rate": 0.8242
},
{
"name": "code_cpp",
"wall_s": 2.946,
"prompt_n": 40,
"prompt_ms": 274.8,
"prompt_per_second": 145.56,
"predicted_n": 192,
"predicted_per_second": 72.5,
"draft_n": 185,
"draft_n_accepted": 129,
"accept_rate": 0.6973
},
{
"name": "explain_concept",
"wall_s": 2.814,
"prompt_n": 27,
"prompt_ms": 137.5,
"prompt_per_second": 196.37,
"predicted_n": 192,
"predicted_per_second": 72.37,
"draft_n": 185,
"draft_n_accepted": 129,
"accept_rate": 0.6973
},
{
"name": "summarize",
"wall_s": 2.516,
"prompt_n": 62,
"prompt_ms": 208.53,
"prompt_per_second": 297.32,
"predicted_n": 192,
"predicted_per_second": 84.04,
"draft_n": 159,
"draft_n_accepted": 138,
"accept_rate": 0.8679
},
{
"name": "qa_factual",
"wall_s": 2.536,
"prompt_n": 24,
"prompt_ms": 129.09,
"prompt_per_second": 185.91,
"predicted_n": 192,
"predicted_per_second": 80.54,
"draft_n": 165,
"draft_n_accepted": 135,
"accept_rate": 0.8182
},
{
"name": "translation",
"wall_s": 2.8,
"prompt_n": 25,
"prompt_ms": 131.97,
"prompt_per_second": 189.44,
"predicted_n": 192,
"predicted_per_second": 72.77,
"draft_n": 185,
"draft_n_accepted": 129,
"accept_rate": 0.6973
},
{
"name": "creative_short",
"wall_s": 2.943,
"prompt_n": 21,
"prompt_ms": 126.24,
"prompt_per_second": 166.35,
"predicted_n": 192,
"predicted_per_second": 68.88,
"draft_n": 198,
"draft_n_accepted": 125,
"accept_rate": 0.6313
},
{
"name": "stepwise_math",
"wall_s": 2.553,
"prompt_n": 60,
"prompt_ms": 192.58,
"prompt_per_second": 311.56,
"predicted_n": 192,
"predicted_per_second": 82.33,
"draft_n": 164,
"draft_n_accepted": 136,
"accept_rate": 0.8293
},
{
"name": "long_code_review",
"wall_s": 4.027,
"prompt_n": 731,
"prompt_ms": 1101.86,
"prompt_per_second": 663.43,
"predicted_n": 192,
"predicted_per_second": 66.37,
"draft_n": 197,
"draft_n_accepted": 125,
"accept_rate": 0.6345
}
]
}
]