diff --git a/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__rocm-7.2.3-mtp__baseline.json b/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__rocm-7.2.3-mtp__baseline.json new file mode 100644 index 0000000..7c5c8a5 --- /dev/null +++ b/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__rocm-7.2.3-mtp__baseline.json @@ -0,0 +1,99 @@ +{ + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "gguf": "Qwen3.6-27B-MTP-GGUF/Qwen3.6-27B-UD-Q8_K_XL.gguf", + "toolbox": "rocm-7.2.3-mtp", + "mode": "baseline", + "spec_flags": "(none)", + "timestamp": "2026-05-15T10:35:06.760655", + "results": [ + { + "name": "code_python", + "wall_s": 30.169, + "predicted_n": 192, + "predicted_per_second": 6.48, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "code_cpp", + "wall_s": 30.159, + "predicted_n": 192, + "predicted_per_second": 6.47, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "explain_concept", + "wall_s": 30.15, + "predicted_n": 192, + "predicted_per_second": 6.48, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "summarize", + "wall_s": 30.154, + "predicted_n": 192, + "predicted_per_second": 6.48, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "qa_factual", + "wall_s": 30.148, + "predicted_n": 192, + "predicted_per_second": 6.48, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "translation", + "wall_s": 30.148, + "predicted_n": 192, + "predicted_per_second": 6.48, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "creative_short", + "wall_s": 30.142, + "predicted_n": 192, + "predicted_per_second": 6.48, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "stepwise_math", + "wall_s": 30.154, + "predicted_n": 192, + "predicted_per_second": 6.48, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "long_code_review", + "wall_s": 32.168, + "predicted_n": 192, + "predicted_per_second": 6.46, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + } + ], + "aggregate": { + "n_requests": 9, + "total_predicted": 1728, + "total_draft": 0, + "total_draft_accepted": 0, + "aggregate_accept_rate": null, + "wall_s_total": 273.39 + } +} diff --git a/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__rocm-7.2.3-mtp__mtp-2.json b/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__rocm-7.2.3-mtp__mtp-2.json new file mode 100644 index 0000000..737decf --- /dev/null +++ b/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__rocm-7.2.3-mtp__mtp-2.json @@ -0,0 +1,99 @@ +{ + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "gguf": "Qwen3.6-27B-MTP-GGUF/Qwen3.6-27B-UD-Q8_K_XL.gguf", + "toolbox": "rocm-7.2.3-mtp", + "mode": "mtp-2", + "spec_flags": "--spec-type draft-mtp --spec-draft-n-max 2 -np 1", + "timestamp": "2026-05-15T10:37:45.833024", + "results": [ + { + "name": "code_python", + "wall_s": 15.461, + "predicted_n": 192, + "predicted_per_second": 12.88, + "draft_n": 140, + "draft_n_accepted": 120, + "accept_rate": 0.8571 + }, + { + "name": "code_cpp", + "wall_s": 16.091, + "predicted_n": 192, + "predicted_per_second": 12.33, + "draft_n": 147, + "draft_n_accepted": 117, + "accept_rate": 0.7959 + }, + { + "name": "explain_concept", + "wall_s": 15.53, + "predicted_n": 192, + "predicted_per_second": 12.81, + "draft_n": 141, + "draft_n_accepted": 120, + "accept_rate": 0.8511 + }, + { + "name": "summarize", + "wall_s": 15.094, + "predicted_n": 192, + "predicted_per_second": 13.22, + "draft_n": 137, + "draft_n_accepted": 122, + "accept_rate": 0.8905 + }, + { + "name": "qa_factual", + "wall_s": 16.124, + "predicted_n": 192, + "predicted_per_second": 12.32, + "draft_n": 148, + "draft_n_accepted": 117, + "accept_rate": 0.7905 + }, + { + "name": "translation", + "wall_s": 16.753, + "predicted_n": 192, + "predicted_per_second": 11.85, + "draft_n": 153, + "draft_n_accepted": 114, + "accept_rate": 0.7451 + }, + { + "name": "creative_short", + "wall_s": 17.547, + "predicted_n": 192, + "predicted_per_second": 11.29, + "draft_n": 160, + "draft_n_accepted": 110, + "accept_rate": 0.6875 + }, + { + "name": "stepwise_math", + "wall_s": 15.305, + "predicted_n": 192, + "predicted_per_second": 13.03, + "draft_n": 139, + "draft_n_accepted": 121, + "accept_rate": 0.8705 + }, + { + "name": "long_code_review", + "wall_s": 19.408, + "predicted_n": 192, + "predicted_per_second": 11.54, + "draft_n": 156, + "draft_n_accepted": 112, + "accept_rate": 0.7179 + } + ], + "aggregate": { + "n_requests": 9, + "total_predicted": 1728, + "total_draft": 1321, + "total_draft_accepted": 1053, + "aggregate_accept_rate": 0.7971, + "wall_s_total": 147.31 + } +} diff --git a/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__rocm-7.2.3-mtp__mtp-3.json b/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__rocm-7.2.3-mtp__mtp-3.json new file mode 100644 index 0000000..1602647 --- /dev/null +++ b/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__rocm-7.2.3-mtp__mtp-3.json @@ -0,0 +1,99 @@ +{ + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "gguf": "Qwen3.6-27B-MTP-GGUF/Qwen3.6-27B-UD-Q8_K_XL.gguf", + "toolbox": "rocm-7.2.3-mtp", + "mode": "mtp-3", + "spec_flags": "--spec-type draft-mtp --spec-draft-n-max 3 -np 1", + "timestamp": "2026-05-15T10:40:12.760327", + "results": [ + { + "name": "code_python", + "wall_s": 13.774, + "predicted_n": 192, + "predicted_per_second": 14.54, + "draft_n": 163, + "draft_n_accepted": 136, + "accept_rate": 0.8344 + }, + { + "name": "code_cpp", + "wall_s": 15.093, + "predicted_n": 192, + "predicted_per_second": 13.19, + "draft_n": 181, + "draft_n_accepted": 130, + "accept_rate": 0.7182 + }, + { + "name": "explain_concept", + "wall_s": 14.643, + "predicted_n": 192, + "predicted_per_second": 13.63, + "draft_n": 175, + "draft_n_accepted": 132, + "accept_rate": 0.7543 + }, + { + "name": "summarize", + "wall_s": 14.134, + "predicted_n": 192, + "predicted_per_second": 14.17, + "draft_n": 168, + "draft_n_accepted": 134, + "accept_rate": 0.7976 + }, + { + "name": "qa_factual", + "wall_s": 14.651, + "predicted_n": 192, + "predicted_per_second": 13.63, + "draft_n": 176, + "draft_n_accepted": 132, + "accept_rate": 0.75 + }, + { + "name": "translation", + "wall_s": 15.613, + "predicted_n": 192, + "predicted_per_second": 12.75, + "draft_n": 189, + "draft_n_accepted": 128, + "accept_rate": 0.6772 + }, + { + "name": "creative_short", + "wall_s": 15.599, + "predicted_n": 192, + "predicted_per_second": 12.76, + "draft_n": 187, + "draft_n_accepted": 128, + "accept_rate": 0.6845 + }, + { + "name": "stepwise_math", + "wall_s": 14.128, + "predicted_n": 192, + "predicted_per_second": 14.17, + "draft_n": 168, + "draft_n_accepted": 134, + "accept_rate": 0.7976 + }, + { + "name": "long_code_review", + "wall_s": 17.563, + "predicted_n": 192, + "predicted_per_second": 12.99, + "draft_n": 183, + "draft_n_accepted": 129, + "accept_rate": 0.7049 + } + ], + "aggregate": { + "n_requests": 9, + "total_predicted": 1728, + "total_draft": 1590, + "total_draft_accepted": 1183, + "aggregate_accept_rate": 0.744, + "wall_s_total": 135.2 + } +} diff --git a/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__vulkan-radv-mtp__baseline.json b/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__vulkan-radv-mtp__baseline.json new file mode 100644 index 0000000..fbf6557 --- /dev/null +++ b/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__vulkan-radv-mtp__baseline.json @@ -0,0 +1,99 @@ +{ + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "gguf": "Qwen3.6-27B-MTP-GGUF/Qwen3.6-27B-UD-Q8_K_XL.gguf", + "toolbox": "vulkan-radv-mtp", + "mode": "baseline", + "spec_flags": "(none)", + "timestamp": "2026-05-15T10:47:21.199537", + "results": [ + { + "name": "code_python", + "wall_s": 31.21, + "predicted_n": 192, + "predicted_per_second": 6.31, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "code_cpp", + "wall_s": 31.194, + "predicted_n": 192, + "predicted_per_second": 6.32, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "explain_concept", + "wall_s": 31.126, + "predicted_n": 192, + "predicted_per_second": 6.32, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "summarize", + "wall_s": 31.223, + "predicted_n": 192, + "predicted_per_second": 6.32, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "qa_factual", + "wall_s": 31.103, + "predicted_n": 192, + "predicted_per_second": 6.32, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "translation", + "wall_s": 31.113, + "predicted_n": 192, + "predicted_per_second": 6.32, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "creative_short", + "wall_s": 31.093, + "predicted_n": 192, + "predicted_per_second": 6.32, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "stepwise_math", + "wall_s": 31.2, + "predicted_n": 192, + "predicted_per_second": 6.32, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "long_code_review", + "wall_s": 34.595, + "predicted_n": 192, + "predicted_per_second": 6.31, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + } + ], + "aggregate": { + "n_requests": 9, + "total_predicted": 1728, + "total_draft": 0, + "total_draft_accepted": 0, + "aggregate_accept_rate": null, + "wall_s_total": 283.86 + } +} diff --git a/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__vulkan-radv-mtp__mtp-2.json b/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__vulkan-radv-mtp__mtp-2.json new file mode 100644 index 0000000..164b263 --- /dev/null +++ b/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__vulkan-radv-mtp__mtp-2.json @@ -0,0 +1,99 @@ +{ + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "gguf": "Qwen3.6-27B-MTP-GGUF/Qwen3.6-27B-UD-Q8_K_XL.gguf", + "toolbox": "vulkan-radv-mtp", + "mode": "mtp-2", + "spec_flags": "--spec-type draft-mtp --spec-draft-n-max 2 -np 1", + "timestamp": "2026-05-15T10:50:15.329789", + "results": [ + { + "name": "code_python", + "wall_s": 16.53, + "predicted_n": 192, + "predicted_per_second": 12.25, + "draft_n": 140, + "draft_n_accepted": 121, + "accept_rate": 0.8643 + }, + { + "name": "code_cpp", + "wall_s": 17.205, + "predicted_n": 192, + "predicted_per_second": 11.75, + "draft_n": 146, + "draft_n_accepted": 118, + "accept_rate": 0.8082 + }, + { + "name": "explain_concept", + "wall_s": 17.95, + "predicted_n": 192, + "predicted_per_second": 11.18, + "draft_n": 152, + "draft_n_accepted": 114, + "accept_rate": 0.75 + }, + { + "name": "summarize", + "wall_s": 15.481, + "predicted_n": 192, + "predicted_per_second": 13.18, + "draft_n": 130, + "draft_n_accepted": 126, + "accept_rate": 0.9692 + }, + { + "name": "qa_factual", + "wall_s": 16.827, + "predicted_n": 192, + "predicted_per_second": 11.96, + "draft_n": 142, + "draft_n_accepted": 119, + "accept_rate": 0.838 + }, + { + "name": "translation", + "wall_s": 17.797, + "predicted_n": 192, + "predicted_per_second": 11.29, + "draft_n": 151, + "draft_n_accepted": 115, + "accept_rate": 0.7616 + }, + { + "name": "creative_short", + "wall_s": 18.605, + "predicted_n": 192, + "predicted_per_second": 10.76, + "draft_n": 157, + "draft_n_accepted": 111, + "accept_rate": 0.707 + }, + { + "name": "stepwise_math", + "wall_s": 17.225, + "predicted_n": 192, + "predicted_per_second": 11.75, + "draft_n": 145, + "draft_n_accepted": 118, + "accept_rate": 0.8138 + }, + { + "name": "long_code_review", + "wall_s": 21.792, + "predicted_n": 192, + "predicted_per_second": 11.12, + "draft_n": 153, + "draft_n_accepted": 114, + "accept_rate": 0.7451 + } + ], + "aggregate": { + "n_requests": 9, + "total_predicted": 1728, + "total_draft": 1316, + "total_draft_accepted": 1056, + "aggregate_accept_rate": 0.8024, + "wall_s_total": 159.41 + } +} diff --git a/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__vulkan-radv-mtp__mtp-3.json b/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__vulkan-radv-mtp__mtp-3.json new file mode 100644 index 0000000..911d870 --- /dev/null +++ b/benchmark/results-mtp/Qwen3.6-27B-UD-Q8_K_XL__vulkan-radv-mtp__mtp-3.json @@ -0,0 +1,99 @@ +{ + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "gguf": "Qwen3.6-27B-MTP-GGUF/Qwen3.6-27B-UD-Q8_K_XL.gguf", + "toolbox": "vulkan-radv-mtp", + "mode": "mtp-3", + "spec_flags": "--spec-type draft-mtp --spec-draft-n-max 3 -np 1", + "timestamp": "2026-05-15T10:52:51.775869", + "results": [ + { + "name": "code_python", + "wall_s": 14.105, + "predicted_n": 192, + "predicted_per_second": 14.52, + "draft_n": 163, + "draft_n_accepted": 136, + "accept_rate": 0.8344 + }, + { + "name": "code_cpp", + "wall_s": 15.083, + "predicted_n": 192, + "predicted_per_second": 13.52, + "draft_n": 175, + "draft_n_accepted": 132, + "accept_rate": 0.7543 + }, + { + "name": "explain_concept", + "wall_s": 16.414, + "predicted_n": 192, + "predicted_per_second": 12.29, + "draft_n": 195, + "draft_n_accepted": 126, + "accept_rate": 0.6462 + }, + { + "name": "summarize", + "wall_s": 14.446, + "predicted_n": 192, + "predicted_per_second": 14.2, + "draft_n": 167, + "draft_n_accepted": 135, + "accept_rate": 0.8084 + }, + { + "name": "qa_factual", + "wall_s": 14.5, + "predicted_n": 192, + "predicted_per_second": 14.0, + "draft_n": 171, + "draft_n_accepted": 134, + "accept_rate": 0.7836 + }, + { + "name": "translation", + "wall_s": 15.252, + "predicted_n": 192, + "predicted_per_second": 13.29, + "draft_n": 179, + "draft_n_accepted": 131, + "accept_rate": 0.7318 + }, + { + "name": "creative_short", + "wall_s": 16.421, + "predicted_n": 192, + "predicted_per_second": 12.28, + "draft_n": 192, + "draft_n_accepted": 126, + "accept_rate": 0.6562 + }, + { + "name": "stepwise_math", + "wall_s": 15.78, + "predicted_n": 192, + "predicted_per_second": 12.91, + "draft_n": 183, + "draft_n_accepted": 129, + "accept_rate": 0.7049 + }, + { + "name": "long_code_review", + "wall_s": 19.742, + "predicted_n": 192, + "predicted_per_second": 12.64, + "draft_n": 187, + "draft_n_accepted": 128, + "accept_rate": 0.6845 + } + ], + "aggregate": { + "n_requests": 9, + "total_predicted": 1728, + "total_draft": 1612, + "total_draft_accepted": 1177, + "aggregate_accept_rate": 0.7301, + "wall_s_total": 141.74 + } +} diff --git a/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7.2.3-mtp__baseline.json b/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7.2.3-mtp__baseline.json new file mode 100644 index 0000000..c691416 --- /dev/null +++ b/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7.2.3-mtp__baseline.json @@ -0,0 +1,99 @@ +{ + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "gguf": "Qwen3.6-35B-A3B-MTP-GGUF/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf", + "toolbox": "rocm-7.2.3-mtp", + "mode": "baseline", + "spec_flags": "(none)", + "timestamp": "2026-05-15T10:41:02.007668", + "results": [ + { + "name": "code_python", + "wall_s": 4.09, + "predicted_n": 192, + "predicted_per_second": 48.73, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "code_cpp", + "wall_s": 4.097, + "predicted_n": 192, + "predicted_per_second": 48.72, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "explain_concept", + "wall_s": 4.08, + "predicted_n": 192, + "predicted_per_second": 48.73, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "summarize", + "wall_s": 4.124, + "predicted_n": 192, + "predicted_per_second": 48.75, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "qa_factual", + "wall_s": 4.067, + "predicted_n": 192, + "predicted_per_second": 48.78, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "translation", + "wall_s": 4.067, + "predicted_n": 192, + "predicted_per_second": 48.78, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "creative_short", + "wall_s": 4.065, + "predicted_n": 192, + "predicted_per_second": 48.77, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "stepwise_math", + "wall_s": 4.124, + "predicted_n": 192, + "predicted_per_second": 48.68, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "long_code_review", + "wall_s": 4.831, + "predicted_n": 192, + "predicted_per_second": 48.38, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + } + ], + "aggregate": { + "n_requests": 9, + "total_predicted": 1728, + "total_draft": 0, + "total_draft_accepted": 0, + "aggregate_accept_rate": null, + "wall_s_total": 37.55 + } +} diff --git a/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7.2.3-mtp__mtp-2.json b/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7.2.3-mtp__mtp-2.json new file mode 100644 index 0000000..6bf1e5e --- /dev/null +++ b/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7.2.3-mtp__mtp-2.json @@ -0,0 +1,99 @@ +{ + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "gguf": "Qwen3.6-35B-A3B-MTP-GGUF/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf", + "toolbox": "rocm-7.2.3-mtp", + "mode": "mtp-2", + "spec_flags": "--spec-type draft-mtp --spec-draft-n-max 2 -np 1", + "timestamp": "2026-05-15T10:41:43.075333", + "results": [ + { + "name": "code_python", + "wall_s": 2.929, + "predicted_n": 192, + "predicted_per_second": 70.03, + "draft_n": 133, + "draft_n_accepted": 123, + "accept_rate": 0.9248 + }, + { + "name": "code_cpp", + "wall_s": 3.521, + "predicted_n": 192, + "predicted_per_second": 57.34, + "draft_n": 165, + "draft_n_accepted": 108, + "accept_rate": 0.6545 + }, + { + "name": "explain_concept", + "wall_s": 3.261, + "predicted_n": 192, + "predicted_per_second": 61.76, + "draft_n": 154, + "draft_n_accepted": 114, + "accept_rate": 0.7403 + }, + { + "name": "summarize", + "wall_s": 2.958, + "predicted_n": 192, + "predicted_per_second": 69.68, + "draft_n": 135, + "draft_n_accepted": 123, + "accept_rate": 0.9111 + }, + { + "name": "qa_factual", + "wall_s": 2.965, + "predicted_n": 192, + "predicted_per_second": 68.0, + "draft_n": 139, + "draft_n_accepted": 121, + "accept_rate": 0.8705 + }, + { + "name": "translation", + "wall_s": 3.095, + "predicted_n": 192, + "predicted_per_second": 65.08, + "draft_n": 145, + "draft_n_accepted": 118, + "accept_rate": 0.8138 + }, + { + "name": "creative_short", + "wall_s": 3.273, + "predicted_n": 192, + "predicted_per_second": 61.28, + "draft_n": 154, + "draft_n_accepted": 113, + "accept_rate": 0.7338 + }, + { + "name": "stepwise_math", + "wall_s": 3.132, + "predicted_n": 192, + "predicted_per_second": 65.26, + "draft_n": 145, + "draft_n_accepted": 118, + "accept_rate": 0.8138 + }, + { + "name": "long_code_review", + "wall_s": 4.194, + "predicted_n": 192, + "predicted_per_second": 61.74, + "draft_n": 152, + "draft_n_accepted": 114, + "accept_rate": 0.75 + } + ], + "aggregate": { + "n_requests": 9, + "total_predicted": 1728, + "total_draft": 1322, + "total_draft_accepted": 1052, + "aggregate_accept_rate": 0.7958, + "wall_s_total": 29.33 + } +} diff --git a/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7.2.3-mtp__mtp-3.json b/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7.2.3-mtp__mtp-3.json new file mode 100644 index 0000000..58a3e07 --- /dev/null +++ b/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7.2.3-mtp__mtp-3.json @@ -0,0 +1,99 @@ +{ + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "gguf": "Qwen3.6-35B-A3B-MTP-GGUF/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf", + "toolbox": "rocm-7.2.3-mtp", + "mode": "mtp-3", + "spec_flags": "--spec-type draft-mtp --spec-draft-n-max 3 -np 1", + "timestamp": "2026-05-15T10:42:22.608352", + "results": [ + { + "name": "code_python", + "wall_s": 2.881, + "predicted_n": 192, + "predicted_per_second": 71.3, + "draft_n": 168, + "draft_n_accepted": 134, + "accept_rate": 0.7976 + }, + { + "name": "code_cpp", + "wall_s": 2.798, + "predicted_n": 192, + "predicted_per_second": 73.24, + "draft_n": 165, + "draft_n_accepted": 135, + "accept_rate": 0.8182 + }, + { + "name": "explain_concept", + "wall_s": 3.341, + "predicted_n": 192, + "predicted_per_second": 60.29, + "draft_n": 201, + "draft_n_accepted": 123, + "accept_rate": 0.6119 + }, + { + "name": "summarize", + "wall_s": 2.74, + "predicted_n": 192, + "predicted_per_second": 75.85, + "draft_n": 157, + "draft_n_accepted": 137, + "accept_rate": 0.8726 + }, + { + "name": "qa_factual", + "wall_s": 2.841, + "predicted_n": 192, + "predicted_per_second": 71.37, + "draft_n": 169, + "draft_n_accepted": 134, + "accept_rate": 0.7929 + }, + { + "name": "translation", + "wall_s": 2.956, + "predicted_n": 192, + "predicted_per_second": 68.4, + "draft_n": 177, + "draft_n_accepted": 131, + "accept_rate": 0.7401 + }, + { + "name": "creative_short", + "wall_s": 3.255, + "predicted_n": 192, + "predicted_per_second": 61.75, + "draft_n": 197, + "draft_n_accepted": 125, + "accept_rate": 0.6345 + }, + { + "name": "stepwise_math", + "wall_s": 2.997, + "predicted_n": 192, + "predicted_per_second": 68.52, + "draft_n": 177, + "draft_n_accepted": 131, + "accept_rate": 0.7401 + }, + { + "name": "long_code_review", + "wall_s": 4.024, + "predicted_n": 192, + "predicted_per_second": 64.34, + "draft_n": 184, + "draft_n_accepted": 128, + "accept_rate": 0.6957 + } + ], + "aggregate": { + "n_requests": 9, + "total_predicted": 1728, + "total_draft": 1595, + "total_draft_accepted": 1178, + "aggregate_accept_rate": 0.7386, + "wall_s_total": 27.83 + } +} diff --git a/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan-radv-mtp__baseline.json b/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan-radv-mtp__baseline.json new file mode 100644 index 0000000..3c40a59 --- /dev/null +++ b/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan-radv-mtp__baseline.json @@ -0,0 +1,99 @@ +{ + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "gguf": "Qwen3.6-35B-A3B-MTP-GGUF/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf", + "toolbox": "vulkan-radv-mtp", + "mode": "baseline", + "spec_flags": "(none)", + "timestamp": "2026-05-15T10:53:35.323487", + "results": [ + { + "name": "code_python", + "wall_s": 3.609, + "predicted_n": 192, + "predicted_per_second": 58.67, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "code_cpp", + "wall_s": 3.562, + "predicted_n": 192, + "predicted_per_second": 58.2, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "explain_concept", + "wall_s": 3.399, + "predicted_n": 192, + "predicted_per_second": 59.04, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "summarize", + "wall_s": 3.459, + "predicted_n": 192, + "predicted_per_second": 59.03, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "qa_factual", + "wall_s": 3.394, + "predicted_n": 192, + "predicted_per_second": 59.06, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "translation", + "wall_s": 3.418, + "predicted_n": 192, + "predicted_per_second": 58.67, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "creative_short", + "wall_s": 3.441, + "predicted_n": 192, + "predicted_per_second": 58.2, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "stepwise_math", + "wall_s": 3.459, + "predicted_n": 192, + "predicted_per_second": 59.0, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + }, + { + "name": "long_code_review", + "wall_s": 4.187, + "predicted_n": 192, + "predicted_per_second": 58.63, + "draft_n": 0, + "draft_n_accepted": 0, + "accept_rate": null + } + ], + "aggregate": { + "n_requests": 9, + "total_predicted": 1728, + "total_draft": 0, + "total_draft_accepted": 0, + "aggregate_accept_rate": null, + "wall_s_total": 31.93 + } +} diff --git a/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan-radv-mtp__mtp-2.json b/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan-radv-mtp__mtp-2.json new file mode 100644 index 0000000..0f5c088 --- /dev/null +++ b/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan-radv-mtp__mtp-2.json @@ -0,0 +1,99 @@ +{ + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "gguf": "Qwen3.6-35B-A3B-MTP-GGUF/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf", + "toolbox": "vulkan-radv-mtp", + "mode": "mtp-2", + "spec_flags": "--spec-type draft-mtp --spec-draft-n-max 2 -np 1", + "timestamp": "2026-05-15T10:54:13.838672", + "results": [ + { + "name": "code_python", + "wall_s": 2.89, + "predicted_n": 192, + "predicted_per_second": 77.15, + "draft_n": 135, + "draft_n_accepted": 123, + "accept_rate": 0.9111 + }, + { + "name": "code_cpp", + "wall_s": 2.956, + "predicted_n": 192, + "predicted_per_second": 72.36, + "draft_n": 149, + "draft_n_accepted": 116, + "accept_rate": 0.7785 + }, + { + "name": "explain_concept", + "wall_s": 2.952, + "predicted_n": 192, + "predicted_per_second": 68.88, + "draft_n": 155, + "draft_n_accepted": 113, + "accept_rate": 0.729 + }, + { + "name": "summarize", + "wall_s": 2.652, + "predicted_n": 192, + "predicted_per_second": 79.57, + "draft_n": 134, + "draft_n_accepted": 124, + "accept_rate": 0.9254 + }, + { + "name": "qa_factual", + "wall_s": 2.609, + "predicted_n": 192, + "predicted_per_second": 78.21, + "draft_n": 137, + "draft_n_accepted": 122, + "accept_rate": 0.8905 + }, + { + "name": "translation", + "wall_s": 3.09, + "predicted_n": 192, + "predicted_per_second": 65.59, + "draft_n": 165, + "draft_n_accepted": 107, + "accept_rate": 0.6485 + }, + { + "name": "creative_short", + "wall_s": 3.109, + "predicted_n": 192, + "predicted_per_second": 65.06, + "draft_n": 166, + "draft_n_accepted": 108, + "accept_rate": 0.6506 + }, + { + "name": "stepwise_math", + "wall_s": 2.695, + "predicted_n": 192, + "predicted_per_second": 77.85, + "draft_n": 137, + "draft_n_accepted": 122, + "accept_rate": 0.8905 + }, + { + "name": "long_code_review", + "wall_s": 3.899, + "predicted_n": 192, + "predicted_per_second": 70.62, + "draft_n": 150, + "draft_n_accepted": 115, + "accept_rate": 0.7667 + } + ], + "aggregate": { + "n_requests": 9, + "total_predicted": 1728, + "total_draft": 1328, + "total_draft_accepted": 1050, + "aggregate_accept_rate": 0.7907, + "wall_s_total": 26.85 + } +} diff --git a/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan-radv-mtp__mtp-3.json b/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan-radv-mtp__mtp-3.json new file mode 100644 index 0000000..b82fcfe --- /dev/null +++ b/benchmark/results-mtp/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan-radv-mtp__mtp-3.json @@ -0,0 +1,99 @@ +{ + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "gguf": "Qwen3.6-35B-A3B-MTP-GGUF/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf", + "toolbox": "vulkan-radv-mtp", + "mode": "mtp-3", + "spec_flags": "--spec-type draft-mtp --spec-draft-n-max 3 -np 1", + "timestamp": "2026-05-15T10:54:51.776894", + "results": [ + { + "name": "code_python", + "wall_s": 2.793, + "predicted_n": 192, + "predicted_per_second": 80.47, + "draft_n": 165, + "draft_n_accepted": 136, + "accept_rate": 0.8242 + }, + { + "name": "code_cpp", + "wall_s": 3.042, + "predicted_n": 192, + "predicted_per_second": 70.24, + "draft_n": 185, + "draft_n_accepted": 129, + "accept_rate": 0.6973 + }, + { + "name": "explain_concept", + "wall_s": 2.863, + "predicted_n": 192, + "predicted_per_second": 71.33, + "draft_n": 185, + "draft_n_accepted": 129, + "accept_rate": 0.6973 + }, + { + "name": "summarize", + "wall_s": 2.568, + "predicted_n": 192, + "predicted_per_second": 82.57, + "draft_n": 159, + "draft_n_accepted": 138, + "accept_rate": 0.8679 + }, + { + "name": "qa_factual", + "wall_s": 2.584, + "predicted_n": 192, + "predicted_per_second": 79.2, + "draft_n": 165, + "draft_n_accepted": 135, + "accept_rate": 0.8182 + }, + { + "name": "translation", + "wall_s": 2.84, + "predicted_n": 192, + "predicted_per_second": 71.91, + "draft_n": 185, + "draft_n_accepted": 129, + "accept_rate": 0.6973 + }, + { + "name": "creative_short", + "wall_s": 2.992, + "predicted_n": 192, + "predicted_per_second": 67.9, + "draft_n": 198, + "draft_n_accepted": 125, + "accept_rate": 0.6313 + }, + { + "name": "stepwise_math", + "wall_s": 2.602, + "predicted_n": 192, + "predicted_per_second": 81.07, + "draft_n": 164, + "draft_n_accepted": 136, + "accept_rate": 0.8293 + }, + { + "name": "long_code_review", + "wall_s": 4.076, + "predicted_n": 192, + "predicted_per_second": 66.39, + "draft_n": 197, + "draft_n_accepted": 125, + "accept_rate": 0.6345 + } + ], + "aggregate": { + "n_requests": 9, + "total_predicted": 1728, + "total_draft": 1603, + "total_draft_accepted": 1182, + "aggregate_accept_rate": 0.7374, + "wall_s_total": 26.36 + } +} diff --git a/benchmark/results-mtp/summary.json b/benchmark/results-mtp/summary.json new file mode 100644 index 0000000..a5544bf --- /dev/null +++ b/benchmark/results-mtp/summary.json @@ -0,0 +1,98 @@ +[ + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "toolbox": "rocm-7.2.3-mtp", + "mode": "baseline", + "avg_tok_s": 6.5, + "accept_rate": null, + "wall_s_total": 273.39 + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "toolbox": "rocm-7.2.3-mtp", + "mode": "mtp-2", + "avg_tok_s": 12.4, + "accept_rate": 0.7971, + "wall_s_total": 147.31 + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "toolbox": "rocm-7.2.3-mtp", + "mode": "mtp-3", + "avg_tok_s": 13.5, + "accept_rate": 0.744, + "wall_s_total": 135.2 + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "toolbox": "vulkan-radv-mtp", + "mode": "baseline", + "avg_tok_s": 6.3, + "accept_rate": null, + "wall_s_total": 283.86 + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "toolbox": "vulkan-radv-mtp", + "mode": "mtp-2", + "avg_tok_s": 11.7, + "accept_rate": 0.8024, + "wall_s_total": 159.41 + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "toolbox": "vulkan-radv-mtp", + "mode": "mtp-3", + "avg_tok_s": 13.3, + "accept_rate": 0.7301, + "wall_s_total": 141.74 + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "toolbox": "rocm-7.2.3-mtp", + "mode": "baseline", + "avg_tok_s": 48.7, + "accept_rate": null, + "wall_s_total": 37.55 + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "toolbox": "rocm-7.2.3-mtp", + "mode": "mtp-2", + "avg_tok_s": 64.5, + "accept_rate": 0.7958, + "wall_s_total": 29.33 + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "toolbox": "rocm-7.2.3-mtp", + "mode": "mtp-3", + "avg_tok_s": 68.3, + "accept_rate": 0.7386, + "wall_s_total": 27.83 + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "toolbox": "vulkan-radv-mtp", + "mode": "baseline", + "avg_tok_s": 58.7, + "accept_rate": null, + "wall_s_total": 31.93 + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "toolbox": "vulkan-radv-mtp", + "mode": "mtp-2", + "avg_tok_s": 72.8, + "accept_rate": 0.7907, + "wall_s_total": 26.85 + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "toolbox": "vulkan-radv-mtp", + "mode": "mtp-3", + "avg_tok_s": 74.6, + "accept_rate": 0.7374, + "wall_s_total": 26.36 + } +] diff --git a/benchmark/results-mtp/system_info.json b/benchmark/results-mtp/system_info.json new file mode 100644 index 0000000..10c3b27 --- /dev/null +++ b/benchmark/results-mtp/system_info.json @@ -0,0 +1,6 @@ +{ + "distro": "Fedora Linux 43 (Workstation Edition)", + "kernel": "6.19.12-200.fc43.x86_64", + "linux_firmware": "linux-firmware-20260309-1.fc43.noarch", + "timestamp": "2026-05-15 10:30:23" +}