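# Model presets in INI format.
# NOTE(review): the key names match llama.cpp `llama-server` command-line
# options (without the leading dashes) — confirm against the consuming tool.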
# Global key, set before any section header.
# NOTE(review): presumably the preset-format version expected by the consuming
# tool — confirm before bumping.
version = 1

# Wildcard section. NOTE(review): presumably these settings are the defaults
# for every model section in this file, and a model section overrides a default
# by repeating the key — confirm against the preset loader.
# NOTE(review): boolean values below mix on/off with true; keep the mix only if
# the parser accepts both spellings.
[*]
threads = 12
flash-attn = on
mlock = off
mmap = off
fit = off
warmup = off
# batch-size is larger than ubatch-size; some model sections set their own
# ubatch-size.
batch-size = 4096
ubatch-size = 512
# Default KV-cache types; the Qwen3.5 sections set q4_0 instead.
cache-type-k = q8_0
cache-type-v = q8_0
jinja = true
direct-io = on
cache-prompt = true
# The Qwen3.5 sections set cache-reuse = 0 instead of this value.
cache-reuse = 256
# NOTE(review): unit looks like MiB (32768 MiB = 32 GiB) — confirm.
cache-ram = 32768

# --- MODELS ---

# Qwen3-Coder-Next, UD-Q6_K_XL quantization.
[unsloth-Qwen3-Coder-Next]
# Points at the first shard of a 3-part GGUF (see the -00001-of-00003 suffix).
model = /path/to/models/unsloth-Qwen3-Coder-Next/UD-Q6_K_XL/Qwen3-Coder-Next-UD-Q6_K_XL-00001-of-00003.gguf
# NOTE(review): 999 presumably means "offload every layer" — confirm.
n-gpu-layers = 999
ctx-size = 128000
# Sampling settings for this model.
temp = 1.0
top-p = 0.95
top-k = 40
min-p = 0.01

# GLM-4.7-Flash, UD-Q6_K_XL quantization (single-file GGUF).
[unsloth-GLM-4.7-Flash]
model = /path/to/models/unsloth-GLM-4.7-Flash-GGUF/GLM-4.7-Flash-UD-Q6_K_XL.gguf
# NOTE(review): 999 presumably means "offload every layer" — confirm.
n-gpu-layers = 999
ctx-size = 128000
# Sampling settings for this model; top-k is not set in this section.
temp = 0.7
top-p = 1.0
min-p = 0.01

# gpt-oss-120b, UD-Q8_K_XL quantization.
[gpt-oss-120b]
# Points at the first shard of a 2-part GGUF (see the -00001-of-00002 suffix).
# NOTE(review): the directory is "gpt-oss-120" (no trailing "b") while the file
# name uses "gpt-oss-120b" — confirm the path matches the disk layout.
model = /path/to/models/gpt-oss-120/ud-q8_k_xl/gpt-oss-120b-UD-Q8_K_XL-00001-of-00002.gguf
alias = gpt-120b
# NOTE(review): 999 presumably means "offload every layer" — confirm.
n-gpu-layers = 999
ctx-size = 65536
# Sampling settings for this model; top-p and top-k are not set in this section.
temp = 0.8
min-p = 0.05
# JSON object; sets reasoning_effort to "high".
chat-template-kwargs = {"reasoning_effort": "high"}

# Llama-4-Scout-17B-16E-Instruct, UD-Q4_K_XL quantization, with an mmproj file.
[llama-4-scout]
# Points at the first shard of a 2-part GGUF (see the -00001-of-00002 suffix).
model = /path/to/models/llama4-scout-17b-16e/unsloth/q4_k_xl/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
# NOTE(review): presumably the multimodal projector loaded with the model — confirm.
mmproj = /path/to/models/llama4-scout-17b-16e/unsloth/q4_k_xl/unsloth-Llama-4-Scout-17B-16E-Instruct-GGUF-mmproj-BF16.gguf
alias = llama-4
# NOTE(review): 999 presumably means "offload every layer" — confirm.
n-gpu-layers = 999
# No sampling keys are set in this section.
ctx-size = 65536

# MiniMax-M2.5, UD-IQ3_XXS quantization.
[unsloth-MiniMax-M2.5]
# Points at the first shard of a 3-part GGUF (see the -00001-of-00003 suffix).
model = /path/to/models/unsloth-MiniMax-M2.5-GGUF/UD-IQ3_XXS/MiniMax-M2.5-UD-IQ3_XXS-00001-of-00003.gguf
# NOTE(review): 999 presumably means "offload every layer" — confirm.
n-gpu-layers = 999
ctx-size = 128000
# Sampling settings for this model.
temp = 1.0
top-p = 0.95
top-k = 40
min-p = 0.01

# Qwen3.5-397B-A17B, UD-TQ1_0 quantization (single-file GGUF), with an mmproj file.
[unsloth-Qwen3.5-397B-A17B]
model = /path/to/models/unsloth-Qwen3.5-397B-A17B/Qwen3.5-397B-A17B-UD-TQ1_0.gguf
# NOTE(review): presumably the multimodal projector loaded with the model — confirm.
mmproj = /path/to/models/unsloth-Qwen3.5-397B-A17B/mmproj-BF16.gguf
alias = qwen3.5
# NOTE(review): 999 presumably means "offload every layer" — confirm.
n-gpu-layers = 999
ctx-size = 128000
# Sampling settings for this model.
temp = 0.6
top-p = 0.95
top-k = 20
min-p = 0.0
# The [*] section sets both cache types to q8_0; this section uses q4_0.
cache-type-k = q4_0
cache-type-v = q4_0
# JSON object; sets enable_thinking to true.
chat-template-kwargs = {"enable_thinking": true}
# The [*] section sets ubatch-size = 512 and cache-reuse = 256; this section
# uses 256 and 0.
ubatch-size = 256
cache-reuse = 0

# Qwen3.5-122B-A10B, MXFP4_MOE quantization.
[unsloth-Qwen3.5-122B-A10B]
# Points at the first shard of a 3-part GGUF (see the -00001-of-00003 suffix).
model = /path/to/models/unsloth-Qwen3.5-122B-A10B-GGUF/Qwen3.5-122B-A10B-MXFP4_MOE-00001-of-00003.gguf
alias = qwen3.5-122b
# NOTE(review): 999 presumably means "offload every layer" — confirm.
n-gpu-layers = 999
ctx-size = 128000
# Sampling settings for this model.
temp = 1.0
top-p = 0.95
top-k = 20
min-p = 0.0
# The [*] section sets ubatch-size = 512; this section uses 2048.
ubatch-size = 2048
# The [*] section sets both cache types to q8_0; this section uses q4_0.
cache-type-k = q4_0
cache-type-v = q4_0
# The [*] section sets cache-reuse = 256; this section uses 0.
cache-reuse = 0