switched model to Q6 quant

This commit is contained in:
Bryan Ramos 2026-04-14 23:14:33 -04:00
parent 32e213dc69
commit 6eff5326d2

View file

@@ -9,8 +9,8 @@
"apiKey": "{env:LLAMA_API_KEY}"
},
"models": {
-"llamacpp/Qwen3-Coder-30B-A3B-Instruct-Q8_0": {
-"name": "Qwen3-Coder-30B-A3B-Instruct-Q8",
+"llamacpp/Qwen3-Coder-30B-A3B-Instruct-Q6_K": {
+"name": "Qwen3-Coder-30B-A3B-Instruct-Q6",
"limit": {
"context": 262144,
"output": 262144
@@ -35,5 +35,5 @@
"enabled": true
}
},
-"model": "llama-stack/llamacpp/Qwen3-Coder-30B-A3B-Instruct-Q8_0"
+"model": "llama-stack/llamacpp/Qwen3-Coder-30B-A3B-Instruct-Q6_K"
}