Switched model from Q8_0 to Q6_K quant

This commit is contained in:
Bryan Ramos 2026-04-14 23:14:33 -04:00
parent 32e213dc69
commit 6eff5326d2

View file

@@ -9,8 +9,8 @@
"apiKey": "{env:LLAMA_API_KEY}" "apiKey": "{env:LLAMA_API_KEY}"
}, },
"models": { "models": {
"llamacpp/Qwen3-Coder-30B-A3B-Instruct-Q8_0": { "llamacpp/Qwen3-Coder-30B-A3B-Instruct-Q6_K": {
"name": "Qwen3-Coder-30B-A3B-Instruct-Q8", "name": "Qwen3-Coder-30B-A3B-Instruct-Q6",
"limit": { "limit": {
"context": 262144, "context": 262144,
"output": 262144 "output": 262144
@@ -35,5 +35,5 @@
"enabled": true "enabled": true
} }
}, },
"model": "llama-stack/llamacpp/Qwen3-Coder-30B-A3B-Instruct-Q8_0" "model": "llama-stack/llamacpp/Qwen3-Coder-30B-A3B-Instruct-Q6_K"
} }