Use a smaller output capacity in order to maintain strong tokens-per-second (tok/s) generation speed

This commit is contained in:
Bryan Ramos 2026-04-15 08:59:11 -04:00
parent 9d5559e2b4
commit d6e0e9f2d9

View file

@@ -13,7 +13,7 @@
"name": "Qwen3-Coder-30B-A3B-Instruct-Q6", "name": "Qwen3-Coder-30B-A3B-Instruct-Q6",
"limit": { "limit": {
"context": 262144, "context": 262144,
"output": 262144 "output": 8192
}, },
"cost": { "cost": {
"input": 0, "input": 0,