mirror of
https://github.com/itme-brain/nixos.git
synced 2026-05-08 14:50:12 -04:00
58 lines
2 KiB
Django/Jinja
58 lines
2 KiB
Django/Jinja
{% raw %}# List available recipes
[private]
default:
    @just --list
# Show currently running vLLM server
status:
    @pgrep -a -f "vllm serve" || echo "No vLLM server running"
# Tail the vLLM log
logs:
    @tail -f {% endraw %}"{{ vllm_home }}/vllm.log"{% raw %}
# Stop any running vLLM server and wait for VRAM to free
stop:
    #!/usr/bin/env bash
    set -euo pipefail
    if pgrep -f "vllm serve" > /dev/null; then
        echo "Stopping vLLM..."
        # TERM first for a graceful shutdown, KILL as a last resort.
        pkill -TERM -f "vllm serve" || true
        sleep 2
        pkill -KILL -f "vllm serve" 2>/dev/null || true
    fi
    echo "Waiting for VRAM to release..."
    # Poll up to 30 times x 2s = 60s for the GPU to drop below 10% VRAM use.
    for _ in {1..30}; do
        # One rocm-smi call per poll (was two), and `exit` after the first
        # awk match so a multi-GPU box cannot yield a multi-line value that
        # would break the integer arithmetic below.
        meminfo=$(rocm-smi --showmeminfo vram 2>/dev/null || true)
        used=$(awk '/VRAM Total Used Memory/ {print $NF; exit}' <<<"$meminfo")
        total=$(awk '/VRAM Total Memory \(B\)/ {print $NF; exit}' <<<"$meminfo")
        if [ -n "$used" ] && [ -n "$total" ] && [ "$total" -gt 0 ]; then
            pct=$(( used * 100 / total ))
            echo "  VRAM: ${pct}%"
            if [ "$pct" -lt 10 ]; then
                echo "VRAM free."
                exit 0
            fi
        fi
        sleep 2
    done
    # Non-fatal on purpose: recipes that depend on `stop` should still run
    # even if VRAM did not drain in time. Diagnostic goes to stderr.
    echo "Warning: VRAM did not fully release after 60s" >&2
{% endraw %}
{% for model in vllm_models_list %}
# Serve {{ model.name }}
{{ model.recipe }}: stop
    #!/usr/bin/env bash
    # NOTE(review): .bashrc is expected to export VLLM_API_KEY and any ROCm
    # environment — confirm; an unset key now yields an empty (quoted) value
    # instead of silently shifting the vllm argv.
    source "{{ vllm_home }}/.bashrc"
    nohup "{{ vllm_home }}/vllm-venv/bin/vllm" serve "{{ vllm_models }}/{{ model.dir }}" \
        --served-model-name {{ model.name }} \
        --host 0.0.0.0 \
        --port {{ vllm_port }} \
        --api-key "${VLLM_API_KEY}" \
        --dtype auto \
        --max-model-len {{ model.max_model_len }} \
        --gpu-memory-utilization {{ vllm_gpu_memory_utilization }}{% if model.tool_call_parser is defined %} \
        --enable-auto-tool-choice \
        --tool-call-parser {{ model.tool_call_parser }}{% endif %} \
        > "{{ vllm_home }}/vllm.log" 2>&1 &
    echo "Started {{ model.name }} (pid $!). Run 'just logs' to follow."
{% endfor %}