This commit is contained in:
Bryan Ramos 2026-04-15 20:58:07 -04:00
commit 864c69fe61
147 changed files with 11233 additions and 0 deletions

64
external/rigby/README.md vendored Normal file
View file

@ -0,0 +1,64 @@
# Rigby Recovery
This directory contains disaster-recovery automation for `rigby`, the Ubuntu
AI rig at `192.168.0.23`.
## Scope
This automation manages the host state after a manual Ubuntu install.
It is intended to restore the working state we validated for:
- AMD ROCm `7.2.1`
- `amdgpu-dkms`
- `amdgpu.cwsr_enable=0`
- pinned ComfyUI checkout
- `uv`-managed Python `3.13` venv
- ROCm PyTorch
- ComfyUI service layout
- output sharing over Samba
- required groups and permissions
## Manual Prerequisites
These are intentionally documented, not automated:
- Install Ubuntu `24.04.4`
- Update BIOS to the known-good version for the board
- Verify BIOS settings:
- `Above 4G Decoding = Enabled`
- `SVM = Enabled`
- UEFI boot
- sane PCIe slot configuration
- Ensure host SSH is reachable as `bryan`
- Ensure passwordless sudo works for `bryan`
- Ensure the initial DHCP lease is known so recovery can begin
## Recovery Flow
1. Install Ubuntu manually.
2. Clone this repository onto the operator machine.
3. From the repo root, run `just rigby-check HOST=<rigby-ip>`.
4. Run `just rigby-recover HOST=<rigby-ip>`.
5. Reboot `rigby`.
6. Validate:
- `rocminfo`
- `rocm-smi`
- ComfyUI startup
## Notes
- The AMD repo and package installs are automated here, but BIOS and physical
host setup remain manual.
- ComfyUI itself is deployed as an application under `/home/comfy/ComfyUI`.
- The `comfyui.service` unit is installed but left disabled so that it can be
  started on demand.
- Models, LoRAs, VAEs, outputs, and other AI assets are not restored by this
automation. `rigby` is the source of truth for that data, so disaster
recovery for models requires a separate backup strategy.
- The `just` entrypoints accept `HOST=<ip>` so recovery does not depend on a
fixed DHCP lease.
- Recovery installs the configured SSH key for `bryan`.
- Static IP configuration is applied via netplan during the playbook run.
  The SSH session used for recovery may be interrupted once the new address is
  applied; subsequent access should use the final static IP.

9
external/rigby/ansible.cfg vendored Normal file
View file

@ -0,0 +1,9 @@
# Ansible configuration for rigby recovery runs from this directory.
[defaults]
# Hosts come from the static inventory next to this file.
inventory = inventory.ini
# The target is freshly reinstalled, so its host key changes on every
# recovery; skipping the check avoids manual known_hosts cleanup.
# NOTE(review): this disables MITM protection — trusted LAN use only.
host_key_checking = False
# YAML-formatted task output for readability.
# NOTE(review): on newer Ansible releases the `yaml` callback ships in the
# community.general collection — confirm it is available on the operator box.
stdout_callback = yaml
# Do not litter the tree with .retry files.
retry_files_enabled = False
# Auto-detect the remote Python without emitting discovery warnings.
interpreter_python = auto_silent

[ssh_connection]
# Fewer SSH round-trips per task; requires sudo without requiretty
# (Ubuntu default).
pipelining = True

2
external/rigby/inventory.ini vendored Normal file
View file

@ -0,0 +1,2 @@
# Static inventory for the rigby AI rig (single host). The just entrypoints
# can override the address with HOST=<ip> while the host is still on its
# initial DHCP lease.
[ai_rig]
rigby ansible_host=192.168.0.23 ansible_user=bryan

442
external/rigby/playbooks/recover.yml vendored Normal file
View file

@ -0,0 +1,442 @@
---
# Disaster-recovery play for the rigby AI rig. Assumes a fresh manual
# Ubuntu install reachable over SSH with passwordless sudo for the
# operator account (manual prerequisites are listed in the README).
- name: Recover rigby AI rig
  hosts: ai_rig
  become: true
  vars:
    # Operator account that receives SSH keys and group memberships.
    rigby_user: bryan
    # Public keys installed into the operator's authorized_keys.
    rigby_recovery_ssh_keys:
      - ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDl4895aB9P5p/lp8Hq5rHun4clvhyTSHFi3U2d6OOBoW5Fm+VcQnW/xbjmCBsXk5BdiowsBxQhwnzdfz/KJL7J5RobomUEaVRwb9UwT88eJveLp14BG8j2J3SjfyhrCX+4jkPx0bPQk1HGcuYY+tPEXf1q/ps88Dhu0CARBIzYQOTYY6b1qWzxpDoFZGHjKG8g5iY6FIu65yKKvvVy1f8IgZ3l3IpwBWVamxgkTcYY0QYSrmzo1n7TXxwrWbvenAqBsQ0cBPs+gVa3uIr+1TJl0Az5SElBVGu3LvUdlk58trtPUj6TQR3YUkg7Vjll7WHOdqhux5ZQNhjkOsHerf0Tw86e6cEzgeTuIbQHIb0LcsUunwKcuh2+au7RO599cvHn0+xZE5MZBxloDDaJ3JsiliM8kyPP/U3ERj03cWLW7BqbT+sfjAOl21RCzk0iQxk1wt/8VmtCr9Adv7IyrtaYvf/bwRP+g+9ldmzKGt8Mdb605uVzZ70H/LLm17f40Te+QHaex5by/6p6cuwEEZtgIg53Wpglu0rA6UxrBfQEHKl/Jt3FLeE0mnEyYkkR2MnHNtyWRIXtuqYZMAm2Ub1pFHH7jQV1gGiDVTw6a2eIwK21a/hXtRjFUpFd1nB1n+KNfJBE4zT3wm3Ud7mKw/6rWnoRyhYZvGXkFdp+iEs49Q== itme-brain@github/78120816
    # Static network settings applied via netplan later in the play.
    rigby_static_network_enabled: true
    rigby_interface: eno1
    rigby_static_ip: 192.168.0.23/24
    rigby_gateway: 192.168.0.1
    rigby_dns:
      - 192.168.0.1
      - 1.1.1.1
    # Dedicated service account and filesystem layout for ComfyUI.
    comfy_user: comfy
    comfy_group: comfy
    comfy_home: /home/comfy
    comfy_root: /home/comfy/ComfyUI
    comfy_venv: /home/comfy/comfy-venv
    comfy_python_version: "3.13"
    comfy_port: 8188
    comfy_output_dir: /home/comfy/ComfyUI/output
    # Pinned ComfyUI checkout (exact commit) plus companion versions.
    comfy_repo_url: https://github.com/comfy-org/ComfyUI
    comfy_repo_version: a1344238901efc5ea199d8094cb16fca36ceb28b
    comfy_manager_version: "4.1"
    # Index serving the ROCm build of PyTorch.
    comfy_torch_index_url: https://download.pytorch.org/whl/rocm7.2
    # Validated kernel argument (disables compute wave save/restore on amdgpu).
    grub_cmdline_linux_default: "amdgpu.cwsr_enable=0"
    # Version-pinned AMD driver installer package.
    amd_driver_deb: amdgpu-install_7.2.1.70201-1_all.deb
    amd_driver_url: https://repo.radeon.com/amdgpu-install/7.2.1/ubuntu/noble/amdgpu-install_7.2.1.70201-1_all.deb
    # Base packages; headers/modules-extra track the currently running kernel.
    rigby_packages:
      - curl
      - git
      - rsync
      - software-properties-common
      - python-is-python3
      - python3.13
      - python3.13-venv
      - python3.13-dev
      - build-essential
      - linux-headers-{{ ansible_kernel }}
      - linux-modules-extra-{{ ansible_kernel }}
      - samba
      - just
      - python3.12
      - python3.12-venv
      - docker.io
    # vLLM service account, venv, and serving defaults.
    vllm_user: vllm
    vllm_home: /home/vllm
    vllm_venv: /home/vllm/vllm-venv
    vllm_models: /home/vllm/models
    vllm_port: 8000
    vllm_gpu_memory_utilization: "0.95"
    # Extra index serving ROCm wheels for vLLM.
    vllm_rocm_wheels_url: https://wheels.vllm.ai/rocm/0.19.0/rocm721
    # Models rendered into the vllm user's justfile as serve recipes.
    vllm_models_list:
      - name: Qwen2.5-Coder-14B
        recipe: coder
        dir: Qwen2.5-Coder-14B-Instruct
        max_model_len: 4096
      - name: Qwen2.5-7B-Instruct
        recipe: qwen7b
        dir: Qwen2.5-7B-Instruct
        max_model_len: 8192
        # Presence of this key enables tool-calling flags in the recipe.
        tool_call_parser: hermes
    # LibreChat runs via docker compose out of the operator's home.
    librechat_root: /home/bryan/LibreChat
    librechat_repo_url: https://github.com/danny-avila/LibreChat
  tasks:
    # --- Base OS, GPU driver, users, boot config ---
    - name: Ensure deadsnakes PPA is configured
      # Provides the python3.13 packages used for the ComfyUI venv.
      ansible.builtin.apt_repository:
        repo: ppa:deadsnakes/ppa
        state: present
        update_cache: true
    - name: Install required Ubuntu packages
      ansible.builtin.apt:
        name: "{{ rigby_packages }}"
        state: present
        update_cache: true
    - name: Ensure AMD installer package is present
      # Downloads the pinned amdgpu-install .deb from AMD's repository.
      ansible.builtin.get_url:
        url: "{{ amd_driver_url }}"
        dest: "/tmp/{{ amd_driver_deb }}"
        mode: "0644"
    - name: Install AMD installer package
      # Installing the .deb configures the AMD apt repositories used below.
      ansible.builtin.apt:
        deb: "/tmp/{{ amd_driver_deb }}"
        state: present
    - name: Install AMD GPU DKMS driver
      # DKMS builds against the kernel headers installed above.
      ansible.builtin.apt:
        name: amdgpu-dkms
        state: present
        update_cache: true
    - name: Install ROCm stack
      ansible.builtin.apt:
        name: rocm
        state: present
    - name: Ensure required groups exist
      # render/video grant GPU device access; comfy_group owns shared output.
      ansible.builtin.group:
        name: "{{ item }}"
        state: present
      loop:
        - render
        - video
        - "{{ comfy_group }}"
    - name: Ensure comfy user exists
      ansible.builtin.user:
        name: "{{ comfy_user }}"
        group: "{{ comfy_group }}"
        groups:
          - render
          - video
        append: true
        create_home: true
        shell: /bin/bash
    - name: Ensure bryan is in required groups
      # GPU access plus group-write access to the shared output tree.
      ansible.builtin.user:
        name: "{{ rigby_user }}"
        groups:
          - render
          - video
          - "{{ comfy_group }}"
        append: true
    - name: Ensure recovery SSH keys are present for bryan
      ansible.posix.authorized_key:
        user: "{{ rigby_user }}"
        state: present
        key: "{{ item }}"
      loop: "{{ rigby_recovery_ssh_keys }}"
    - name: Configure GRUB default kernel args
      # Replaces the whole GRUB_CMDLINE_LINUX_DEFAULT line with the
      # validated kernel arguments.
      ansible.builtin.lineinfile:
        path: /etc/default/grub
        regexp: '^GRUB_CMDLINE_LINUX_DEFAULT='
        line: 'GRUB_CMDLINE_LINUX_DEFAULT="{{ grub_cmdline_linux_default }}"'
    - name: Ensure GRUB menu is shown
      # Visible 5-second menu helps recover from a bad kernel/driver combo.
      ansible.builtin.lineinfile:
        path: /etc/default/grub
        regexp: '^{{ item.key }}='
        line: "{{ item.key }}={{ item.value }}"
      loop:
        - { key: GRUB_TIMEOUT_STYLE, value: "menu" }
        - { key: GRUB_TIMEOUT, value: "5" }
    - name: Regenerate grub config
      # update-grub has no change detection; always reported as changed.
      ansible.builtin.command: update-grub
      changed_when: true
    - name: Ensure Comfy directories exist
      # Mode 2775 on the output dir sets setgid so new entries inherit the
      # comfy group; piptmp is scratch space for large wheel unpacks.
      ansible.builtin.file:
        path: "{{ item.path }}"
        state: directory
        owner: "{{ comfy_user }}"
        group: "{{ comfy_group }}"
        mode: "{{ item.mode }}"
      loop:
        - { path: "{{ comfy_home }}", mode: "0775" }
        - { path: "{{ comfy_root }}", mode: "0775" }
        - { path: "{{ comfy_output_dir }}", mode: "2775" }
        - { path: "{{ comfy_home }}/.local/bin", mode: "0775" }
        - { path: "{{ comfy_home }}/piptmp", mode: "0775" }
- name: Ensure uv is installed for comfy
ansible.builtin.shell: |
set -euo pipefail
curl -LsSf https://astral.sh/uv/install.sh | sh
args:
creates: "{{ comfy_home }}/.local/bin/uv"
become_user: "{{ comfy_user }}"
    # --- ComfyUI application deploy ---
    - name: Ensure ComfyUI repo is present at pinned revision
      ansible.builtin.git:
        repo: "{{ comfy_repo_url }}"
        dest: "{{ comfy_root }}"
        version: "{{ comfy_repo_version }}"
        update: true
      become_user: "{{ comfy_user }}"
    - name: Ensure ComfyUI venv exists
      # `creates` keeps this idempotent across reruns.
      ansible.builtin.command:
        argv:
          - "{{ comfy_home }}/.local/bin/uv"
          - venv
          - --python
          - "{{ comfy_python_version }}"
          - "{{ comfy_venv }}"
      args:
        creates: "{{ comfy_venv }}/bin/python"
      become_user: "{{ comfy_user }}"
    - name: Install base Python packaging tools in Comfy venv
      # NOTE(review): no `creates`/`changed_when`, so this (and the install
      # tasks below) always report changed on reruns; harmless but noisy.
      ansible.builtin.command:
        argv:
          - "{{ comfy_home }}/.local/bin/uv"
          - pip
          - install
          - --python
          - "{{ comfy_venv }}/bin/python"
          - --upgrade
          - pip
          - setuptools
          - wheel
      become_user: "{{ comfy_user }}"
    - name: Install ROCm PyTorch in Comfy venv
      # TMPDIR points at comfy's own scratch dir so large wheel unpacks do
      # not exhaust /tmp.
      ansible.builtin.command:
        argv:
          - "{{ comfy_home }}/.local/bin/uv"
          - pip
          - install
          - --python
          - "{{ comfy_venv }}/bin/python"
          - --index-url
          - "{{ comfy_torch_index_url }}"
          - torch
          - torchvision
          - torchaudio
      environment:
        TMPDIR: "{{ comfy_home }}/piptmp"
      become_user: "{{ comfy_user }}"
    - name: Install ComfyUI requirements in Comfy venv
      ansible.builtin.command:
        argv:
          - "{{ comfy_home }}/.local/bin/uv"
          - pip
          - install
          - --python
          - "{{ comfy_venv }}/bin/python"
          - -r
          - "{{ comfy_root }}/requirements.txt"
      environment:
        TMPDIR: "{{ comfy_home }}/piptmp"
      become_user: "{{ comfy_user }}"
    - name: Install ComfyUI-Manager in Comfy venv
      # Pinned to comfy_manager_version for reproducible recovery.
      ansible.builtin.command:
        argv:
          - "{{ comfy_home }}/.local/bin/uv"
          - pip
          - install
          - --python
          - "{{ comfy_venv }}/bin/python"
          - "comfyui-manager=={{ comfy_manager_version }}"
      environment:
        TMPDIR: "{{ comfy_home }}/piptmp"
      become_user: "{{ comfy_user }}"
- name: Ensure output directories have group inheritance
ansible.builtin.shell: |
set -euo pipefail
find "{{ comfy_output_dir }}" -type d -exec chown {{ comfy_user }}:{{ comfy_group }} {} +
find "{{ comfy_output_dir }}" -type d -exec chmod 2775 {} +
changed_when: true
- name: Ensure output files are group writable
ansible.builtin.shell: |
set -euo pipefail
find "{{ comfy_output_dir }}" -type f -exec chown {{ comfy_user }}:{{ comfy_group }} {} +
find "{{ comfy_output_dir }}" -type f -exec chmod 0664 {} +
changed_when: true
    - name: Install ComfyUI systemd unit
      # Installed but left disabled further below; started on demand.
      ansible.builtin.template:
        src: ../templates/comfyui.service.j2
        dest: /etc/systemd/system/comfyui.service
        owner: root
        group: root
        mode: "0644"
- name: Ensure Samba include directory exists
ansible.builtin.file:
path: /etc/samba/smb.conf.d
state: directory
owner: root
group: root
mode: "0755"
- name: Install Samba share config for Comfy outputs
ansible.builtin.template:
src: ../templates/comfy-output.conf.j2
dest: /etc/samba/smb.conf.d/comfy-output.conf
owner: root
group: root
mode: "0644"
- name: Ensure Samba includes conf.d snippets
ansible.builtin.blockinfile:
path: /etc/samba/smb.conf
marker: "; {mark} ANSIBLE MANAGED COMFY OUTPUT INCLUDE"
block: |
include = /etc/samba/smb.conf.d/comfy-output.conf
- name: Reload systemd
ansible.builtin.systemd_service:
daemon_reload: true
- name: Ensure ComfyUI service is installed but disabled
ansible.builtin.systemd_service:
name: comfyui.service
enabled: false
- name: Ensure Samba service is enabled and running
ansible.builtin.systemd_service:
name: smbd.service
enabled: true
state: started
- name: Install netplan static IP config for rigby
ansible.builtin.template:
src: ../templates/99-rigby-static.yaml.j2
dest: /etc/netplan/99-rigby-static.yaml
owner: root
group: root
mode: "0644"
when: rigby_static_network_enabled | bool
- name: Apply static netplan configuration as final step
ansible.builtin.command: netplan apply
when: rigby_static_network_enabled | bool
changed_when: true
    # --- vLLM ---
    - name: Ensure vllm user exists
      # Dedicated service account; render/video give GPU device access.
      ansible.builtin.user:
        name: "{{ vllm_user }}"
        groups:
          - render
          - video
        append: true
        create_home: true
        shell: /bin/bash
    - name: Ensure vllm models directory exists
      # Model weights are NOT restored by this play (see README); this only
      # creates the empty directory layout.
      ansible.builtin.file:
        path: "{{ vllm_models }}"
        state: directory
        owner: "{{ vllm_user }}"
        group: "{{ vllm_user }}"
        mode: "0755"
- name: Ensure uv is installed for vllm user
ansible.builtin.shell: |
set -euo pipefail
curl -LsSf https://astral.sh/uv/install.sh | sh
args:
creates: "{{ vllm_home }}/.local/bin/uv"
become_user: "{{ vllm_user }}"
    - name: Ensure vllm venv exists
      # vLLM uses Python 3.12 (from rigby_packages), unlike the ComfyUI
      # venv which is on {{ comfy_python_version }}.
      ansible.builtin.command:
        argv:
          - "{{ vllm_home }}/.local/bin/uv"
          - venv
          - --python
          - "3.12"
          - "{{ vllm_venv }}"
      args:
        creates: "{{ vllm_venv }}/bin/python"
      become_user: "{{ vllm_user }}"
    - name: Install vLLM in venv
      # ROCm wheels come from the extra index; `creates` skips reinstall.
      ansible.builtin.command:
        argv:
          - "{{ vllm_home }}/.local/bin/uv"
          - pip
          - install
          - --python
          - "{{ vllm_venv }}/bin/python"
          - vllm
          - --extra-index-url
          - "{{ vllm_rocm_wheels_url }}"
      args:
        creates: "{{ vllm_venv }}/bin/vllm"
      become_user: "{{ vllm_user }}"
    - name: Install vllm justfile
      # Rendered serve/stop/status recipes for the models in
      # vllm_models_list.
      ansible.builtin.template:
        src: ../templates/vllm-justfile.j2
        dest: "{{ vllm_home }}/justfile"
        owner: "{{ vllm_user }}"
        group: "{{ vllm_user }}"
        mode: "0644"
    - name: Ensure vllm bashrc sources api key from file
      # NOTE(review): assumes {{ vllm_home }}/.api_key exists — this play
      # never creates it; the key must be provisioned manually.
      ansible.builtin.lineinfile:
        path: "{{ vllm_home }}/.bashrc"
        line: "export VLLM_API_KEY=$(cat {{ vllm_home }}/.api_key)"
        state: present
    # --- LibreChat ---
    - name: Ensure Docker service is enabled and running
      ansible.builtin.systemd_service:
        name: docker
        enabled: true
        state: started
    - name: Ensure bryan is in docker group
      # Lets the operator run docker compose without sudo (takes effect at
      # next login).
      ansible.builtin.user:
        name: "{{ rigby_user }}"
        groups:
          - docker
        append: true
    - name: Ensure LibreChat repo is present
      # update: false — an existing checkout is left untouched on reruns.
      ansible.builtin.git:
        repo: "{{ librechat_repo_url }}"
        dest: "{{ librechat_root }}"
        update: false
      become_user: "{{ rigby_user }}"
    - name: Install librechat.yaml config
      ansible.builtin.template:
        src: ../templates/librechat.yaml.j2
        dest: "{{ librechat_root }}/librechat.yaml"
        owner: "{{ rigby_user }}"
        group: "{{ rigby_user }}"
        mode: "0644"
    - name: Install librechat systemd unit
      ansible.builtin.template:
        src: ../templates/librechat.service.j2
        dest: /etc/systemd/system/librechat.service
        owner: root
        group: root
        mode: "0644"
    - name: Reload systemd and enable librechat service
      # Enabled for start-on-boot; no `state` here, so the service is not
      # explicitly started during this play.
      ansible.builtin.systemd_service:
        name: librechat.service
        daemon_reload: true
        enabled: true

View file

@ -0,0 +1,16 @@
# Static network configuration for rigby, rendered by the recovery play.
# Applied with `netplan apply`; the recovery SSH session may drop when the
# address changes.
network:
  version: 2
  renderer: networkd
  ethernets:
    {{ rigby_interface }}:
      dhcp4: false
      addresses:
        - {{ rigby_static_ip }}
      routes:
        - to: default
          via: {{ rigby_gateway }}
      nameservers:
        addresses:
{% for dns in rigby_dns %}
          - {{ dns }}
{% endfor %}

View file

@ -0,0 +1,9 @@
; Samba share exposing the ComfyUI output tree, rendered by the recovery
; play.
; NOTE(review): `guest ok = yes` allows unauthenticated access — acceptable
; only on a trusted LAN.
[comfy-output]
    path = {{ comfy_output_dir }}
    browseable = yes
    read only = no
    guest ok = yes
    ; Map all access to the comfy user/group so created files stay
    ; consistent with the setgid output directory.
    force user = {{ comfy_user }}
    force group = {{ comfy_group }}
    ; Group-writable files; setgid on new directories so the group is
    ; inherited.
    create mask = 0664
    directory mask = 2775

View file

@ -0,0 +1,22 @@
# ComfyUI service, rendered by the recovery play. Installed but left
# disabled; started on demand.
[Unit]
Description=ComfyUI
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User={{ comfy_user }}
Group={{ comfy_group }}
# Group-writable output files, matching the Samba share and the setgid
# output directory.
UMask=0002
WorkingDirectory={{ comfy_root }}
Environment=HOME={{ comfy_home }}
Environment=COMFYUI_PATH={{ comfy_root }}
Environment=PATH={{ comfy_home }}/.local/bin:{{ comfy_venv }}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ExecStart={{ comfy_venv }}/bin/python {{ comfy_root }}/main.py --highvram --enable-manager --listen 0.0.0.0 --port {{ comfy_port }} --disable-auto-launch
Restart=on-failure
RestartSec=5
# Basic hardening.
NoNewPrivileges=true
PrivateTmp=true

[Install]
WantedBy=multi-user.target

View file

@ -0,0 +1,18 @@
# LibreChat via docker compose, rendered by the recovery play.
[Unit]
Description=LibreChat
After=network-online.target docker.service
Wants=network-online.target
# Hard dependency: stops/fails together with the Docker daemon.
Requires=docker.service

[Service]
Type=simple
User={{ rigby_user }}
Group=docker
WorkingDirectory={{ librechat_root }}
# Foreground compose so systemd supervises the compose process itself.
ExecStart=/usr/bin/docker compose up
ExecStop=/usr/bin/docker compose down
Restart=on-failure
RestartSec=10

[Install]
WantedBy=multi-user.target

View file

@ -0,0 +1,43 @@
# LibreChat configuration, rendered by the recovery play.
version: 1.3.5
cache: true
interface:
  webSearch: false
  runCode: false
  # UI permissions for MCP servers (distinct from the server definitions
  # below).
  mcpServers:
    use: true
    create: true
    share: false
    public: false
# MCP server definitions available to chats.
mcpServers:
  searxng:
    # Web search; presumably `searxng` resolves on the compose network —
    # verify against the docker-compose setup.
    command: npx
    args:
      - -y
      - mcp-searxng
    env:
      SEARXNG_URL: http://searxng:8080
    timeout: 60000
  fetch:
    command: uvx
    args:
      - mcp-server-fetch
      - --ignore-robots-txt
    timeout: 60000
endpoints:
  custom:
    # OpenAI-compatible endpoint served by vLLM on the host.
    - name: "rigby-vllm"
      # Resolved from the container environment at runtime.
      apiKey: "${VLLM_API_KEY}"
      baseURL: "http://host.docker.internal:{{ vllm_port }}/v1"
      models:
        default: []
        # Discover available models from the endpoint.
        fetch: true
      titleConvo: true
      titleModel: "current_model"
      titleMessageRole: "user"
      summarize: false
      summaryModel: "current_model"
      modelDisplayLabel: "Rigby vLLM"

View file

@ -0,0 +1,58 @@
{% raw %}# List available recipes
[private]
default:
    @just --list

# Show currently running vLLM server
status:
    @pgrep -a -f "vllm serve" || echo "No vLLM server running"

# Tail the vLLM log
logs:
    @tail -f {% endraw %}{{ vllm_home }}/vllm.log{% raw %}

# Stop any running vLLM server and wait for VRAM to free
stop:
    #!/usr/bin/env bash
    set -euo pipefail
    if pgrep -f "vllm serve" > /dev/null; then
        echo "Stopping vLLM..."
        pkill -TERM -f "vllm serve" || true
        sleep 2
        pkill -KILL -f "vllm serve" 2>/dev/null || true
    fi
    echo "Waiting for VRAM to release..."
    for i in $(seq 1 30); do
        used=$(rocm-smi --showmeminfo vram 2>/dev/null | awk '/VRAM Total Used Memory/ {print $NF}')
        total=$(rocm-smi --showmeminfo vram 2>/dev/null | awk '/VRAM Total Memory \(B\)/ {print $NF}')
        if [ -n "$used" ] && [ -n "$total" ] && [ "$total" -gt 0 ]; then
            pct=$(( used * 100 / total ))
            echo "  VRAM: ${pct}%"
            if [ "$pct" -lt 10 ]; then
                echo "VRAM free."
                exit 0
            fi
        fi
        sleep 2
    done
    echo "Warning: VRAM did not fully release after 60s"
{% endraw %}
{% for model in vllm_models_list %}
# Serve {{ model.name }}
{{ model.recipe }}: stop
    #!/usr/bin/env bash
    # Read the API key directly from its file: sourcing ~/.bashrc is
    # unreliable in non-interactive shells because Ubuntu's default .bashrc
    # returns early there, before the appended export line runs.
    VLLM_API_KEY=$(cat {{ vllm_home }}/.api_key)
    nohup {{ vllm_home }}/vllm-venv/bin/vllm serve {{ vllm_models }}/{{ model.dir }} \
        --served-model-name {{ model.name }} \
        --host 0.0.0.0 \
        --port {{ vllm_port }} \
        --api-key "${VLLM_API_KEY}" \
        --dtype auto \
        --max-model-len {{ model.max_model_len }} \
        --gpu-memory-utilization {{ vllm_gpu_memory_utilization }}{% if model.tool_call_parser is defined %} \
        --enable-auto-tool-choice \
        --tool-call-parser {{ model.tool_call_parser }}{% endif %} \
        > {{ vllm_home }}/vllm.log 2>&1 &
    echo "Started {{ model.name }} (pid $!). Run 'just logs' to follow."
{% endfor %}