integrated rigby

This commit is contained in:
Bryan Ramos 2026-04-12 13:55:51 -04:00
parent 532c874c96
commit 6af94e54d5
9 changed files with 1653 additions and 8 deletions

View file

@ -41,6 +41,29 @@
- linux-headers-{{ ansible_kernel }} - linux-headers-{{ ansible_kernel }}
- linux-modules-extra-{{ ansible_kernel }} - linux-modules-extra-{{ ansible_kernel }}
- samba - samba
- just
- python3.12
- python3.12-venv
- docker.io
vllm_user: vllm
vllm_home: /home/vllm
vllm_venv: /home/vllm/vllm-venv
vllm_models: /home/vllm/models
vllm_port: 8000
vllm_gpu_memory_utilization: "0.95"
vllm_rocm_wheels_url: https://wheels.vllm.ai/rocm/0.19.0/rocm721
vllm_models_list:
- name: Qwen2.5-Coder-14B
recipe: coder
dir: Qwen2.5-Coder-14B-Instruct
max_model_len: 4096
- name: Qwen2.5-7B-Instruct
recipe: qwen7b
dir: Qwen2.5-7B-Instruct
max_model_len: 8192
tool_call_parser: hermes
librechat_root: /home/bryan/LibreChat
librechat_repo_url: https://github.com/danny-avila/LibreChat
tasks: tasks:
- name: Ensure deadsnakes PPA is configured - name: Ensure deadsnakes PPA is configured
ansible.builtin.apt_repository: ansible.builtin.apt_repository:
@ -304,3 +327,116 @@
ansible.builtin.command: netplan apply ansible.builtin.command: netplan apply
when: rigby_static_network_enabled | bool when: rigby_static_network_enabled | bool
changed_when: true changed_when: true
# --- vLLM ---
# NOTE(review): several tasks below use become_user; this only takes effect
# if the play sets become: true (not visible in this hunk) — confirm.
- name: Ensure vllm user exists
  ansible.builtin.user:
    name: "{{ vllm_user }}"
    # render + video group membership is required for GPU device access.
    groups:
      - render
      - video
    append: true
    create_home: true
    shell: /bin/bash

- name: Ensure vllm models directory exists
  ansible.builtin.file:
    path: "{{ vllm_models }}"
    state: directory
    owner: "{{ vllm_user }}"
    group: "{{ vllm_user }}"
    mode: "0755"

- name: Ensure uv is installed for vllm user
  # NOTE(review): piping a remote script to sh is unverified; consider a
  # pinned, checksummed uv release instead.
  ansible.builtin.shell: |
    set -euo pipefail
    curl -LsSf https://astral.sh/uv/install.sh | sh
  args:
    creates: "{{ vllm_home }}/.local/bin/uv"
    # 'pipefail' is a bashism; the shell module defaults to /bin/sh,
    # where 'set -o pipefail' fails. Force bash explicitly.
    executable: /bin/bash
  become_user: "{{ vllm_user }}"

- name: Ensure vllm venv exists
  ansible.builtin.command:
    argv:
      - "{{ vllm_home }}/.local/bin/uv"
      - venv
      - --python
      - "3.12"
      - "{{ vllm_venv }}"
  args:
    # Idempotence: skip once the venv's interpreter exists.
    creates: "{{ vllm_venv }}/bin/python"
  become_user: "{{ vllm_user }}"

- name: Install vLLM in venv
  ansible.builtin.command:
    argv:
      - "{{ vllm_home }}/.local/bin/uv"
      - pip
      - install
      - --python
      - "{{ vllm_venv }}/bin/python"
      - vllm
      # Pull ROCm wheels from the pinned index defined in vars.
      - --extra-index-url
      - "{{ vllm_rocm_wheels_url }}"
  args:
    creates: "{{ vllm_venv }}/bin/vllm"
  become_user: "{{ vllm_user }}"

- name: Install vllm justfile
  ansible.builtin.template:
    src: ../templates/vllm-justfile.j2
    dest: "{{ vllm_home }}/justfile"
    owner: "{{ vllm_user }}"
    group: "{{ vllm_user }}"
    mode: "0644"

- name: Ensure vllm bashrc sources api key from file
  # The key material itself lives in ~/.api_key (not managed by this task).
  ansible.builtin.lineinfile:
    path: "{{ vllm_home }}/.bashrc"
    line: "export VLLM_API_KEY=$(cat {{ vllm_home }}/.api_key)"
    state: present

# --- LibreChat ---
- name: Ensure Docker service is enabled and running
  ansible.builtin.systemd_service:
    name: docker
    enabled: true
    state: started

- name: Ensure bryan is in docker group
  ansible.builtin.user:
    name: "{{ rigby_user }}"
    groups:
      - docker
    append: true

- name: Ensure LibreChat repo is present
  ansible.builtin.git:
    repo: "{{ librechat_repo_url }}"
    dest: "{{ librechat_root }}"
    # Clone once; never fast-forward an existing checkout.
    update: false
  become_user: "{{ rigby_user }}"

- name: Install librechat.yaml config
  ansible.builtin.template:
    src: ../templates/librechat.yaml.j2
    dest: "{{ librechat_root }}/librechat.yaml"
    owner: "{{ rigby_user }}"
    group: "{{ rigby_user }}"
    mode: "0644"

- name: Install librechat systemd unit
  ansible.builtin.template:
    src: ../templates/librechat.service.j2
    dest: /etc/systemd/system/librechat.service
    owner: root
    group: root
    mode: "0644"

- name: Reload systemd and enable librechat service
  # NOTE(review): the unit is enabled but no state: is set, so it will not
  # start until next boot — confirm this is intentional (add state: started
  # otherwise).
  ansible.builtin.systemd_service:
    name: librechat.service
    daemon_reload: true
    enabled: true

View file

@ -0,0 +1,18 @@
# Runs the LibreChat docker-compose stack as a systemd service.
[Unit]
Description=LibreChat
# Order after the network and the Docker daemon.
After=network-online.target docker.service
# Weak pull-in of network-online.target (ordering alone does not start it).
Wants=network-online.target
# Hard dependency: this unit stops if docker.service stops or fails.
Requires=docker.service
[Service]
# 'docker compose up' stays in the foreground, so simple is appropriate.
Type=simple
User={{ rigby_user }}
# docker group membership grants access to the Docker socket.
Group=docker
WorkingDirectory={{ librechat_root }}
ExecStart=/usr/bin/docker compose up
ExecStop=/usr/bin/docker compose down
# Restart the stack automatically if compose exits with an error.
Restart=on-failure
RestartSec=10
[Install]
WantedBy=multi-user.target

View file

@ -0,0 +1,43 @@
# LibreChat configuration (templated by Ansible).
# Quoted so the version is parsed as a string, not a number-like scalar.
version: "1.3.5"
cache: true

interface:
  webSearch: false
  runCode: false
  # MCP UI permissions. Nested under interface: a second top-level
  # `mcpServers` key would be a duplicate-key error (last one silently wins
  # in most parsers). NOTE(review): confirm this nesting against the
  # LibreChat config schema for this version.
  mcpServers:
    use: true
    create: true
    share: false
    public: false

# MCP server definitions (distinct from the interface permissions above).
mcpServers:
  searxng:
    command: npx
    args:
      - -y
      - mcp-searxng
    env:
      # Reachable via the compose network's service name.
      SEARXNG_URL: http://searxng:8080
    timeout: 60000
  fetch:
    command: uvx
    args:
      - mcp-server-fetch
      - --ignore-robots-txt
    timeout: 60000

endpoints:
  custom:
    - name: "rigby-vllm"
      # Expanded by LibreChat from the container environment at runtime.
      apiKey: "${VLLM_API_KEY}"
      # host.docker.internal reaches the vLLM server on the Docker host.
      baseURL: "http://host.docker.internal:{{ vllm_port }}/v1"
      models:
        default: []
        # Discover models from the vLLM /v1/models endpoint.
        fetch: true
      titleConvo: true
      titleModel: "current_model"
      titleMessageRole: "user"
      summarize: false
      summaryModel: "current_model"
      modelDisplayLabel: "Rigby vLLM"

View file

@ -0,0 +1,58 @@
{% raw %}# List available recipes
[private]
default:
    @just --list

# Show currently running vLLM server
status:
    @pgrep -a -f "vllm serve" || echo "No vLLM server running"

# Tail the vLLM log
logs:
    @tail -f {% endraw %}{{ vllm_home }}/vllm.log{% raw %}

# Stop any running vLLM server and wait for VRAM to free
stop:
    #!/usr/bin/env bash
    set -euo pipefail
    if pgrep -f "vllm serve" > /dev/null; then
        echo "Stopping vLLM..."
        pkill -TERM -f "vllm serve" || true
        sleep 2
        pkill -KILL -f "vllm serve" 2>/dev/null || true
    fi
    echo "Waiting for VRAM to release..."
    # Poll rocm-smi until VRAM usage drops below 10% (30 tries x 2s = 60s).
    for i in $(seq 1 30); do
        used=$(rocm-smi --showmeminfo vram 2>/dev/null | awk '/VRAM Total Used Memory/ {print $NF}')
        total=$(rocm-smi --showmeminfo vram 2>/dev/null | awk '/VRAM Total Memory \(B\)/ {print $NF}')
        if [ -n "$used" ] && [ -n "$total" ] && [ "$total" -gt 0 ]; then
            pct=$(( used * 100 / total ))
            echo " VRAM: ${pct}%"
            if [ "$pct" -lt 10 ]; then
                echo "VRAM free."
                exit 0
            fi
        fi
        sleep 2
    done
    echo "Warning: VRAM did not fully release after 60s"
{% endraw %}
{% for model in vllm_models_list %}
# Serve {{ model.name }}
{{ model.recipe }}: stop
    #!/usr/bin/env bash
    # .bashrc exports VLLM_API_KEY; NOTE(review): many default .bashrc files
    # return early when non-interactive — confirm the export line precedes
    # any interactivity guard.
    source {{ vllm_home }}/.bashrc
    nohup {{ vllm_venv }}/bin/vllm serve {{ vllm_models }}/{{ model.dir }} \
        --served-model-name {{ model.name }} \
        --host 0.0.0.0 \
        --port {{ vllm_port }} \
        --api-key ${VLLM_API_KEY} \
        --dtype auto \
        --max-model-len {{ model.max_model_len }} \
        --gpu-memory-utilization {{ vllm_gpu_memory_utilization }}{% if model.tool_call_parser is defined %} \
        --enable-auto-tool-choice \
        --tool-call-parser {{ model.tool_call_parser }}{% endif %} \
        > {{ vllm_home }}/vllm.log 2>&1 &
    echo "Started {{ model.name }} (pid $!). Run 'just logs' to follow."
{% endfor %}

View file

@ -163,14 +163,7 @@ in
settings = { settings = {
# Explicit subdomains -> local server # Explicit subdomains -> local server
address = [ address = [
"/git.ramos.codes/192.168.0.154" "/*.ramos.codes/192.168.0.154"
"/ln.ramos.codes/192.168.0.154"
"/photos.ramos.codes/192.168.0.154"
"/test.ramos.codes/192.168.0.154"
"/electrum.ramos.codes/192.168.0.154"
"/immich.ramos.codes/192.168.0.154"
"/forgejo.ramos.codes/192.168.0.154"
"/frigate.ramos.codes/192.168.0.154"
]; ];
server = [ "192.168.0.1" ]; server = [ "192.168.0.1" ];
}; };

View file

@ -71,6 +71,33 @@ in
''; '';
}; };
}; };
virtualHosts."chat.${domain}" = {
useACMEHost = domain;
forceSSL = true;
locations."/" = {
proxyPass = "http://192.168.0.23:3080";
proxyWebsockets = true;
};
};
virtualHosts."ai.${domain}" = {
useACMEHost = domain;
forceSSL = true;
locations."/" = {
proxyPass = "http://192.168.0.23:8000";
proxyWebsockets = true;
};
};
virtualHosts."comfy.${domain}" = {
useACMEHost = domain;
forceSSL = true;
locations."/" = {
proxyPass = "http://192.168.0.23:8188";
proxyWebsockets = true;
};
};
}; };
}; };
} }

View file

@ -0,0 +1,131 @@
# NixOS module: Sandpack services for in-browser code execution
# (OCI bundler container + a from-source static preview server behind nginx).
{ pkgs, lib, config, ... }:
with lib;
let
# Module option namespace.
cfg = config.modules.system.sandpack;
domain = "ramos.codes";
# Build LibreChat's fork of static-browser-server from source using pnpm.
staticBrowserServer = pkgs.stdenvNoCC.mkDerivation (finalAttrs: let
pnpm = pkgs.pnpm_10;
in {
pname = "static-browser-server";
version = "1.0.6";
src = pkgs.fetchFromGitHub {
owner = "LibreChat-AI";
repo = "static-browser-server";
rev = "30de7ae4ebf5433acc0fb640649fb77426a79e04";
hash = "sha256-OVAGnoh7KRmTPY2bXE0kvCMiPx3tXAooDa8n8ujugYM=";
};
patches = [ ./pnpm-lock.patch ];
# Fixed-output derivation with the pre-fetched pnpm store; patches must be
# included so the lockfile patch affects the dependency hash.
# NOTE(review): nixpkgs usually exposes this as pnpm.fetchDeps — confirm
# pkgs.fetchPnpmDeps exists on this channel.
pnpmDeps = pkgs.fetchPnpmDeps {
inherit (finalAttrs) pname version src patches;
pnpm = pnpm;
fetcherVersion = 3;
hash = "sha256-+Gz8tQy4rkoi365To9GI6sShPTjuKEmZxtV5mEB2UYk=";
};
nativeBuildInputs = [
pkgs.makeWrapper
pkgs.nodejs
# Wires pnpmDeps into the build's offline store.
pkgs.pnpmConfigHook
pnpm
];
buildPhase = ''
runHook preBuild
pnpm build
runHook postBuild
'';
# Ship the built "out" tree, bundle the demo server with esbuild into a
# single CJS file, and wrap node so $out/bin has a runnable entry point.
installPhase = ''
runHook preInstall
mkdir -p $out/libexec/static-browser-server $out/bin
cp -r out $out/libexec/
pnpm exec esbuild \
./servers/demo-server.ts \
--bundle \
--platform=node \
--format=cjs \
--outfile=$out/libexec/static-browser-server/demo-server.js
makeWrapper ${pkgs.nodejs}/bin/node $out/bin/static-browser-server \
--add-flags $out/libexec/static-browser-server/demo-server.js
runHook postInstall
'';
});
in
{
options.modules.system.sandpack = {
enable = mkEnableOption "Sandpack services";
};
config = mkIf cfg.enable {
virtualisation.oci-containers = {
backend = "podman";
# Sandpack bundler; bound to loopback only, exposed via nginx below.
containers.sandpack-bundler = {
image = "ghcr.io/librechat-ai/codesandbox-client/bundler:latest";
ports = [ "127.0.0.1:4333:80" ];
};
};
# Preview server on loopback:4324, sandboxed under a transient user.
systemd.services.sandpack-preview = {
description = "Sandpack static preview server";
after = [ "network-online.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
ExecStart = "${staticBrowserServer}/bin/static-browser-server";
# CWD must contain the built "out" assets the server serves.
WorkingDirectory = "${staticBrowserServer}/libexec/static-browser-server";
Restart = "always";
RestartSec = 5;
DynamicUser = true;
Environment = [
"HOST=127.0.0.1"
"PORT=4324"
];
};
};
# TLS front-end for the bundler, with permissive CORS and an OPTIONS
# preflight short-circuit (the bundler is loaded cross-origin from chat).
services.nginx.virtualHosts."bundler.${domain}" = {
useACMEHost = domain;
forceSSL = true;
locations."/" = {
proxyPass = "http://127.0.0.1:4333";
extraConfig = ''
add_header Access-Control-Allow-Origin "*" always;
add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS" always;
add_header Access-Control-Allow-Headers "Content-Type, Authorization" always;
add_header Access-Control-Max-Age "3600" always;
if ($request_method = OPTIONS) {
return 204;
}
'';
};
};
# Preview host plus a regex alias matching per-sandbox "*-preview"
# subdomains. NOTE(review): the wildcard cert/ACME host must cover these
# aliases for TLS to validate — confirm.
services.nginx.virtualHosts."preview.${domain}" = {
useACMEHost = domain;
forceSSL = true;
serverAliases = [ "~^.+-preview\\.ramos\\.codes$" ];
locations."/" = {
proxyPass = "http://127.0.0.1:4324";
extraConfig = ''
add_header Access-Control-Allow-Origin "*" always;
add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS" always;
add_header Access-Control-Allow-Headers "Content-Type, Authorization" always;
add_header Access-Control-Max-Age "3600" always;
if ($request_method = OPTIONS) {
return 204;
}
'';
};
};
};
}

File diff suppressed because it is too large Load diff

View file

@ -14,6 +14,7 @@
modules.system = { modules.system = {
nginx.enable = true; nginx.enable = true;
sandpack.enable = true;
forgejo.enable = true; forgejo.enable = true;
frigate.enable = true; frigate.enable = true;
immich.enable = true; immich.enable = true;