added README

This commit is contained in:
Luca Sacchi Ricciardi
2026-03-27 13:12:33 +00:00
parent 549f70b97a
commit 5647bc4336
2 changed files with 114 additions and 2 deletions

View File

@@ -22,6 +22,6 @@ services:
command: >
--model /app/models/Qwen3.5-0.8B-UD-Q8_K_XL.gguf
--quantization gguf
- --gpu-memory-utilization 0.50
- --max-model-len 4096
+ --gpu-memory-utilization 0.06
+ --max-model-len 32768
restart: unless-stopped