project refactored

This commit is contained in:
Luca Sacchi Ricciardi
2026-03-27 14:27:12 +00:00
parent 5647bc4336
commit f20f6571c2
3 changed files with 78 additions and 127 deletions

View File

@@ -1,27 +1,20 @@
# NOTE(review): this looks like a rendered diff hunk of a Docker Compose file
# whose +/- markers and indentation were stripped, so removed and added lines
# appear interleaved (e.g. two service names and two container_name entries
# below). Confirm against the repository before using any of it as a config.
services:
vllm-qwen:
build:
context: .
dockerfile: Dockerfile
container_name: vllm_qwen_08b
qwen-08b:
build: .
container_name: qwen_08b_server
ports:
- "8000:8000"
ipc: host
devices:
- "/dev/kfd:/dev/kfd"
- "/dev/dri:/dev/dri"
group_add:
- video
- render
- /dev/kfd:/dev/kfd
- /dev/dri:/dev/dri
volumes:
# Read-only mount of the model
- /opt/models:/app/models:ro
environment:
- HSA_OVERRIDE_GFX_VERSION=11.0.0
- HIP_VISIBLE_DEVICES=0
- /opt/models:/models
# -ngl 99 offloads all layers to the AMD GPU
# --host 0.0.0.0 makes it reachable from outside the container
command: >
--model /app/models/Qwen3.5-0.8B-UD-Q8_K_XL.gguf
--quantization gguf
--gpu-memory-utilization 0.06
--max-model-len 32768
-m /models/qwen1_5-0_8b-chat-q8_0.gguf
--host 0.0.0.0
--port 8000
-ngl 99
-c 4096
restart: unless-stopped