---
# Compose definition for a single model-serving container built from the
# Dockerfile in this directory.
services:
  qwen-08b:
    build: .
    container_name: qwen_08b_server
    ports:
      # host:container — quoted so YAML never reinterprets the mapping.
      - "8000:8000"
    devices:
      # Host GPU device nodes passed through to the container.
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    volumes:
      # Host model directory mounted at /models inside the container.
      - /opt/models:/models
    # Arguments for the server process.
    # NOTE(review): presumably appended to the image ENTRYPOINT — confirm
    # against the Dockerfile, which is not visible from here.
    #   -ngl 99         offloads all layers to the AMD GPU
    #   --host 0.0.0.0  makes the server reachable from outside the container
    # Folded with ">-" so the lines join into one space-separated string
    # with no trailing newline.
    command: >-
      -m /models/Qwen3.5-0.8B-UD-Q8_K_XL.gguf
      --host 0.0.0.0
      --port 8000
      -ngl 99
      -c 4096
    restart: unless-stopped