# Compose definition for a single vLLM container serving a local GGUF model.
services:
  vllm-qwen:
    # Image is built from the Dockerfile that sits next to this file.
    build:
      context: .
      dockerfile: Dockerfile
    container_name: vllm_qwen_08b
    ports:
      - "8000:8000"
    # Share the host IPC namespace with the container.
    ipc: host
    # Host device nodes passed through to the container.
    # NOTE(review): presumably the AMD ROCm compute (kfd) and render (dri)
    # nodes for GPU access - confirm against the target host.
    devices:
      - "/dev/kfd:/dev/kfd"
      - "/dev/dri:/dev/dri"
    # Supplementary groups for the container process.
    # NOTE(review): presumably needed for permission on the device nodes
    # above - confirm the groups exist on the host.
    group_add:
      - video
      - render
    volumes:
      # Read-only mount of the model directory
      - /opt/models:/app/models:ro
    environment:
      # NOTE(review): presumably makes ROCm treat the GPU as gfx 11.0.0 -
      # confirm this matches the installed hardware.
      - HSA_OVERRIDE_GFX_VERSION=11.0.0
      # NOTE(review): presumably limits HIP to the first GPU - confirm.
      - HIP_VISIBLE_DEVICES=0
    # Only flags are given here, so this assumes the image's ENTRYPOINT
    # starts the vLLM server - TODO confirm in the Dockerfile.
    # The model path points inside the read-only /app/models mount.
    # Folded with strip chomping (>-): the lines join into one
    # space-separated argument string without a trailing newline.
    command: >-
      --model /app/models/Qwen3.5-0.8B-UD-Q8_K_XL.gguf
      --quantization gguf
      --gpu-memory-utilization 0.50
      --max-model-len 4096
    restart: unless-stopped