# File-viewer metadata (extraction residue): 28 lines, 642 B, YAML
# Docker Compose definition for one service, `vllm-qwen`, which builds a local
# image and passes vLLM-style CLI flags to serve a GGUF model from a read-only
# host directory.
# NOTE(review): the device nodes and HSA/HIP variables suggest an AMD ROCm GPU
# host — confirm against the Dockerfile, which is not visible here.
services:
  vllm-qwen:
    # Image is built from the Dockerfile in the same directory as this file.
    build:
      context: .
      dockerfile: Dockerfile
    container_name: vllm_qwen_08b
    ports:
      # host:container; quoted so the mapping is always read as a string.
      - "8000:8000"
    # Use the host IPC namespace.
    # NOTE(review): presumably needed for shared memory — confirm.
    ipc: host
    devices:
      # Host device nodes passed through to the container unchanged.
      - "/dev/kfd:/dev/kfd"
      - "/dev/dri:/dev/dri"
    # Supplementary groups for the container process.
    # NOTE(review): presumably these grant access to the device nodes above;
    # verify the group names exist on the host/image.
    group_add:
      - video
      - render
    volumes:
      # Read-only mount of the model directory.
      - "/opt/models:/app/models:ro"
    environment:
      # NOTE(review): presumably forces the GPU to be treated as gfx 11.0.0 —
      # confirm this matches the installed hardware.
      - HSA_OVERRIDE_GFX_VERSION=11.0.0
      # NOTE(review): presumably restricts the container to GPU index 0.
      - HIP_VISIBLE_DEVICES=0
    # Folded scalar: the lines below are joined with single spaces into one
    # argument string; `>-` strips the trailing newline.
    # NOTE(review): assumes the image ENTRYPOINT is the server executable that
    # accepts these flags — confirm in the Dockerfile.
    command: >-
      --model /app/models/Qwen3.5-0.8B-UD-Q8_K_XL.gguf
      --quantization gguf
      --gpu-memory-utilization 0.50
      --max-model-len 4096
    # Restart automatically unless the container was explicitly stopped.
    restart: unless-stopped