# project refactored
# (diff-page residue: "This commit is contained in:", hunk header "@@ -1,27 +1,20 @@")
|
||||
services:
|
||||
vllm-qwen:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
container_name: vllm_qwen_08b
|
||||
qwen-08b:
|
||||
build: .
|
||||
container_name: qwen_08b_server
|
||||
ports:
|
||||
- "8000:8000"
|
||||
ipc: host
|
||||
devices:
|
||||
- "/dev/kfd:/dev/kfd"
|
||||
- "/dev/dri:/dev/dri"
|
||||
group_add:
|
||||
- video
|
||||
- render
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri:/dev/dri
|
||||
volumes:
|
||||
# Montaggio in sola lettura del modello
|
||||
- /opt/models:/app/models:ro
|
||||
environment:
|
||||
- HSA_OVERRIDE_GFX_VERSION=11.0.0
|
||||
- HIP_VISIBLE_DEVICES=0
|
||||
- /opt/models:/models
|
||||
# -ngl 99 offloada tutti i layer sulla GPU AMD
|
||||
# --host 0.0.0.0 lo rende accessibile fuori dal container
|
||||
command: >
|
||||
--model /app/models/Qwen3.5-0.8B-UD-Q8_K_XL.gguf
|
||||
--quantization gguf
|
||||
--gpu-memory-utilization 0.06
|
||||
--max-model-len 32768
|
||||
-m /models/qwen1_5-0_8b-chat-q8_0.gguf
|
||||
--host 0.0.0.0
|
||||
--port 8000
|
||||
-ngl 99
|
||||
-c 4096
|
||||
restart: unless-stopped
|
||||
|
||||
# (web-UI footer residue: "Reference in New Issue" / "Block a user")