commit 549f70b97ac86cddc7142a418de481394cbf9173
Author: Luca Sacchi Ricciardi
Date:   Fri Mar 27 13:05:47 2026 +0000

    first commit

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..0c25af4
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,8 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+
+## [1.0.0] - 2026-03-27
+### Added
+- Inizializzazione ambiente vLLM per Qwen3.5-0.8B GGUF.
+- Dockerfile con build da sorgente per architettura gfx1100.
+- docker-compose.yml con HSA_OVERRIDE e IPC host.
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..ed70799
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,22 @@
+FROM rocm/pytorch:latest
+
+ENV DEBIAN_FRONTEND=noninteractive
+# Specifica l'architettura target per la compilazione dei kernel C++/HIP
+ENV PYTORCH_ROCM_ARCH="gfx1100"
+ENV HSA_OVERRIDE_GFX_VERSION="11.0.0"
+ENV HIP_VISIBLE_DEVICES="0"
+
+RUN apt-get update -y && apt-get install -y git build-essential python3-dev && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /workspace
+
+# Clona vLLM e usa un tag stabile recente che supporta GGUF (0.6.0+)
+RUN git clone https://github.com/vllm-project/vllm.git . && git checkout v0.6.3
+
+RUN pip install -U pip && pip install -r requirements-rocm.txt
+
+# Compilazione (richiederà tempo)
+RUN python3 setup.py install
+
+EXPOSE 8000
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..4dae553
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,27 @@
+services:
+  vllm-qwen:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: vllm_qwen_08b
+    ports:
+      - "8000:8000"
+    ipc: host
+    devices:
+      - "/dev/kfd:/dev/kfd"
+      - "/dev/dri:/dev/dri"
+    group_add:
+      - video
+      - render
+    volumes:
+      # Read-only mount of the model directory
+      - /opt/models:/app/models:ro
+    environment:
+      - HSA_OVERRIDE_GFX_VERSION=11.0.0
+      - HIP_VISIBLE_DEVICES=0
+    command: >
+      --model /app/models/Qwen3.5-0.8B-UD-Q8_K_XL.gguf
+      --quantization gguf
+      --gpu-memory-utilization 0.50
+      --max-model-len 4096
+    restart: unless-stopped
diff --git a/test_api.py b/test_api.py
new file mode 100644
index 0000000..610e73f
--- /dev/null
+++ b/test_api.py
@@ -0,0 +1,25 @@
+import urllib.request
+import json
+
+url = "http://localhost:8000/v1/chat/completions"
+headers = {"Content-Type": "application/json"}
+data = {  # OpenAI-compatible chat-completions payload; "model" must match the served GGUF path
+    "model": "/app/models/Qwen3.5-0.8B-UD-Q8_K_XL.gguf",
+    "messages": [
+        {"role": "system", "content": "Sei un assistente utile e conciso."},
+        {"role": "user", "content": "Quali sono i vantaggi principali del sistema operativo Linux?"}
+    ],
+    "max_tokens": 100,
+    "temperature": 0.2
+}
+
+req = urllib.request.Request(url, data=json.dumps(data).encode('utf-8'), headers=headers)
+
+print("Inviando richiesta a vLLM...")
+try:
+    with urllib.request.urlopen(req, timeout=120) as response:  # timeout: fail fast instead of hanging forever if the server is down
+        result = json.loads(response.read().decode('utf-8'))
+        print("\n✅ TEST SUPERATO. Risposta dal modello:\n")
+        print(result['choices'][0]['message']['content'])
+except Exception as e:  # smoke test: report any failure (connection refused, HTTP error, bad JSON) and exit cleanly
+    print(f"\n❌ ERRORE DURANTE IL TEST: {e}")