first commit

This commit is contained in:
Luca Sacchi Ricciardi
2026-03-27 13:05:47 +00:00
commit 549f70b97a
4 changed files with 82 additions and 0 deletions

8
CHANGELOG.md Normal file
View File

@@ -0,0 +1,8 @@
# Changelog
All notable changes to this project will be documented in this file.
## [1.0.0] - 2026-03-27
### Added
- Inizializzazione ambiente vLLM per Qwen3.5-0.8B GGUF.
- Dockerfile con build da sorgente per architettura gfx1100.
- docker-compose.yml con HSA_OVERRIDE e IPC host.

22
Dockerfile Normal file
View File

@@ -0,0 +1,22 @@
# syntax=docker/dockerfile:1
# Build vLLM from source for AMD RDNA3 (gfx1100) GPUs on top of the ROCm
# PyTorch image, then serve the OpenAI-compatible API on port 8000.
# BASE_TAG lets builds pin a reproducible base image; the default keeps the
# original ":latest" behavior.
ARG BASE_TAG=latest
FROM rocm/pytorch:${BASE_TAG}

# Target architecture for C++/HIP kernel compilation (Radeon RX 7900 class).
ENV PYTORCH_ROCM_ARCH="gfx1100"
ENV HSA_OVERRIDE_GFX_VERSION="11.0.0"
ENV HIP_VISIBLE_DEVICES="0"

# DEBIAN_FRONTEND is set inline so it does not leak into the runtime env.
RUN DEBIAN_FRONTEND=noninteractive apt-get update -y \
    && apt-get install -y --no-install-recommends \
        build-essential \
        git \
        python3-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# Clone vLLM and check out a recent stable tag that supports GGUF (0.6.0+).
RUN git clone https://github.com/vllm-project/vllm.git . && git checkout v0.6.3

RUN pip install --no-cache-dir -U pip \
    && pip install --no-cache-dir -r requirements-rocm.txt

# Build and install from source. "python3 setup.py install" is deprecated;
# pip drives the same build backend. This step is slow (kernel compilation).
RUN pip install --no-cache-dir .

# NOTE(review): the container still runs as root. Adding a non-root USER may
# break /dev/kfd and /dev/dri access unless matching video/render GIDs are
# configured — confirm before hardening.
EXPOSE 8000
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]

27
docker-compose.yml Normal file
View File

@@ -0,0 +1,27 @@
services:
  vllm-qwen:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: vllm_qwen_08b
    ports:
      - "8000:8000"
    ipc: host
    devices:
      - "/dev/kfd:/dev/kfd"
      - "/dev/dri:/dev/dri"
    group_add:
      - video
      - render
    volumes:
      # Read-only mount of the model directory
      - /opt/models:/app/models:ro
    environment:
      - HSA_OVERRIDE_GFX_VERSION=11.0.0
      - HIP_VISIBLE_DEVICES=0
    command: >
      --model /app/models/Qwen3.5-0.8B-UD-Q8_K_XL.gguf
      --quantization gguf
      --gpu-memory-utilization 0.50
      --max-model-len 4096
    restart: unless-stopped

25
test_api.py Normal file
View File

@@ -0,0 +1,25 @@
import urllib.request
import json

# Endpoint of the vLLM OpenAI-compatible server started by docker-compose.
API_URL = "http://localhost:8000/v1/chat/completions"
# Model path as mounted inside the container (see docker-compose volume).
MODEL_PATH = "/app/models/Qwen3.5-0.8B-UD-Q8_K_XL.gguf"


def build_request():
    """Build the POST request for the chat-completions smoke test.

    Returns:
        urllib.request.Request: request with a JSON-encoded chat payload
        and the Content-Type header set.
    """
    payload = {
        "model": MODEL_PATH,
        "messages": [
            {"role": "system", "content": "Sei un assistente utile e conciso."},
            {"role": "user", "content": "Quali sono i vantaggi principali del sistema operativo Linux?"},
        ],
        "max_tokens": 100,
        "temperature": 0.2,
    }
    return urllib.request.Request(
        API_URL,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )


def main():
    """Send one chat request and print the model's reply (or the error)."""
    req = build_request()
    print("Inviando richiesta a vLLM...")
    try:
        # Timeout prevents the script from hanging forever when the server
        # is not up yet (the original had no timeout).
        with urllib.request.urlopen(req, timeout=60) as response:
            result = json.loads(response.read().decode("utf-8"))
        print("\n✅ TEST SUPERATO. Risposta dal modello:\n")
        print(result["choices"][0]["message"]["content"])
    except Exception as e:  # broad by design: this is a best-effort smoke test
        print(f"\n❌ ERRORE DURANTE IL TEST: {e}")


if __name__ == "__main__":
    main()