# llamacpp-qwen3.5-0.8b/Dockerfile

# Base image providing ROCm and PyTorch. The image reference is a build
# argument so that reproducible builds can pin it to a fixed tag or digest:
#   docker build --build-arg BASE_IMAGE=rocm/pytorch:<tag>@sha256:<digest> .
# The default is unchanged and still follows the moving `latest` tag.
ARG BASE_IMAGE=rocm/pytorch:latest
FROM ${BASE_IMAGE}

# Build-time only: keeps apt/debconf from prompting during RUN steps without
# leaving the variable set in the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# PYTORCH_ROCM_ARCH selects the target architecture used when compiling the
# C++/HIP kernels. HSA_OVERRIDE_GFX_VERSION and HIP_VISIBLE_DEVICES are read
# by the ROCm/HIP runtime: the first overrides the GFX version reported for
# the GPU, the second limits which devices are visible (here only device 0).
ENV PYTORCH_ROCM_ARCH="gfx1100" \
    HSA_OVERRIDE_GFX_VERSION="11.0.0" \
    HIP_VISIBLE_DEVICES="0"

# System packages required to fetch and compile the project.
# --no-install-recommends keeps optional packages out of the image;
# ca-certificates is therefore listed explicitly because the HTTPS git clone
# needs it. The apt lists are removed in the same layer so they never reach
# the image.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git \
        python3-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# Clone vLLM at a recent stable tag that supports GGUF (0.6.0+).
# VLLM_VERSION may be any branch or tag name and can be overridden with
# --build-arg. A shallow, single-ref clone avoids storing the full project
# history in the image layer.
ARG VLLM_VERSION=v0.6.3
RUN git clone --depth 1 --branch "${VLLM_VERSION}" \
        https://github.com/vllm-project/vllm.git .

# Install the Python dependencies for the ROCm build.
# `python3 -m pip` guarantees the packages land in the same interpreter that
# later builds and runs vLLM; --no-cache-dir keeps pip's download cache out
# of the image layer.
RUN python3 -m pip install --no-cache-dir -U pip \
    && python3 -m pip install --no-cache-dir -r requirements-rocm.txt

# Build (this will take a while).
# `develop` builds the compiled extensions in place inside the source tree.
# The entrypoint runs `python3 -m vllm...` from this directory, so the local
# `vllm/` package is the one that gets imported; with a plain `install` that
# source tree would shadow the installed copy while lacking the compiled
# extension modules.
RUN python3 setup.py develop

# Port documented for the API server (EXPOSE does not publish it by itself).
EXPOSE 8000/tcp

# Start the OpenAI-compatible API server. Exec form is used, so arguments
# given to `docker run` are appended to this command.
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]