project refactored
This commit is contained in:
26
Dockerfile
26
Dockerfile
@@ -1,22 +1,24 @@
# syntax=docker/dockerfile:1
# llama.cpp server image for AMD GPUs via ROCm/HIP, targeting RDNA3 (gfx1100,
# i.e. RX 7900 series). Build stage and runtime are the same image here because
# the ROCm runtime libraries are needed to run the HIP binary.
#
# NOTE(review): pin a concrete ROCm tag (e.g. rocm/dev-ubuntu-22.04:6.1.2)
# instead of :latest for reproducible builds.
ARG ROCM_IMAGE=rocm/dev-ubuntu-22.04:latest
FROM ${ROCM_IMAGE}

# GPU targeting for the HIP kernels and the HSA runtime.
# HSA_OVERRIDE_GFX_VERSION=11.0.0 matches gfx1100; HIP_VISIBLE_DEVICES pins GPU 0.
ENV HSA_OVERRIDE_GFX_VERSION="11.0.0" \
    AMDGPU_TARGETS="gfx1100" \
    HIP_VISIBLE_DEVICES="0"

# NOTE(review): PYTORCH_ROCM_ARCH is a leftover from the previous vLLM-based
# image; llama.cpp does not read it and it can most likely be dropped.
ENV PYTORCH_ROCM_ARCH="gfx1100"

# Build dependencies. DEBIAN_FRONTEND is set inline so it does not leak into
# the runtime environment; the apt list cache is removed in the same layer.
# libcurl is required for llama-server's model-download support.
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        libcurl4-openssl-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Clone llama.cpp (shallow clone keeps the layer small).
# NOTE(review): pin a release tag (git clone --branch <tag>) so rebuilds are
# reproducible instead of tracking master.
RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp.git .

# Native HIP build for AMD. HIPCXX/HIP_PATH point cmake at ROCm's clang, per
# the llama.cpp HIP build documentation; without them the host gcc may be
# picked up and the HIP kernels fail to compile. This step takes a long time.
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
    cmake -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=gfx1100 -DCMAKE_BUILD_TYPE=Release \
    && cmake --build build --config Release -j "$(nproc)"

# llama-server binds 127.0.0.1:8080 by default, which would make the exposed
# port unreachable from outside the container. CMD supplies overridable
# defaults so the server listens on 0.0.0.0:8000 as documented by EXPOSE.
# A model must be provided at run time, e.g.:
#   docker run --device=/dev/kfd --device=/dev/dri -v ./models:/models \
#     <image> -m /models/model.gguf
EXPOSE 8000
ENTRYPOINT ["/app/build/bin/llama-server"]
CMD ["--host", "0.0.0.0", "--port", "8000"]
Reference in New Issue
Block a user