project refactored
This commit is contained in:
26
Dockerfile
26
Dockerfile
@@ -1,22 +1,24 @@
# syntax=docker/dockerfile:1
# llama.cpp server image for AMD GPUs via ROCm/HIP, targeting RDNA3 (gfx1100,
# i.e. RX 7900 series). Build stage and runtime are the same image here because
# the ROCm runtime libraries are needed to run the HIP binary.
#
# NOTE(review): pin a concrete ROCm tag (e.g. rocm/dev-ubuntu-22.04:6.1.2)
# instead of :latest for reproducible builds.
ARG ROCM_IMAGE=rocm/dev-ubuntu-22.04:latest
FROM ${ROCM_IMAGE}

# GPU targeting for the HIP kernels and the HSA runtime.
# HSA_OVERRIDE_GFX_VERSION=11.0.0 matches gfx1100; HIP_VISIBLE_DEVICES pins GPU 0.
ENV HSA_OVERRIDE_GFX_VERSION="11.0.0" \
    AMDGPU_TARGETS="gfx1100" \
    HIP_VISIBLE_DEVICES="0"

# NOTE(review): PYTORCH_ROCM_ARCH is a leftover from the previous vLLM-based
# image; llama.cpp does not read it and it can most likely be dropped.
ENV PYTORCH_ROCM_ARCH="gfx1100"

# Build dependencies. DEBIAN_FRONTEND is set inline so it does not leak into
# the runtime environment; the apt list cache is removed in the same layer.
# libcurl is required for llama-server's model-download support.
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        libcurl4-openssl-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Clone llama.cpp (shallow clone keeps the layer small).
# NOTE(review): pin a release tag (git clone --branch <tag>) so rebuilds are
# reproducible instead of tracking master.
RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp.git .

# Native HIP build for AMD. HIPCXX/HIP_PATH point cmake at ROCm's clang, per
# the llama.cpp HIP build documentation; without them the host gcc may be
# picked up and the HIP kernels fail to compile. This step takes a long time.
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
    cmake -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=gfx1100 -DCMAKE_BUILD_TYPE=Release \
    && cmake --build build --config Release -j "$(nproc)"

# llama-server binds 127.0.0.1:8080 by default, which would make the exposed
# port unreachable from outside the container. CMD supplies overridable
# defaults so the server listens on 0.0.0.0:8000 as documented by EXPOSE.
# A model must be provided at run time, e.g.:
#   docker run --device=/dev/kfd --device=/dev/dri -v ./models:/models \
#     <image> -m /models/model.gguf
EXPOSE 8000
ENTRYPOINT ["/app/build/bin/llama-server"]
CMD ["--host", "0.0.0.0", "--port", "8000"]
Reference in New Issue
Block a user