Signed-off-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
Roger Wang
2025-09-30 23:04:53 -07:00
parent 75bd78eaca
commit f60838876b
3 changed files with 6 additions and 19 deletions
+4 -14
View File
@@ -1024,23 +1024,13 @@ python3 tools/download_model.py --type modelscope
## 2. Deployment ## 2. Deployment
### vLLM inference ### vLLM inference
We highly recommend using vllm for deployment and inference. All of our evaluations results are based on vllm version 0.9.1. We highly recommend using vLLM for deployment and inference. All of our evaluation results are based on vLLM 0.9.1 via out-of-tree model registration. Since vLLM version 0.11.0, Dots OCR has been officially integrated into vLLM, and you can use the vLLM Docker image directly (e.g., `vllm/vllm-openai:v0.11.0`) to deploy the model server.
The [Docker Image](https://hub.docker.com/r/rednotehilab/dots.ocr) is based on the official vllm image. You can also follow [Dockerfile](https://github.com/rednote-hilab/dots.ocr/blob/master/docker/Dockerfile) to build the deployment environment by yourself.
```shell ```shell
# You need to register model to vllm at first # Launch vLLM model server
python3 tools/download_model.py vllm serve rednote-hilab/dots.ocr --trust-remote-code --async-scheduling
export hf_model_path=./weights/DotsOCR # Path to your downloaded model weights, Please use a directory name without periods (e.g., `DotsOCR` instead of `dots.ocr`) for the model save path. This is a temporary workaround pending our integration with Transformers.
export PYTHONPATH=$(dirname "$hf_model_path"):$PYTHONPATH
sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
from DotsOCR import modeling_dots_ocr_vllm' `which vllm` # If you downloaded model weights by yourself, please replace `DotsOCR` by your model saved directory name, and remember to use a directory name without periods (e.g., `DotsOCR` instead of `dots.ocr`)
# launch vllm server # vLLM API demo
CUDA_VISIBLE_DEVICES=0 vllm serve ${hf_model_path} --tensor-parallel-size 1 --gpu-memory-utilization 0.95 --chat-template-content-format string --served-model-name model --trust-remote-code
# If you get a ModuleNotFoundError: No module named 'DotsOCR', please check the note above on the saved model directory name.
# vllm api demo
python3 ./demo/demo_vllm.py --prompt_mode prompt_layout_all_en python3 ./demo/demo_vllm.py --prompt_mode prompt_layout_all_en
``` ```
+1 -4
View File
@@ -1,11 +1,8 @@
import argparse import argparse
import os
from openai import OpenAI from openai import OpenAI
from transformers.utils.versions import require_version from transformers.utils.versions import require_version
from PIL import Image from PIL import Image
import io
import base64
from dots_ocr.utils import dict_promptmode_to_prompt from dots_ocr.utils import dict_promptmode_to_prompt
from dots_ocr.model.inference import inference_with_vllm from dots_ocr.model.inference import inference_with_vllm
@@ -13,7 +10,7 @@ from dots_ocr.model.inference import inference_with_vllm
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--ip", type=str, default="localhost") parser.add_argument("--ip", type=str, default="localhost")
parser.add_argument("--port", type=str, default="8000") parser.add_argument("--port", type=str, default="8000")
parser.add_argument("--model_name", type=str, default="model") parser.add_argument("--model_name", type=str, default="rednote-hilab/dots.ocr")
parser.add_argument("--prompt_mode", type=str, default="prompt_layout_all_en") parser.add_argument("--prompt_mode", type=str, default="prompt_layout_all_en")
args = parser.parse_args() args = parser.parse_args()
+1 -1
View File
@@ -12,7 +12,7 @@ def inference_with_vllm(
temperature=0.1, temperature=0.1,
top_p=0.9, top_p=0.9,
max_completion_tokens=32768, max_completion_tokens=32768,
model_name='model', model_name='rednote-hilab/dots.ocr',
): ):
addr = f"http://{ip}:{port}/v1" addr = f"http://{ip}:{port}/v1"