From f60838876bc712bdd1885139b43d21751e478321 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Tue, 30 Sep 2025 23:04:53 -0700 Subject: [PATCH 1/8] cleanup Signed-off-by: Roger Wang --- README.md | 18 ++++-------------- demo/demo_vllm.py | 5 +---- dots_ocr/model/inference.py | 2 +- 3 files changed, 6 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index e9ae2f9..56b96f8 100755 --- a/README.md +++ b/README.md @@ -1024,23 +1024,13 @@ python3 tools/download_model.py --type modelscope ## 2. Deployment ### vLLM inference -We highly recommend using vllm for deployment and inference. All of our evaluations results are based on vllm version 0.9.1. -The [Docker Image](https://hub.docker.com/r/rednotehilab/dots.ocr) is based on the official vllm image. You can also follow [Dockerfile](https://github.com/rednote-hilab/dots.ocr/blob/master/docker/Dockerfile) to build the deployment environment by yourself. +We highly recommend using vLLM for deployment and inference. All of our evaluations results are based on vLLM 0.9.1 via out-of-tree model registration. Since vLLM version 0.11.0, Dots OCR has been officially integrated into vLLM and you can use vLLM docker image directly (e.g, `vllm/vllm-openai:v0.11.0`) to deploy the model server. ```shell -# You need to register model to vllm at first -python3 tools/download_model.py -export hf_model_path=./weights/DotsOCR # Path to your downloaded model weights, Please use a directory name without periods (e.g., `DotsOCR` instead of `dots.ocr`) for the model save path. This is a temporary workaround pending our integration with Transformers. 
-export PYTHONPATH=$(dirname "$hf_model_path"):$PYTHONPATH -sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\ -from DotsOCR import modeling_dots_ocr_vllm' `which vllm` # If you downloaded model weights by yourself, please replace `DotsOCR` by your model saved directory name, and remember to use a directory name without periods (e.g., `DotsOCR` instead of `dots.ocr`) +# Launch vLLM model server +vllm serve rednote-hilab/dots.ocr --trust-remote-code --async-scheduling -# launch vllm server -CUDA_VISIBLE_DEVICES=0 vllm serve ${hf_model_path} --tensor-parallel-size 1 --gpu-memory-utilization 0.95 --chat-template-content-format string --served-model-name model --trust-remote-code - -# If you get a ModuleNotFoundError: No module named 'DotsOCR', please check the note above on the saved model directory name. - -# vllm api demo +# vllm API Demo python3 ./demo/demo_vllm.py --prompt_mode prompt_layout_all_en ``` diff --git a/demo/demo_vllm.py b/demo/demo_vllm.py index d924adc..166c521 100755 --- a/demo/demo_vllm.py +++ b/demo/demo_vllm.py @@ -1,11 +1,8 @@ import argparse -import os from openai import OpenAI from transformers.utils.versions import require_version from PIL import Image -import io -import base64 from dots_ocr.utils import dict_promptmode_to_prompt from dots_ocr.model.inference import inference_with_vllm @@ -13,7 +10,7 @@ from dots_ocr.model.inference import inference_with_vllm parser = argparse.ArgumentParser() parser.add_argument("--ip", type=str, default="localhost") parser.add_argument("--port", type=str, default="8000") -parser.add_argument("--model_name", type=str, default="model") +parser.add_argument("--model_name", type=str, default="rednote-hilab/dots.ocr") parser.add_argument("--prompt_mode", type=str, default="prompt_layout_all_en") args = parser.parse_args() diff --git a/dots_ocr/model/inference.py b/dots_ocr/model/inference.py index 16e4ece..e482d72 100755 --- a/dots_ocr/model/inference.py +++ b/dots_ocr/model/inference.py @@ -12,7 +12,7 
@@ def inference_with_vllm( temperature=0.1, top_p=0.9, max_completion_tokens=32768, - model_name='model', + model_name='rednote-hilab/dots.ocr', ): addr = f"http://{ip}:{port}/v1" From 18d5526dd957769f73fecb9f41929036a632211f Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Tue, 30 Sep 2025 23:06:30 -0700 Subject: [PATCH 2/8] cleanup Signed-off-by: Roger Wang --- docker/Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 0a9b969..9857994 100755 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,6 @@ +# Dots OCR has been officially integrated into vLLM since v0.11.0 +# Below is the dockerfile for out-of-tree model registration support based v0.9.1 from vllm/vllm-openai:v0.9.1 RUN pip3 install flash_attn==2.8.0.post2 -RUN pip3 install transformers==4.51.3 \ No newline at end of file +RUN pip3 install transformers==4.51.3 From e7a1f4aa085a83ac18668e16a10afbff8a8c5967 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Tue, 30 Sep 2025 23:07:48 -0700 Subject: [PATCH 3/8] update Signed-off-by: Roger Wang --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 56b96f8..918274c 100755 --- a/README.md +++ b/README.md @@ -1024,7 +1024,7 @@ python3 tools/download_model.py --type modelscope ## 2. Deployment ### vLLM inference -We highly recommend using vLLM for deployment and inference. All of our evaluations results are based on vLLM 0.9.1 via out-of-tree model registration. Since vLLM version 0.11.0, Dots OCR has been officially integrated into vLLM and you can use vLLM docker image directly (e.g, `vllm/vllm-openai:v0.11.0`) to deploy the model server. +We highly recommend using vLLM for deployment and inference. All of our evaluations results are based on vLLM 0.9.1 via out-of-tree model registration. 
**Since vLLM version 0.11.0, Dots OCR has been officially integrated into vLLM** and you can use vLLM docker image directly (e.g, `vllm/vllm-openai:v0.11.0`) to deploy the model server. ```shell # Launch vLLM model server From 96f8d51d754f272a9c5ebeebcf774b87feeed922 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Tue, 30 Sep 2025 23:12:37 -0700 Subject: [PATCH 4/8] update Signed-off-by: Roger Wang --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 918274c..82a5997 100755 --- a/README.md +++ b/README.md @@ -1030,7 +1030,8 @@ We highly recommend using vLLM for deployment and inference. All of our evaluati # Launch vLLM model server vllm serve rednote-hilab/dots.ocr --trust-remote-code --async-scheduling -# vllm API Demo +# vLLM API Demo +# See demo/demo_vllm.py for details on parameter and prompt settings that help achieve the best output quality. python3 ./demo/demo_vllm.py --prompt_mode prompt_layout_all_en ``` From 7d9af595f5bb68093f601e900deeaf772028a289 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Tue, 30 Sep 2025 23:13:55 -0700 Subject: [PATCH 5/8] typo Signed-off-by: Roger Wang --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 9857994..fc5ee2b 100755 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,5 +1,5 @@ # Dots OCR has been officially integrated into vLLM since v0.11.0 -# Below is the dockerfile for out-of-tree model registration support based v0.9.1 +# Below is the dockerfile for out-of-tree model registration support based on v0.9.1 from vllm/vllm-openai:v0.9.1 RUN pip3 install flash_attn==2.8.0.post2 From 1fda6f1ba7359a49e98bd84993d5701b1fcc17ec Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Tue, 30 Sep 2025 23:16:12 -0700 Subject: [PATCH 6/8] add Signed-off-by: Roger Wang --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 82a5997..eadb931 100755 
--- a/README.md +++ b/README.md @@ -1028,7 +1028,7 @@ We highly recommend using vLLM for deployment and inference. All of our evaluati ```shell # Launch vLLM model server -vllm serve rednote-hilab/dots.ocr --trust-remote-code --async-scheduling +vllm serve rednote-hilab/dots.ocr --trust-remote-code --async-scheduling --gpu-memory-utilization 0.95 # vLLM API Demo # See demo/demo_vllm.py for details on parameter and prompt settings that help achieve the best output quality. From c445ffb5126b5213063c296fe3c066841d1a62ce Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Tue, 30 Sep 2025 23:28:43 -0700 Subject: [PATCH 7/8] update Signed-off-by: Roger Wang --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index eadb931..a71d05d 100755 --- a/README.md +++ b/README.md @@ -1024,7 +1024,7 @@ python3 tools/download_model.py --type modelscope ## 2. Deployment ### vLLM inference -We highly recommend using vLLM for deployment and inference. All of our evaluations results are based on vLLM 0.9.1 via out-of-tree model registration. **Since vLLM version 0.11.0, Dots OCR has been officially integrated into vLLM** and you can use vLLM docker image directly (e.g, `vllm/vllm-openai:v0.11.0`) to deploy the model server. +We highly recommend using vLLM for deployment and inference. All of our evaluation results are based on vLLM 0.9.1 via out-of-tree model registration. **Since vLLM version 0.11.0, Dots OCR has been officially integrated into vLLM with verified performance**, and you can use the vLLM Docker image directly (e.g., `vllm/vllm-openai:v0.11.0`) to deploy the model server. 
```shell # Launch vLLM model server From 37a0b4d5c050f20a66cbf88438c30a2cacf6f078 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Wed, 1 Oct 2025 00:46:49 -0700 Subject: [PATCH 8/8] update Signed-off-by: Roger Wang --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a71d05d..1f73ff6 100755 --- a/README.md +++ b/README.md @@ -1031,7 +1031,8 @@ We highly recommend using vLLM for deployment and inference. All of our evaluati vllm serve rednote-hilab/dots.ocr --trust-remote-code --async-scheduling --gpu-memory-utilization 0.95 # vLLM API Demo -# See demo/demo_vllm.py for details on parameter and prompt settings that help achieve the best output quality. +# See dots_ocr/model/inference.py for details on parameter and prompt settings +# that help achieve the best output quality. python3 ./demo/demo_vllm.py --prompt_mode prompt_layout_all_en ```