@@ -1024,23 +1024,13 @@ python3 tools/download_model.py --type modelscope
|
|||||||
|
|
||||||
## 2. Deployment
|
## 2. Deployment
|
||||||
### vLLM inference
|
### vLLM inference
|
||||||
We highly recommend using vllm for deployment and inference. All of our evaluation results are based on vllm version 0.9.1.
|
We highly recommend using vLLM for deployment and inference. All of our evaluation results are based on vLLM 0.9.1 via out-of-tree model registration. Since vLLM version 0.11.0, Dots OCR has been officially integrated into vLLM, and you can use the vLLM Docker image directly (e.g., `vllm/vllm-openai:v0.11.0`) to deploy the model server.
|
||||||
The [Docker Image](https://hub.docker.com/r/rednotehilab/dots.ocr) is based on the official vllm image. You can also follow the [Dockerfile](https://github.com/rednote-hilab/dots.ocr/blob/master/docker/Dockerfile) to build the deployment environment yourself.
|
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
# You need to register the model with vllm first
|
# Launch vLLM model server
|
||||||
python3 tools/download_model.py
|
vllm serve rednote-hilab/dots.ocr --trust-remote-code --async-scheduling
|
||||||
export hf_model_path=./weights/DotsOCR # Path to your downloaded model weights. Please use a directory name without periods (e.g., `DotsOCR` instead of `dots.ocr`) for the model save path. This is a temporary workaround pending our integration with Transformers.
|
|
||||||
export PYTHONPATH=$(dirname "$hf_model_path"):$PYTHONPATH
|
|
||||||
sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
|
|
||||||
from DotsOCR import modeling_dots_ocr_vllm' `which vllm` # If you downloaded the model weights yourself, replace `DotsOCR` with the name of the directory where you saved the model, and remember to use a directory name without periods (e.g., `DotsOCR` instead of `dots.ocr`)
|
|
||||||
|
|
||||||
# launch vllm server
|
# vllm API Demo
|
||||||
CUDA_VISIBLE_DEVICES=0 vllm serve ${hf_model_path} --tensor-parallel-size 1 --gpu-memory-utilization 0.95 --chat-template-content-format string --served-model-name model --trust-remote-code
|
|
||||||
|
|
||||||
# If you get a ModuleNotFoundError: No module named 'DotsOCR', please check the note above on the saved model directory name.
|
|
||||||
|
|
||||||
# vllm api demo
|
|
||||||
python3 ./demo/demo_vllm.py --prompt_mode prompt_layout_all_en
|
python3 ./demo/demo_vllm.py --prompt_mode prompt_layout_all_en
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
+1
-4
@@ -1,11 +1,8 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import os
|
|
||||||
|
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from transformers.utils.versions import require_version
|
from transformers.utils.versions import require_version
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import io
|
|
||||||
import base64
|
|
||||||
from dots_ocr.utils import dict_promptmode_to_prompt
|
from dots_ocr.utils import dict_promptmode_to_prompt
|
||||||
from dots_ocr.model.inference import inference_with_vllm
|
from dots_ocr.model.inference import inference_with_vllm
|
||||||
|
|
||||||
@@ -13,7 +10,7 @@ from dots_ocr.model.inference import inference_with_vllm
|
|||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--ip", type=str, default="localhost")
|
parser.add_argument("--ip", type=str, default="localhost")
|
||||||
parser.add_argument("--port", type=str, default="8000")
|
parser.add_argument("--port", type=str, default="8000")
|
||||||
parser.add_argument("--model_name", type=str, default="model")
|
parser.add_argument("--model_name", type=str, default="rednote-hilab/dots.ocr")
|
||||||
parser.add_argument("--prompt_mode", type=str, default="prompt_layout_all_en")
|
parser.add_argument("--prompt_mode", type=str, default="prompt_layout_all_en")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ def inference_with_vllm(
|
|||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
top_p=0.9,
|
top_p=0.9,
|
||||||
max_completion_tokens=32768,
|
max_completion_tokens=32768,
|
||||||
model_name='model',
|
model_name='rednote-hilab/dots.ocr',
|
||||||
):
|
):
|
||||||
|
|
||||||
addr = f"http://{ip}:{port}/v1"
|
addr = f"http://{ip}:{port}/v1"
|
||||||
|
|||||||
Reference in New Issue
Block a user