diff --git a/tools/eval_omnidocbench.md b/tools/eval_omnidocbench.md new file mode 100755 index 0000000..6d3a115 --- /dev/null +++ b/tools/eval_omnidocbench.md @@ -0,0 +1,286 @@ +Here is the step-by-step doc to reproduce OmniDocBench benchmark results. + +## 1. Model Env +Here, we use [Docker Image](https://hub.docker.com/r/rednotehilab/dots.ocr) for setup. + +## 2. Model Launch +```shell +# dots.ocr parser env +git clone https://github.com/rednote-hilab/dots.ocr.git +cd dots.ocr +pip install -e . + +# model setup and register +python3 tools/download_model.py +export hf_model_path=./weights/DotsOCR # Path to your downloaded model weights, Please use a directory name without periods (e.g., `DotsOCR` instead of `dots.ocr`) for the model save path. This is a temporary workaround pending our integration with Transformers. +export PYTHONPATH=$(dirname "$hf_model_path"):$PYTHONPATH +sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\ +from DotsOCR import modeling_dots_ocr_vllm' `which vllm` # If you downloaded model weights by yourself, please replace `DotsOCR` by your model saved directory name, and remember to use a directory name without periods (e.g., `DotsOCR` instead of `dots.ocr`) + +# launch vllm server +CUDA_VISIBLE_DEVICES=0 vllm serve ${hf_model_path} --tensor-parallel-size 1 --gpu-memory-utilization 0.95 --chat-template-content-format string --served-model-name model --trust-remote-code +``` + + +## 3. Model Inference + +```python + +from tqdm import tqdm +import json +import argparse +from multiprocessing.pool import ThreadPool, Pool +import shutil +import os + + +if __name__=="__main__": + from dots_ocr import DotsOCRParser + parser = argparse.ArgumentParser( + description="dots.ocr Multilingual Document Layout Parser", + ) + + parser.add_argument( + '--bbox', + type=int, + nargs=4, + metavar=('x1', 'y1', 'x2', 'y2'), + help='should give this argument if you want to prompt_grounding_ocr' + ) + parser.add_argument( + "--ip", type=str, default="localhost", + help="" + ) + parser.add_argument( + "--port", type=int, default=8000, + help="" + ) + parser.add_argument( + "--model_name", type=str, default="model", + help="" + ) + parser.add_argument( + "--temperature", type=float, default=0.1, + help="" + ) + parser.add_argument( + "--top_p", type=float, default=1.0, + help="" + ) + parser.add_argument( + "--dpi", type=int, default=200, + help="" + ) + parser.add_argument( + "--max_completion_tokens", type=int, default=16384, + help="" + ) + parser.add_argument( + "--num_thread", type=int, default=128, + help="" + ) + # parser.add_argument( + # "--fitz_preprocess", type=bool, default=False, + # help="False will use tikz dpi upsample pipeline, good for images which has been render with low dpi, but maybe result in higher computational costs" + # ) + parser.add_argument( + "--min_pixels", type=int, default=None, + help="" + ) + parser.add_argument( + "--max_pixels", type=int, default=None, + help="" + ) + args = parser.parse_args() + + dots_ocr_parser = DotsOCRParser( + ip=args.ip, + port=args.port, + model_name=args.model_name, + temperature=args.temperature, + top_p=args.top_p, + max_completion_tokens=args.max_completion_tokens, + num_thread=args.num_thread, + dpi=args.dpi, + # output_dir=args.output, + min_pixels=args.min_pixels, + max_pixels=args.max_pixels, + ) + + filepath = "/path/to/OmniDocBench.jsonl" # download OmniDocBench datasets from https://huggingface.co/datasets/opendatalab/OmniDocBench, reformat it to input the images into model + with open(filepath, 'r') as f: + list_items = [json.loads(line) for line in f] + + results = [] + output_path = "./output_omni.jsonl" + f_out = open(output_path, 'w') + + tasks = [[item['file_path'], f_out] for item in list_items] + + def _excute(task): + image_path, f_out = task + result = dots_ocr_parser.parse_file( + image_path, + prompt_mode="prompt_layout_all_en", + # prompt_mode="prompt_ocr", + fitz_preprocess=True, + ) + results.append(result) + f_out.write(f"{json.dumps(result, ensure_ascii=False)}\n") + f_out.flush() + + with ThreadPool(128) as pool: + with tqdm(total=len(tasks)) as pbar: + for result in pool.imap(_excute, tasks): + pbar.update(1) + pool.close() + pool.join() + + f_out.close() + + eval_result_save_dir = "./output_omni/" + os.makedirs(eval_result_save_dir, exist_ok=True) + + with open(output_path, "r") as f: + for line in f.readlines(): + item = json.loads(line)[0] + if 'md_content_nohf_path' in item: + file_name = os.path.basename(item['md_content_nohf_path']).replace("_nohf", "") + shutil.copy2(item['md_content_nohf_path'], os.path.join(eval_result_save_dir, file_name)) + else: + shutil.copy2(item['md_content_path'], eval_result_save_dir) + + print(f"md results saved to {eval_result_save_dir}") +``` + + +## 4. Evaluation +We use [OmniDocBench](https://github.com/opendatalab/OmniDocBench) to evaluate the performance. Prepare the omnidocbench env by yourself, follow the official steps. + +```shell +git clone https://github.com/opendatalab/OmniDocBench.git +cd /OmniDocBench + +# Follow https://github.com/opendatalab/OmniDocBench?tab=readme-ov-file#environment-setup-and-running +conda create -n omnidocbench python=3.10 +conda activate omnidocbench +pip install -r requirements.txt + +# Eval. Change the gt&pred path in end2end.yaml to your own, here by our inference steps, prediction data_path set as: /path/to/dots.ocr/output_omni/ +python pdf_validation.py --config ./end2end.yaml > evaluation_output.log + +cat evaluation_output.log +``` + +./end2end.yaml like: +```yaml +end2end_eval: + metrics: + text_block: + metric: + - Edit_dist + display_formula: + metric: + - Edit_dist + - CDM + table: + metric: + - TEDS + - Edit_dist + reading_order: + metric: + - Edit_dist + dataset: + dataset_name: end2end_dataset + ground_truth: + data_path: ./OmniDocBench.json # change to omnidocbench official gt + prediction: + data_path: /path/to/dots.ocr/output_omni/ # change to your own output dir + match_method: quick_match +``` + +Eval results as follow: +```shell +DATASET_REGISTRY: ['recogition_text_dataset', 'omnidocbench_single_module_dataset', 'recogition_formula_dataset', 'recogition_table_dataset', 'end2end_dataset', 'recogition_end2end_base_dataset', 'recogition_end2end_table_dataset', 'detection_dataset', 'detection_dataset_simple_format', 'md2md_dataset'] +METRIC_REGISTRY: ['TEDS', 'BLEU', 'METEOR', 'Edit_dist', 'CDM'] +EVAL_TASK_REGISTRY: ['detection_eval', 'end2end_eval', 'recogition_eval'] +###### Process: _quick_match +【Overall】 +display_formula CDM is not found +display_formula CDM is not found +---------------------------- -------------------- +text_block_Edit_dist_EN 0.031039851583834904 +text_block_Edit_dist_CH 0.0643426705744435 +display_formula_Edit_dist_EN 0.32843522681423176 +display_formula_Edit_dist_CH 0.42557920974720154 +display_formula_CDM_EN - +display_formula_CDM_CH - +table_TEDS_EN 88.91012727754615 +table_TEDS_CH 89.0531009325606 +table_Edit_dist_EN 0.0943878061222165 +table_Edit_dist_CH 0.09173810770062703 +reading_order_Edit_dist_EN 0.04079293927450415 +reading_order_Edit_dist_CH 0.06625588944827145 +overall_EN 0.12366395594869685 +overall_CH 0.16197896936763587 +---------------------------- -------------------- + + +【PDF types】 +-------------------------------- --------- +data_source: book 0.0183191 +data_source: PPT2PDF 0.0470068 +data_source: research_report 0.0107441 +data_source: colorful_textbook 0.0710044 +data_source: exam_paper 0.0763102 +data_source: magazine 0.0278807 +data_source: academic_literature 0.0279 +data_source: note 0.112103 +data_source: newspaper 0.0787516 +ALL 0.055183 +-------------------------------- --------- + + +【Layout】 +Layout Mean Var +--------------------- --------- ---------- +layout: single_column 0.0267498 0.0187775 +layout: double_column 0.0789817 0.0283393 +layout: three_column 0.0738766 0.00154036 +layout: other_layout 0.115941 0.0336075 + + +【Text Attribute】 +-------------------------------------- --------- +text_language: text_english 0.0296053 +text_language: text_simplified_chinese 0.106577 +text_language: text_en_ch_mixed 0.0888106 +text_background: white 0.0880222 +text_background: single_colored 0.0752833 +text_background: multi_colored 0.0723353 +-------------------------------------- --------- + + +【Table Attribute】 +---------------------------------- -------- +language: table_en 0.876843 +language: table_simplified_chinese 0.872133 +language: table_en_ch_mixed 0.941139 +line: full_line 0.895071 +line: less_line 0.897401 +line: fewer_line 0.842987 +line: wireless_line 0.847398 +with_span: True 0.865542 +with_span: False 0.881582 +include_equation: True 0.839543 +include_equation: False 0.884461 +include_background: True 0.886555 +include_background: False 0.8707 +table_layout: vertical 0.884036 +table_layout: horizontal 0.875826 +---------------------------------- -------- + +``` + +> **Notes:** +> - The metrics reported in the README.md are the average of 5 runs. Each run may show a variance of ±0.005 for the overall_EN and overall_ZH scores. \ No newline at end of file