diff --git a/README.md b/README.md index 2753219..3812c76 100755 --- a/README.md +++ b/README.md @@ -1139,7 +1139,7 @@ print(output_text) # Parse a single image python3 dots_ocr/parser.py demo/demo_image1.jpg # Parse a single PDF -python3 dots_ocr/parser.py demo/demo_pdf1.pdf --num_threads 64 # try bigger num_threads for pdf with a large number of pages +python3 dots_ocr/parser.py demo/demo_pdf1.pdf --num_thread 64 # try bigger num_threads for pdf with a large number of pages # Layout detection only python3 dots_ocr/parser.py demo/demo_image1.jpg --prompt prompt_layout_only_en diff --git a/assets/wechat.jpg b/assets/wechat.jpg index 6aecef3..b905d3c 100644 Binary files a/assets/wechat.jpg and b/assets/wechat.jpg differ diff --git a/dots_ocr/parser.py b/dots_ocr/parser.py index e351e87..1f1a1d8 100755 --- a/dots_ocr/parser.py +++ b/dots_ocr/parser.py @@ -190,7 +190,7 @@ class DotsOCRParser: def parse_pdf(self, input_path, filename, prompt_mode, save_dir): print(f"loading pdf: {input_path}") - images_origin = load_images_from_pdf(input_path) + images_origin = load_images_from_pdf(input_path, dpi=self.dpi) total_pages = len(images_origin) tasks = [ { diff --git a/requirements.txt b/requirements.txt index a327bb4..7eed6f1 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -# 生产环境依赖 # streamlit gradio gradio_image_annotation