From 37b62d9b7c08457e0943c5d9703bf7f91cc6bb3e Mon Sep 17 00:00:00 2001 From: openmartin Date: Tue, 5 Aug 2025 15:12:02 +0800 Subject: [PATCH] Update parser.py open file with encoding to support windows --- dots_ocr/parser.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dots_ocr/parser.py b/dots_ocr/parser.py index 1f1a1d8..8f78efb 100755 --- a/dots_ocr/parser.py +++ b/dots_ocr/parser.py @@ -116,7 +116,7 @@ class DotsOCRParser: ) if filtered and prompt_mode != 'prompt_layout_only_en': # model output json failed, use filtered process json_file_path = os.path.join(save_dir, f"{save_name}.json") - with open(json_file_path, 'w') as w: + with open(json_file_path, 'w', encoding="utf-8") as w: json.dump(response, w, ensure_ascii=False) image_layout_path = os.path.join(save_dir, f"{save_name}.jpg") @@ -143,7 +143,7 @@ class DotsOCRParser: image_with_layout = origin_image json_file_path = os.path.join(save_dir, f"{save_name}.json") - with open(json_file_path, 'w') as w: + with open(json_file_path, 'w', encoding="utf-8") as w: json.dump(cells, w, ensure_ascii=False) image_layout_path = os.path.join(save_dir, f"{save_name}.jpg") @@ -242,7 +242,7 @@ class DotsOCRParser: raise ValueError(f"file extension {file_ext} not supported, supported extensions are {image_extensions} and pdf") print(f"Parsing finished, results saving to {save_dir}") - with open(os.path.join(output_dir, os.path.basename(filename)+'.jsonl'), 'w') as w: + with open(os.path.join(output_dir, os.path.basename(filename)+'.jsonl'), 'w', encoding="utf-8") as w: for result in results: w.write(json.dumps(result, ensure_ascii=False) + '\n') @@ -346,4 +346,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main()