diff --git a/demo/demo_gradio.py b/demo/demo_gradio.py index 3612c79..4035a6e 100755 --- a/demo/demo_gradio.py +++ b/demo/demo_gradio.py @@ -17,6 +17,7 @@ import re from pathlib import Path from PIL import Image import requests +import shutil # Import shutil for cleanup # Local tool imports from dots_ocr.utils import dict_promptmode_to_prompt @@ -50,28 +51,28 @@ dots_parser = DotsOCRParser( max_pixels=DEFAULT_CONFIG['max_pixels'] ) -# Store processing results -processing_results = { - 'original_image': None, - 'processed_image': None, - 'layout_result': None, - 'markdown_content': None, - 'cells_data': None, - 'temp_dir': None, - 'session_id': None, - 'result_paths': None, - 'pdf_results': None # Store multi-page PDF results -} - -# PDF caching mechanism -pdf_cache = { - "images": [], - "current_page": 0, - "total_pages": 0, - "file_type": None, # 'image' or 'pdf' - "is_parsed": False, # Whether it has been parsed - "results": [] # Store parsing results for each page -} +def get_initial_session_state(): + return { + 'processing_results': { + 'original_image': None, + 'processed_image': None, + 'layout_result': None, + 'markdown_content': None, + 'cells_data': None, + 'temp_dir': None, + 'session_id': None, + 'result_paths': None, + 'pdf_results': None + }, + 'pdf_cache': { + "images": [], + "current_page": 0, + "total_pages": 0, + "file_type": None, + "is_parsed": False, + "results": [] + } + } def read_image_v2(img): """Reads an image, supports URLs and local paths""" @@ -87,32 +88,27 @@ def read_image_v2(img): raise ValueError(f"Invalid image type: {type(img)}") return img -def load_file_for_preview(file_path): +def load_file_for_preview(file_path, session_state): """Loads a file for preview, supports PDF and image files""" - global pdf_cache + pdf_cache = session_state['pdf_cache'] if not file_path or not os.path.exists(file_path): - return None, "
0 / 0
" + return None, "
0 / 0
", session_state file_ext = os.path.splitext(file_path)[1].lower() - if file_ext == '.pdf': - try: - # Read PDF and convert to images (one image per page) + try: + if file_ext == '.pdf': pages = load_images_from_pdf(file_path) pdf_cache["file_type"] = "pdf" - except Exception as e: - return None, f"
PDF loading failed: {str(e)}
" - elif file_ext in ['.jpg', '.jpeg', '.png']: - # For image files, read directly as a single-page image - try: + elif file_ext in ['.jpg', '.jpeg', '.png']: image = Image.open(file_path) pages = [image] pdf_cache["file_type"] = "image" - except Exception as e: - return None, f"
Image loading failed: {str(e)}
" - else: - return None, "
Unsupported file format
" + else: + return None, "
Unsupported file format
", session_state + except Exception as e: + return None, f"
PDF loading failed: {str(e)}
", session_state pdf_cache["images"] = pages pdf_cache["current_page"] = 0 @@ -120,14 +116,14 @@ def load_file_for_preview(file_path): pdf_cache["is_parsed"] = False pdf_cache["results"] = [] - return pages[0], f"
1 / {len(pages)}
" + return pages[0], f"
1 / {len(pages)}
", session_state -def turn_page(direction): +def turn_page(direction, session_state): """Page turning function""" - global pdf_cache + pdf_cache = session_state['pdf_cache'] if not pdf_cache["images"]: - return None, "
0 / 0
", "", "" + return None, "
0 / 0
", "", session_state if direction == "prev": pdf_cache["current_page"] = max(0, pdf_cache["current_page"] - 1) @@ -138,27 +134,18 @@ def turn_page(direction): current_image = pdf_cache["images"][index] # Use the original image by default page_info = f"
{index + 1} / {pdf_cache['total_pages']}
" - # If parsed, display the results for the current page - current_md = "" - current_md_raw = "" current_json = "" if pdf_cache["is_parsed"] and index < len(pdf_cache["results"]): result = pdf_cache["results"][index] - if 'md_content' in result: - # Get the raw markdown content - current_md_raw = result['md_content'] - # Process the content after LaTeX rendering - current_md = result['md_content'] if result['md_content'] else "" - if 'cells_data' in result: + if 'cells_data' in result and result['cells_data']: try: current_json = json.dumps(result['cells_data'], ensure_ascii=False, indent=2) except: current_json = str(result.get('cells_data', '')) - # Use the image with layout boxes (if available) if 'layout_image' in result and result['layout_image']: current_image = result['layout_image'] - return current_image, page_info, current_json + return current_image, page_info, current_json, session_state def get_test_images(): """Gets the list of test images""" @@ -169,13 +156,6 @@ def get_test_images(): if name.lower().endswith(('.png', '.jpg', '.jpeg', '.pdf'))] return test_images -def convert_image_to_base64(image): - """Converts a PIL image to base64 encoding""" - buffered = io.BytesIO() - image.save(buffered, format="PNG") - img_str = base64.b64encode(buffered.getvalue()).decode() - return f"data:image/png;base64,{img_str}" - def create_temp_session_dir(): """Creates a unique temporary directory for each processing request""" session_id = uuid.uuid4().hex[:8] @@ -198,7 +178,6 @@ def parse_image_with_high_level_api(parser, image, prompt_mode, fitz_preprocess= # Use the high-level API parse_image filename = f"demo_{session_id}" results = parser.parse_image( - # input_path=temp_image_path, input_path=image, filename=filename, prompt_mode=prompt_mode, @@ -212,46 +191,32 @@ def parse_image_with_high_level_api(parser, image, prompt_mode, fitz_preprocess= result = results[0] # parse_image returns a list with a single result - # Read the result files layout_image = None - cells_data = None - md_content = None - raw_response = None - filtered = False - - # Read the layout image if 'layout_image_path' in result and os.path.exists(result['layout_image_path']): layout_image = Image.open(result['layout_image_path']) - # Read the JSON data + cells_data = None if 'layout_info_path' in result and os.path.exists(result['layout_info_path']): with open(result['layout_info_path'], 'r', encoding='utf-8') as f: cells_data = json.load(f) - # Read the Markdown content + md_content = None if 'md_content_path' in result and os.path.exists(result['md_content_path']): with open(result['md_content_path'], 'r', encoding='utf-8') as f: md_content = f.read() - # Check for the raw response file (when JSON parsing fails) - if 'filtered' in result: - filtered = result['filtered'] - return { 'layout_image': layout_image, 'cells_data': cells_data, 'md_content': md_content, - 'filtered': filtered, + 'filtered': result.get('filtered', False), 'temp_dir': temp_dir, 'session_id': session_id, 'result_paths': result, - 'input_width': result['input_width'], - 'input_height': result['input_height'], + 'input_width': result.get('input_width', 0), + 'input_height': result.get('input_height', 0), } - except Exception as e: - # Clean up the temporary directory on error - import shutil if os.path.exists(temp_dir): shutil.rmtree(temp_dir, ignore_errors=True) raise e @@ -307,17 +272,10 @@ def parse_pdf_with_high_level_api(parser, pdf_path, prompt_mode): page_content = f.read() page_result['md_content'] = page_content all_md_content.append(page_content) - - # Check for the raw response file (when JSON parsing fails) - page_result['filtered'] = False - if 'filtered' in page_result: - page_result['filtered'] = page_result['filtered'] - + page_result['filtered'] = result.get('filtered', False) parsed_results.append(page_result) - # Merge the content of all pages combined_md = "\n\n---\n\n".join(all_md_content) if all_md_content else "" - return { 'parsed_results': parsed_results, 'combined_md_content': combined_md, @@ -328,42 +286,30 @@ def parse_pdf_with_high_level_api(parser, pdf_path, prompt_mode): } except Exception as e: - # Clean up the temporary directory on error - import shutil if os.path.exists(temp_dir): shutil.rmtree(temp_dir, ignore_errors=True) raise e # ==================== Core Processing Function ==================== -def process_image_inference(test_image_input, file_input, +def process_image_inference(session_state, test_image_input, file_input, prompt_mode, server_ip, server_port, min_pixels, max_pixels, fitz_preprocess=False ): """Core function to handle image/PDF inference""" - global current_config, processing_results, dots_parser, pdf_cache + # Use session_state instead of global variables + processing_results = session_state['processing_results'] + pdf_cache = session_state['pdf_cache'] - # First, clean up previous processing results to avoid confusion with the download button if processing_results.get('temp_dir') and os.path.exists(processing_results['temp_dir']): - import shutil try: shutil.rmtree(processing_results['temp_dir'], ignore_errors=True) except Exception as e: print(f"Failed to clean up previous temporary directory: {e}") - # Reset processing results - processing_results = { - 'original_image': None, - 'processed_image': None, - 'layout_result': None, - 'markdown_content': None, - 'cells_data': None, - 'temp_dir': None, - 'session_id': None, - 'result_paths': None, - 'pdf_results': None - } + # Reset processing results for the current session + session_state['processing_results'] = get_initial_session_state()['processing_results'] + processing_results = session_state['processing_results'] - # Update configuration current_config.update({ 'ip': server_ip, 'port_vllm': server_port, @@ -377,294 +323,119 @@ def process_image_inference(test_image_input, file_input, dots_parser.min_pixels = min_pixels dots_parser.max_pixels = max_pixels - # Determine the input source - input_file_path = None - image = None + input_file_path = file_input if file_input else test_image_input - # Prioritize file input (supports PDF) - if file_input is not None: - input_file_path = file_input - file_ext = os.path.splitext(input_file_path)[1].lower() - - if file_ext == '.pdf': - # PDF file processing - try: - return process_pdf_file(input_file_path, prompt_mode) - except Exception as e: - return None, f"PDF processing failed: {e}", "", "", gr.update(value=None), None, "" - elif file_ext in ['.jpg', '.jpeg', '.png']: - # Image file processing - try: - image = Image.open(input_file_path) - except Exception as e: - return None, f"Failed to read image file: {e}", "", "", gr.update(value=None), None, "" + if not input_file_path: + return None, "Please upload image/PDF file or select test image", "", "", gr.update(value=None), None, "", session_state - # If no file input, check the test image input - if image is None: - if test_image_input and test_image_input != "": - file_ext = os.path.splitext(test_image_input)[1].lower() - if file_ext == '.pdf': - return process_pdf_file(test_image_input, prompt_mode) - else: - try: - image = read_image_v2(test_image_input) - except Exception as e: - return None, f"Failed to read test image: {e}", "", "", gr.update(value=None), gr.update(value=None), None, "" - - if image is None: - return None, "Please upload image/PDF file or select test image", "", "", gr.update(value=None), None, "" + file_ext = os.path.splitext(input_file_path)[1].lower() try: - # Clear PDF cache (for image processing) - pdf_cache["images"] = [] - pdf_cache["current_page"] = 0 - pdf_cache["total_pages"] = 0 - pdf_cache["is_parsed"] = False - pdf_cache["results"] = [] - - # Process using the high-level API of DotsOCRParser - original_image = image - parse_result = parse_image_with_high_level_api(dots_parser, image, prompt_mode, fitz_preprocess) - - # Extract parsing results - layout_image = parse_result['layout_image'] - cells_data = parse_result['cells_data'] - md_content = parse_result['md_content'] - filtered = parse_result['filtered'] - - # Handle parsing failure case - if filtered: - # JSON parsing failed, only text content is available - info_text = f""" -**Image Information:** -- Original Size: {original_image.width} x {original_image.height} -- Processing: JSON parsing failed, using cleaned text output -- Server: {current_config['ip']}:{current_config['port_vllm']} -- Session ID: {parse_result['session_id']} - """ + if file_ext == '.pdf': + # MINIMAL CHANGE: The `process_pdf_file` function is now inlined and uses session_state. + preview_image, page_info, session_state = load_file_for_preview(input_file_path, session_state) + pdf_result = parse_pdf_with_high_level_api(dots_parser, input_file_path, prompt_mode) + + session_state['pdf_cache']["is_parsed"] = True + session_state['pdf_cache']["results"] = pdf_result['parsed_results'] - # Store results processing_results.update({ - 'original_image': original_image, - 'processed_image': None, - 'layout_result': None, - 'markdown_content': md_content, - 'cells_data': None, - 'temp_dir': parse_result['temp_dir'], - 'session_id': parse_result['session_id'], + 'markdown_content': pdf_result['combined_md_content'], + 'cells_data': pdf_result['combined_cells_data'], + 'temp_dir': pdf_result['temp_dir'], + 'session_id': pdf_result['session_id'], + 'pdf_results': pdf_result['parsed_results'] + }) + + total_elements = len(pdf_result['combined_cells_data']) + info_text = f"**PDF Information:**\n- Total Pages: {pdf_result['total_pages']}\n- Server: {current_config['ip']}:{current_config['port_vllm']}\n- Total Detected Elements: {total_elements}\n- Session ID: {pdf_result['session_id']}" + + current_page_layout_image = preview_image + current_page_json = "" + if session_state['pdf_cache']["results"]: + first_result = session_state['pdf_cache']["results"][0] + if 'layout_image' in first_result and first_result['layout_image']: + current_page_layout_image = first_result['layout_image'] + if first_result.get('cells_data'): + try: + current_page_json = json.dumps(first_result['cells_data'], ensure_ascii=False, indent=2) + except: + current_page_json = str(first_result['cells_data']) + + download_zip_path = None + if pdf_result['temp_dir']: + download_zip_path = os.path.join(pdf_result['temp_dir'], f"layout_results_{pdf_result['session_id']}.zip") + with zipfile.ZipFile(download_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + for root, _, files in os.walk(pdf_result['temp_dir']): + for file in files: + if not file.endswith('.zip'): zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), pdf_result['temp_dir'])) + + return ( + current_page_layout_image, info_text, pdf_result['combined_md_content'] or "No markdown content generated", + pdf_result['combined_md_content'] or "No markdown content generated", + gr.update(value=download_zip_path, visible=bool(download_zip_path)), page_info, current_page_json, session_state + ) + + else: # Image processing + image = read_image_v2(input_file_path) + session_state['pdf_cache'] = get_initial_session_state()['pdf_cache'] + + original_image = image + parse_result = parse_image_with_high_level_api(dots_parser, image, prompt_mode, fitz_preprocess) + + if parse_result['filtered']: + info_text = f"**Image Information:**\n- Original Size: {original_image.width} x {original_image.height}\n- Processing: JSON parsing failed, using cleaned text output\n- Server: {current_config['ip']}:{current_config['port_vllm']}\n- Session ID: {parse_result['session_id']}" + processing_results.update({ + 'original_image': original_image, 'markdown_content': parse_result['md_content'], + 'temp_dir': parse_result['temp_dir'], 'session_id': parse_result['session_id'], + 'result_paths': parse_result['result_paths'] + }) + return original_image, info_text, parse_result['md_content'], parse_result['md_content'], gr.update(visible=False), None, "", session_state + + md_content_raw = parse_result['md_content'] or "No markdown content generated" + processing_results.update({ + 'original_image': original_image, 'layout_result': parse_result['layout_image'], + 'markdown_content': parse_result['md_content'], 'cells_data': parse_result['cells_data'], + 'temp_dir': parse_result['temp_dir'], 'session_id': parse_result['session_id'], 'result_paths': parse_result['result_paths'] }) + num_elements = len(parse_result['cells_data']) if parse_result['cells_data'] else 0 + info_text = f"**Image Information:**\n- Original Size: {original_image.width} x {original_image.height}\n- Model Input Size: {parse_result['input_width']} x {parse_result['input_height']}\n- Server: {current_config['ip']}:{current_config['port_vllm']}\n- Detected {num_elements} layout elements\n- Session ID: {parse_result['session_id']}" + + current_json = json.dumps(parse_result['cells_data'], ensure_ascii=False, indent=2) if parse_result['cells_data'] else "" + + download_zip_path = None + if parse_result['temp_dir']: + download_zip_path = os.path.join(parse_result['temp_dir'], f"layout_results_{parse_result['session_id']}.zip") + with zipfile.ZipFile(download_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + for root, _, files in os.walk(parse_result['temp_dir']): + for file in files: + if not file.endswith('.zip'): zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), parse_result['temp_dir'])) + return ( - original_image, # No layout image - info_text, - md_content, - md_content, # Display raw markdown text - gr.update(visible=False), # Hide download button - None, # Page info - "" # Current page JSON output + parse_result['layout_image'], info_text, parse_result['md_content'] or "No markdown content generated", + md_content_raw, gr.update(value=download_zip_path, visible=bool(download_zip_path)), + None, current_json, session_state ) - - # JSON parsing successful case - # Save the raw markdown content (before LaTeX processing) - md_content_raw = md_content or "No markdown content generated" - - # Store results - processing_results.update({ - 'original_image': original_image, - 'processed_image': None, # High-level API does not return processed_image - 'layout_result': layout_image, - 'markdown_content': md_content, - 'cells_data': cells_data, - 'temp_dir': parse_result['temp_dir'], - 'session_id': parse_result['session_id'], - 'result_paths': parse_result['result_paths'] - }) - - # Prepare display information - num_elements = len(cells_data) if cells_data else 0 - info_text = f""" -**Image Information:** -- Original Size: {original_image.width} x {original_image.height} -- Model Input Size: {parse_result['input_width']} x {parse_result['input_height']} -- Server: {current_config['ip']}:{current_config['port_vllm']} -- Detected {num_elements} layout elements -- Session ID: {parse_result['session_id']} - """ - - # Current page JSON output - current_json = "" - if cells_data: - try: - current_json = json.dumps(cells_data, ensure_ascii=False, indent=2) - except: - current_json = str(cells_data) - - # Create the download ZIP file - download_zip_path = None - if parse_result['temp_dir']: - download_zip_path = os.path.join(parse_result['temp_dir'], f"layout_results_{parse_result['session_id']}.zip") - try: - with zipfile.ZipFile(download_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: - for root, dirs, files in os.walk(parse_result['temp_dir']): - for file in files: - if file.endswith('.zip'): - continue - file_path = os.path.join(root, file) - arcname = os.path.relpath(file_path, parse_result['temp_dir']) - zipf.write(file_path, arcname) - except Exception as e: - print(f"Failed to create download ZIP: {e}") - download_zip_path = None - - return ( - layout_image, - info_text, - md_content or "No markdown content generated", - md_content_raw, # Raw markdown text - gr.update(value=download_zip_path, visible=True) if download_zip_path else gr.update(visible=False), # Set the download file - None, # Page info (not displayed for image processing) - current_json # Current page JSON - ) - except Exception as e: - return None, f"Error during processing: {e}", "", "", gr.update(value=None), None, "" + import traceback + traceback.print_exc() + return None, f"Error during processing: {e}", "", "", gr.update(value=None), None, "", session_state -def process_pdf_file(pdf_path, prompt_mode): - """Dedicated function for processing PDF files""" - global pdf_cache, processing_results, dots_parser - - try: - # First, load the PDF for preview - preview_image, page_info = load_file_for_preview(pdf_path) - - # Parse the PDF using DotsOCRParser - pdf_result = parse_pdf_with_high_level_api(dots_parser, pdf_path, prompt_mode) - - # Update the PDF cache - pdf_cache["is_parsed"] = True - pdf_cache["results"] = pdf_result['parsed_results'] - - # Handle LaTeX table rendering - combined_md = pdf_result['combined_md_content'] - combined_md_raw = combined_md or "No markdown content generated" # Save the raw content - - # Store results - processing_results.update({ - 'original_image': None, - 'processed_image': None, - 'layout_result': None, - 'markdown_content': combined_md, - 'cells_data': pdf_result['combined_cells_data'], - 'temp_dir': pdf_result['temp_dir'], - 'session_id': pdf_result['session_id'], - 'result_paths': None, - 'pdf_results': pdf_result['parsed_results'] - }) - - # Prepare display information - total_elements = len(pdf_result['combined_cells_data']) - info_text = f""" -**PDF Information:** -- Total Pages: {pdf_result['total_pages']} -- Server: {current_config['ip']}:{current_config['port_vllm']} -- Total Detected Elements: {total_elements} -- Session ID: {pdf_result['session_id']} - """ - - # Content of the current page (first page) - current_page_md = "" - current_page_md_raw = "" - current_page_json = "" - current_page_layout_image = preview_image # Use the original preview image by default - - if pdf_cache["results"] and len(pdf_cache["results"]) > 0: - current_result = pdf_cache["results"][0] - if current_result['md_content']: - # Raw markdown content - current_page_md_raw = current_result['md_content'] - # Process the content after LaTeX rendering - - current_page_md = current_result['md_content'] - if current_result['cells_data']: - try: - current_page_json = json.dumps(current_result['cells_data'], ensure_ascii=False, indent=2) - except: - current_page_json = str(current_result['cells_data']) - # Use the image with layout boxes (if available) - if 'layout_image' in current_result and current_result['layout_image']: - current_page_layout_image = current_result['layout_image'] - - # Create the download ZIP file - download_zip_path = None - if pdf_result['temp_dir']: - download_zip_path = os.path.join(pdf_result['temp_dir'], f"layout_results_{pdf_result['session_id']}.zip") - try: - with zipfile.ZipFile(download_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: - for root, dirs, files in os.walk(pdf_result['temp_dir']): - for file in files: - if file.endswith('.zip'): - continue - file_path = os.path.join(root, file) - arcname = os.path.relpath(file_path, pdf_result['temp_dir']) - zipf.write(file_path, arcname) - except Exception as e: - print(f"Failed to create download ZIP: {e}") - download_zip_path = None - - return ( - current_page_layout_image, # Use the image with layout boxes - info_text, - combined_md or "No markdown content generated", # Display the markdown for the entire PDF - combined_md_raw or "No markdown content generated", # Display the raw markdown for the entire PDF - gr.update(value=download_zip_path, visible=True) if download_zip_path else gr.update(visible=False), # Set the download file - page_info, - current_page_json - ) - - except Exception as e: - # Reset the PDF cache - pdf_cache["images"] = [] - pdf_cache["current_page"] = 0 - pdf_cache["total_pages"] = 0 - pdf_cache["is_parsed"] = False - pdf_cache["results"] = [] - raise e - -def clear_all_data(): +# MINIMAL CHANGE: Functions now take `session_state` as an argument. +def clear_all_data(session_state): """Clears all data""" - global processing_results, pdf_cache + processing_results = session_state['processing_results'] - # Clean up the temporary directory if processing_results.get('temp_dir') and os.path.exists(processing_results['temp_dir']): - import shutil try: shutil.rmtree(processing_results['temp_dir'], ignore_errors=True) except Exception as e: print(f"Failed to clean up temporary directory: {e}") - # Reset processing results - processing_results = { - 'original_image': None, - 'processed_image': None, - 'layout_result': None, - 'markdown_content': None, - 'cells_data': None, - 'temp_dir': None, - 'session_id': None, - 'result_paths': None, - 'pdf_results': None - } - - # Reset the PDF cache - pdf_cache = { - "images": [], - "current_page": 0, - "total_pages": 0, - "file_type": None, - "is_parsed": False, - "results": [] - } + # Reset the session state by returning a new initial state + new_session_state = get_initial_session_state() return ( None, # Clear file input @@ -675,7 +446,8 @@ def clear_all_data(): "🕐 Waiting for parsing result...", # Clear raw Markdown text gr.update(visible=False), # Hide download button "
0 / 0
", # Reset page info - "🕐 Waiting for parsing result..." # Clear current page JSON + "🕐 Waiting for parsing result...", # Clear current page JSON + new_session_state ) def update_prompt_display(prompt_mode): @@ -746,6 +518,7 @@ def create_gradio_interface(): """ with gr.Blocks(theme="ocean", css=css, title='dots.ocr') as demo: + session_state = gr.State(value=get_initial_session_state()) # Title gr.HTML(""" @@ -779,7 +552,6 @@ def create_gradio_interface(): label="Select Prompt", choices=["prompt_layout_all_en", "prompt_layout_only_en", "prompt_ocr"], value="prompt_layout_all_en", - show_label=True ) # Display current prompt content @@ -844,11 +616,10 @@ def create_gradio_interface(): with gr.TabItem("Markdown Render Preview"): md_output = gr.Markdown( "## Please click the parse button to parse or select for single-task recognition...", - label="Markdown Preview", max_height=600, latex_delimiters=[ {"left": "$$", "right": "$$", "display": True}, - {"left": "$", "right": "$", "display": False}, + {"left": "$", "right": "$", "display": False} ], show_copy_button=False, elem_id="markdown_output" @@ -888,61 +659,68 @@ def create_gradio_interface(): fn=update_prompt_display, inputs=prompt_mode, outputs=prompt_display, - show_progress=False ) # Show preview on file upload file_input.upload( + # fn=lambda file_data, state: load_file_for_preview(file_data, state), fn=load_file_for_preview, - inputs=file_input, - outputs=[result_image, page_info], - show_progress=False + inputs=[file_input, session_state], + outputs=[result_image, page_info, session_state] ) - # Page navigation + # Also handle test image selection + test_image_input.change( + # fn=lambda path, state: load_file_for_preview(path, state), + fn=load_file_for_preview, + inputs=[test_image_input, session_state], + outputs=[result_image, page_info, session_state] + ) + prev_btn.click( - fn=lambda: turn_page("prev"), - outputs=[result_image, page_info, current_page_json], - show_progress=False + fn=lambda s: turn_page("prev", s), + inputs=[session_state], + outputs=[result_image, page_info, current_page_json, session_state] ) next_btn.click( - fn=lambda: turn_page("next"), - outputs=[result_image, page_info, current_page_json], - show_progress=False + fn=lambda s: turn_page("next", s), + inputs=[session_state], + outputs=[result_image, page_info, current_page_json, session_state] ) process_btn.click( fn=process_image_inference, inputs=[ - test_image_input, file_input, + session_state, test_image_input, file_input, prompt_mode, server_ip, server_port, min_pixels, max_pixels, fitz_preprocess ], outputs=[ result_image, info_display, md_output, md_raw_output, - download_btn, page_info, current_page_json - ], - show_progress=True + download_btn, page_info, current_page_json, session_state + ] ) clear_btn.click( fn=clear_all_data, + inputs=[session_state], outputs=[ file_input, test_image_input, result_image, info_display, md_output, md_raw_output, - download_btn, page_info, current_page_json - ], - show_progress=False + download_btn, page_info, current_page_json, session_state + ] ) return demo # ==================== Main Program ==================== if __name__ == "__main__": + import sys + port = int(sys.argv[1]) demo = create_gradio_interface() demo.queue().launch( server_name="0.0.0.0", - server_port=7860, + server_port=port, debug=True )