2787 lines
114 KiB
Python
2787 lines
114 KiB
Python
import os
|
||
import io
|
||
import uuid
|
||
import json
|
||
import zipfile
|
||
import tempfile
|
||
import threading
|
||
import queue
|
||
import shutil
|
||
from pathlib import Path
|
||
from PIL import Image
|
||
import requests
|
||
import gradio as gr
|
||
import re
|
||
import math
|
||
import datetime
|
||
|
||
# Local project imports (assumed available)
|
||
from dots_ocr.utils import dict_promptmode_to_prompt
|
||
from dots_ocr.utils.consts import MIN_PIXELS, MAX_PIXELS
|
||
from dots_ocr.utils.demo_utils.display import read_image
|
||
from dots_ocr.parser import DotsOCRParser
|
||
|
||
# ---------------- Config & globals ----------------
|
||
DEFAULT_CONFIG = {
|
||
"ip": "127.0.0.1",
|
||
"port_vllm": 8000,
|
||
"min_pixels": MIN_PIXELS,
|
||
"max_pixels": MAX_PIXELS,
|
||
}
|
||
|
||
# Absolute constraints discovered from runtime:
|
||
ABS_MIN_PIXELS = 3136
|
||
ABS_MAX_PIXELS = 11289600
|
||
|
||
current_config = DEFAULT_CONFIG.copy()
|
||
|
||
# default parser instance (can be overridden per-task)
|
||
dots_parser = DotsOCRParser(
|
||
ip=DEFAULT_CONFIG["ip"],
|
||
port=DEFAULT_CONFIG["port_vllm"],
|
||
dpi=200,
|
||
min_pixels=DEFAULT_CONFIG["min_pixels"],
|
||
max_pixels=DEFAULT_CONFIG["max_pixels"],
|
||
)
|
||
|
||
RESULTS_CACHE = {} # rid -> result dict or placeholder
|
||
TASK_QUEUE = queue.Queue()
|
||
# Worker pool for background processing (adjustable via UI)
|
||
WORKER_THREADS = []
|
||
MAX_CONCURRENCY = 6
|
||
THREAD_LOCK = threading.Lock()
|
||
RETRY_COUNTS = {} # rid -> attempts
|
||
MAX_AUTO_RETRIES = 5
|
||
RETRY_BACKOFF_BASE = 1.7
|
||
DEFAULT_SCRIPT_TEMPLATE = """# 高级脚本使用说明
|
||
# 提供对象: api
|
||
# 日志: 使用 print(...) 或 debug(...) 输出到下方“脚本日志”实时区域。
|
||
# api.get_ids() -> [rid,...] 按当前 UI 顺序返回
|
||
# api.get_status(rid) -> {'status','ui': {'tab','nohf','source'}, 'filtered': bool, 'input_width': int, 'input_height': int}
|
||
# api.get_texts(rid) -> {
|
||
# 'md': 原始 Markdown, 'md_nohf': 原始 NOHF Markdown, 'json': 原始 JSON,
|
||
# 'md_edit': 编辑版 Markdown 或 None, 'md_nohf_edit': 编辑版 NOHF Markdown 或 None, 'json_edit': 编辑版 JSON 或 None
|
||
# }
|
||
# api.choose_texts(rid, prefer_ui=True, prefer_edit=True, prefer_nohf=None) -> {'md','json'}
|
||
# - prefer_ui: True 时根据当前 UI 的 NOHF/来源选择内容
|
||
# - prefer_edit: True 时优先用编辑内容(若存在)
|
||
# - prefer_nohf: 显式指定是否使用 NOHF(覆盖 UI),None 表示跟随 UI
|
||
# api.list_paths(rid) -> {
|
||
# 'temp_dir': str, 'session_id': str,
|
||
# 'result': {'md':path,'md_nohf':path,'json':path,'layout':path or None,'image':path or None},
|
||
# 'edited': {'md':path or None,'md_nohf':path or None,'json':path or None}
|
||
# }
|
||
# api.path_exists(path) -> bool 判断路径是否存在
|
||
# api.build_export(name='custom') -> ExportBuilder
|
||
# ExportBuilder:
|
||
# .add_text('dir/file.md', '...') 写入文本
|
||
# .add_bytes('bin/data.bin', b'...') 写入二进制
|
||
# .add_file('/abs/path/file.md', 'dir/file.md') 拷贝已有文件
|
||
# .mkdir('subdir/') 创建目录
|
||
# .finalize() -> zip_path 打包为 zip 并返回路径
|
||
#
|
||
# 约定: 定义 main(api) 并返回以下之一:
|
||
# - ExportBuilder 实例(将自动 finalize)
|
||
# - 目录路径或文件路径(目录将被打包为 zip)
|
||
# - None(若存在变量 export=ExportBuilder,将自动 finalize)
|
||
#
|
||
# 示例:按 UI 所见优先使用“编辑源码”与 NOHF,导出每个结果的 md/json,同时附带原始与编辑文件
|
||
def main(api):
|
||
ids = api.get_ids()
|
||
eb = api.build_export('custom_export')
|
||
for i, rid in enumerate(ids, start=1):
|
||
st = api.get_status(rid)
|
||
if st['status'] != 'done':
|
||
continue
|
||
choice = api.choose_texts(rid, prefer_ui=True, prefer_edit=True)
|
||
eb.add_text(f'result_{i}_{rid}/content.md', choice['md'] or '')
|
||
eb.add_text(f'result_{i}_{rid}/data.json', choice['json'] or '{}')
|
||
paths = api.list_paths(rid)
|
||
# 附带原始文件
|
||
for p in (paths.get('result') or {}).values():
|
||
if p and api.path_exists(p):
|
||
name = Path(p).name
|
||
eb.add_file(p, f'result_{i}_{rid}/raw/{name}')
|
||
# 附带编辑文件
|
||
for p in (paths.get('edited') or {}).values():
|
||
if p and api.path_exists(p):
|
||
name = Path(p).name
|
||
eb.add_file(p, f'result_{i}_{rid}/edited/{name}')
|
||
return eb
|
||
"""
|
||
|
||
|
||
# ---------------- Helpers ----------------
|
||
def read_image_v2(img):
|
||
"""Read image from URL or local path / PIL.Image. Supports file paths and URLs."""
|
||
if isinstance(img, Image.Image):
|
||
return img
|
||
if isinstance(img, str) and img.startswith(("http://", "https://")):
|
||
with requests.get(img, stream=True) as r:
|
||
r.raise_for_status()
|
||
return Image.open(io.BytesIO(r.content)).convert("RGB")
|
||
if isinstance(img, str) and os.path.exists(img):
|
||
return Image.open(img).convert("RGB")
|
||
try:
|
||
img_res = read_image(img, use_native=True)
|
||
if isinstance(img_res, tuple) and isinstance(img_res[0], Image.Image):
|
||
return img_res[0]
|
||
except Exception:
|
||
pass
|
||
raise ValueError(f"Unsupported image input: {type(img)} / {repr(img)[:200]}")
|
||
|
||
|
||
def create_temp_session_dir():
|
||
session_id = uuid.uuid4().hex[:8]
|
||
temp_dir = os.path.join(tempfile.gettempdir(), f"dots_ocr_demo_{session_id}")
|
||
os.makedirs(temp_dir, exist_ok=True)
|
||
return temp_dir, session_id
|
||
|
||
|
||
def classify_parse_failure(exc, min_p, max_p):
|
||
"""Return a user-friendly error message for known failure causes."""
|
||
msg = str(exc)
|
||
reasons = []
|
||
# Absolute & semantic constraints
|
||
if min_p < ABS_MIN_PIXELS:
|
||
reasons.append(
|
||
f"Min Pixels 过小:{min_p},必须 >= {ABS_MIN_PIXELS}。建议提高 Min Pixels。"
|
||
)
|
||
if max_p > ABS_MAX_PIXELS:
|
||
reasons.append(
|
||
f"Max Pixels 过大:{max_p},必须 <= {ABS_MAX_PIXELS}。建议降低 Max Pixels。"
|
||
)
|
||
if min_p >= max_p:
|
||
reasons.append(
|
||
f"像素参数不合法:Min Pixels({min_p}) >= Max Pixels({max_p}),必须满足 Min Pixels < Max Pixels。"
|
||
)
|
||
|
||
lower = msg.lower()
|
||
if "no results returned from parser" in lower or "no results returned" in lower:
|
||
reasons.append(
|
||
"解析未返回结果。可能原因:图像过小、Min Pixels 设置过小或过滤过强。"
|
||
f"建议:Min Pixels >= {ABS_MIN_PIXELS} 且 Max Pixels <= {ABS_MAX_PIXELS}。"
|
||
)
|
||
if "failed to read input" in lower or "cannot identify image file" in lower:
|
||
reasons.append("无法读取输入文件,请确认文件是否为有效图片或PDF。")
|
||
if ("connection" in lower and "refused" in lower) or ("connectionerror" in lower):
|
||
reasons.append("无法连接后端推理服务,请检查 Server IP/Port 与服务状态。")
|
||
|
||
if not reasons:
|
||
reasons.append(f"未知错误:{msg}")
|
||
|
||
detail = "\n".join(f"- {r}" for r in reasons)
|
||
cfg = f"(当前参数:min_pixels={min_p}, max_pixels={max_p})"
|
||
return f"解析失败:\n{detail}\n{cfg}"
|
||
|
||
|
||
def _is_transient_backend_error(exc: Exception):
|
||
lower = str(exc).lower()
|
||
# Common signals: connection refused/reset, timeout, gateway, service unavailable
|
||
keywords = [
|
||
"connection refused",
|
||
"connectionerror",
|
||
"timeout",
|
||
"timed out",
|
||
"gateway",
|
||
"service unavailable",
|
||
"failed to establish a new connection",
|
||
"max retries exceeded",
|
||
"read timeout",
|
||
"connect timeout",
|
||
]
|
||
return any(k in lower for k in keywords)
|
||
|
||
|
||
def parse_image_with_high_level_api(parser, image, prompt_mode, fitz_preprocess=False):
|
||
"""
|
||
Calls parser.parse_image with a PIL image (or accepts image path if parser expects path).
|
||
Returns dictionary with artifacts. Keeps a temp PNG of the input for traceability.
|
||
"""
|
||
temp_dir, session_id = create_temp_session_dir()
|
||
if not isinstance(image, Image.Image):
|
||
image = read_image_v2(image)
|
||
temp_image_path = os.path.join(temp_dir, f"input_{session_id}.png")
|
||
image.save(temp_image_path, "PNG")
|
||
|
||
filename = f"demo_{session_id}"
|
||
results = parser.parse_image(
|
||
input_path=image,
|
||
filename=filename,
|
||
prompt_mode=prompt_mode,
|
||
save_dir=temp_dir,
|
||
fitz_preprocess=fitz_preprocess,
|
||
)
|
||
if not results:
|
||
raise RuntimeError("No results returned from parser")
|
||
|
||
result = results[0]
|
||
layout_image = None
|
||
if result.get("layout_image_path") and os.path.exists(result["layout_image_path"]):
|
||
try:
|
||
layout_image = Image.open(result["layout_image_path"]).convert("RGB")
|
||
except Exception:
|
||
layout_image = None
|
||
|
||
cells_data = None
|
||
if result.get("layout_info_path") and os.path.exists(result["layout_info_path"]):
|
||
with open(result["layout_info_path"], "r", encoding="utf-8") as f:
|
||
cells_data = json.load(f)
|
||
|
||
md_content = None
|
||
if result.get("md_content_path") and os.path.exists(result["md_content_path"]):
|
||
with open(result["md_content_path"], "r", encoding="utf-8") as f:
|
||
md_content = f.read()
|
||
|
||
md_content_nohf = None
|
||
if result.get("md_content_nohf_path") and os.path.exists(
|
||
result["md_content_nohf_path"]
|
||
):
|
||
with open(result["md_content_nohf_path"], "r", encoding="utf-8") as f:
|
||
md_content_nohf = f.read()
|
||
|
||
json_code = ""
|
||
if cells_data is not None:
|
||
try:
|
||
json_code = json.dumps(cells_data, ensure_ascii=False, indent=2)
|
||
except Exception:
|
||
json_code = str(cells_data)
|
||
|
||
return {
|
||
"original_image": image,
|
||
"layout_image": layout_image,
|
||
"cells_data": cells_data,
|
||
"md_content": md_content,
|
||
"md_content_nohf": md_content_nohf,
|
||
"json_code": json_code,
|
||
"filtered": result.get("filtered", False),
|
||
"temp_dir": temp_dir,
|
||
"session_id": session_id,
|
||
"result_paths": result,
|
||
"input_width": result.get("input_width", 0),
|
||
"input_height": result.get("input_height", 0),
|
||
"input_temp_path": temp_image_path,
|
||
}
|
||
|
||
|
||
def _validate_pixels(min_p, max_p):
|
||
"""Coerce pixel parameters. Do NOT auto-swap; semantic errors are handled by pre-validation."""
|
||
try:
|
||
min_p = int(min_p)
|
||
except Exception:
|
||
min_p = DEFAULT_CONFIG["min_pixels"]
|
||
try:
|
||
max_p = int(max_p)
|
||
except Exception:
|
||
max_p = DEFAULT_CONFIG["max_pixels"]
|
||
if min_p <= 0:
|
||
min_p = DEFAULT_CONFIG["min_pixels"]
|
||
if max_p <= 0:
|
||
max_p = DEFAULT_CONFIG["max_pixels"]
|
||
return min_p, max_p
|
||
|
||
|
||
def _set_parser_config(server_ip, server_port, min_pixels, max_pixels):
|
||
min_pixels, max_pixels = _validate_pixels(min_pixels, max_pixels)
|
||
current_config.update(
|
||
{
|
||
"ip": server_ip,
|
||
"port_vllm": int(server_port),
|
||
"min_pixels": min_pixels,
|
||
"max_pixels": max_pixels,
|
||
}
|
||
)
|
||
dots_parser.ip = server_ip
|
||
dots_parser.port = int(server_port)
|
||
dots_parser.min_pixels = min_pixels
|
||
dots_parser.max_pixels = max_pixels
|
||
|
||
|
||
def purge_queue(rid):
|
||
"""Best-effort remove tasks matching rid from queue."""
|
||
pending = []
|
||
try:
|
||
while True:
|
||
task = TASK_QUEUE.get_nowait()
|
||
if task and isinstance(task, tuple):
|
||
if task[0] != rid:
|
||
pending.append(task)
|
||
TASK_QUEUE.task_done()
|
||
except queue.Empty:
|
||
pass
|
||
for t in pending:
|
||
TASK_QUEUE.put(t)
|
||
|
||
|
||
# ---------------- Export helpers ----------------
|
||
def export_one_rid(rid):
|
||
st = RESULTS_CACHE.get(rid)
|
||
if not st:
|
||
return None
|
||
temp_dir = st.get("temp_dir")
|
||
if not temp_dir or not os.path.isdir(temp_dir):
|
||
return None
|
||
out_dir, _sess = create_temp_session_dir()
|
||
zip_path = os.path.join(out_dir, f"export_{rid}.zip")
|
||
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
|
||
for rt, _, files in os.walk(temp_dir):
|
||
for f in files:
|
||
src = os.path.join(rt, f)
|
||
rel = os.path.relpath(src, temp_dir)
|
||
zf.write(src, os.path.join(f"result_{rid}", rel))
|
||
return zip_path
|
||
|
||
|
||
def ensure_export_ready(rid):
|
||
"""Create and cache export zip path if not present."""
|
||
st = RESULTS_CACHE.get(rid) or {}
|
||
if not st or st.get("status") != "done":
|
||
return None
|
||
path = st.get("export_path")
|
||
if path and os.path.exists(path):
|
||
return path
|
||
path = export_one_rid(rid)
|
||
if path:
|
||
st["export_path"] = path
|
||
RESULTS_CACHE[rid] = st
|
||
return path
|
||
|
||
|
||
# ---------------- Script API & execution ----------------
|
||
class ExportBuilder:
|
||
def __init__(self, name=None):
|
||
root, sid = create_temp_session_dir()
|
||
sub = f"script_export_{sid}"
|
||
if name:
|
||
sub = f"{name}_{sid}"
|
||
self.root_dir = os.path.join(root, sub)
|
||
os.makedirs(self.root_dir, exist_ok=True)
|
||
self._final_zip = None
|
||
|
||
def _abspath(self, rel_path: str):
|
||
rel_path = rel_path.lstrip("/\\")
|
||
return os.path.join(self.root_dir, rel_path)
|
||
|
||
def mkdir(self, rel_dir: str):
|
||
p = self._abspath(rel_dir)
|
||
os.makedirs(p, exist_ok=True)
|
||
return p
|
||
|
||
def add_text(self, rel_path: str, content: str, encoding: str = "utf-8"):
|
||
p = self._abspath(rel_path)
|
||
os.makedirs(os.path.dirname(p), exist_ok=True)
|
||
with open(p, "w", encoding=encoding) as f:
|
||
f.write("" if content is None else str(content))
|
||
return p
|
||
|
||
def add_bytes(self, rel_path: str, data: bytes):
|
||
p = self._abspath(rel_path)
|
||
os.makedirs(os.path.dirname(p), exist_ok=True)
|
||
with open(p, "wb") as f:
|
||
f.write(data or b"")
|
||
return p
|
||
|
||
def add_file(self, src_path: str, dest_rel_path: str = None):
|
||
if not src_path or not os.path.exists(src_path):
|
||
return None
|
||
dest_rel_path = dest_rel_path or os.path.basename(src_path)
|
||
p = self._abspath(dest_rel_path)
|
||
os.makedirs(os.path.dirname(p), exist_ok=True)
|
||
shutil.copy2(src_path, p)
|
||
return p
|
||
|
||
def finalize(self, zip_name: str = None):
|
||
if self._final_zip and os.path.exists(self._final_zip):
|
||
return self._final_zip
|
||
out_dir, sid = create_temp_session_dir()
|
||
zip_name = zip_name or f"script_export_{sid}.zip"
|
||
zip_path = os.path.join(out_dir, zip_name)
|
||
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
|
||
for rt, _, files in os.walk(self.root_dir):
|
||
for f in files:
|
||
src = os.path.join(rt, f)
|
||
rel = os.path.relpath(src, self.root_dir)
|
||
zf.write(src, rel)
|
||
self._final_zip = zip_path
|
||
return zip_path
|
||
|
||
|
||
class ScriptAPI:
|
||
def __init__(self, ids_snapshot):
|
||
self._ids = list(ids_snapshot or [])
|
||
|
||
def get_ids(self):
|
||
return list(self._ids)
|
||
|
||
def get_status(self, rid: str):
|
||
st = dict(RESULTS_CACHE.get(rid) or {})
|
||
ui = dict(st.get("ui") or {})
|
||
return {
|
||
"status": st.get("status", "pending"),
|
||
"ui": {
|
||
"tab": ui.get("tab", "md"),
|
||
"nohf": bool(ui.get("nohf", False)),
|
||
"source": ui.get("source", "源码"),
|
||
},
|
||
"filtered": bool(st.get("filtered", False)),
|
||
"input_width": int(st.get("input_width", 0) or 0),
|
||
"input_height": int(st.get("input_height", 0) or 0),
|
||
}
|
||
|
||
def get_texts(self, rid: str):
|
||
st = dict(RESULTS_CACHE.get(rid) or {})
|
||
edits = dict(st.get("edits") or {})
|
||
return {
|
||
"md": st.get("md_content") or "",
|
||
"md_nohf": st.get("md_content_nohf") or "",
|
||
"json": st.get("json_code") or "",
|
||
"md_edit": edits.get("md"),
|
||
"md_nohf_edit": edits.get("nohf"),
|
||
"json_edit": edits.get("json"),
|
||
}
|
||
|
||
def choose_texts(
|
||
self,
|
||
rid: str,
|
||
prefer_ui: bool = True,
|
||
prefer_edit: bool = True,
|
||
prefer_nohf: bool | None = None,
|
||
):
|
||
st = dict(RESULTS_CACHE.get(rid) or {})
|
||
ui = dict(st.get("ui") or {})
|
||
# UI 指示
|
||
ui_nohf = bool(ui.get("nohf", False))
|
||
ui_source_is_edit = str(ui.get("source", "源码")) == "编辑源码"
|
||
# 选择 nohf
|
||
use_nohf = ui_nohf if prefer_nohf is None else bool(prefer_nohf)
|
||
# 选择是否优先编辑
|
||
prefer_edit_final = bool(prefer_edit or (prefer_ui and ui_source_is_edit))
|
||
t = self.get_texts(rid)
|
||
# Markdown
|
||
md_orig = t["md_nohf"] if use_nohf else t["md"]
|
||
md_edit = t["md_nohf_edit"] if use_nohf else t["md_edit"]
|
||
md = (md_edit if (prefer_edit_final and md_edit is not None) else md_orig) or ""
|
||
# JSON
|
||
json_text = (
|
||
t["json_edit"]
|
||
if (prefer_edit_final and t.get("json_edit") is not None)
|
||
else t["json"]
|
||
) or ""
|
||
return {"md": md, "json": json_text}
|
||
|
||
def list_paths(self, rid: str):
|
||
st = dict(RESULTS_CACHE.get(rid) or {})
|
||
rp = dict(st.get("result_paths") or {})
|
||
md_p = rp.get("md_content_path")
|
||
nohf_p = rp.get("md_content_nohf_path")
|
||
json_p = rp.get("layout_info_path") or rp.get("json_path")
|
||
image_p = rp.get("layout_image_path") or None
|
||
# 编辑路径(若存在)
|
||
edited_md = None
|
||
edited_nohf = None
|
||
edited_json = None
|
||
try:
|
||
edited_md = _edited_filepath(st, "md")
|
||
if not os.path.exists(edited_md):
|
||
edited_md = None
|
||
except Exception:
|
||
edited_md = None
|
||
try:
|
||
edited_nohf = _edited_filepath(st, "nohf")
|
||
if not os.path.exists(edited_nohf):
|
||
edited_nohf = None
|
||
except Exception:
|
||
edited_nohf = None
|
||
try:
|
||
edited_json = _edited_filepath(st, "json")
|
||
if not os.path.exists(edited_json):
|
||
edited_json = None
|
||
except Exception:
|
||
edited_json = None
|
||
return {
|
||
"temp_dir": st.get("temp_dir"),
|
||
"session_id": st.get("session_id"),
|
||
"result": {
|
||
"md": md_p if (md_p and os.path.exists(md_p)) else None,
|
||
"md_nohf": nohf_p if (nohf_p and os.path.exists(nohf_p)) else None,
|
||
"json": json_p if (json_p and os.path.exists(json_p)) else None,
|
||
"layout": image_p if (image_p and os.path.exists(image_p)) else None,
|
||
"input_image": (
|
||
st.get("input_temp_path")
|
||
if (
|
||
st.get("input_temp_path")
|
||
and os.path.exists(st.get("input_temp_path"))
|
||
)
|
||
else None
|
||
),
|
||
},
|
||
"edited": {
|
||
"md": edited_md,
|
||
"md_nohf": edited_nohf,
|
||
"json": edited_json,
|
||
},
|
||
}
|
||
|
||
def path_exists(self, p: str) -> bool:
|
||
try:
|
||
return bool(p) and os.path.exists(p)
|
||
except Exception:
|
||
return False
|
||
|
||
def build_export(self, name: str | None = None):
|
||
return ExportBuilder(name=name)
|
||
|
||
|
||
def _safe_builtins():
|
||
base = (
|
||
__builtins__
|
||
if isinstance(__builtins__, dict)
|
||
else getattr(__builtins__, "__dict__", {})
|
||
)
|
||
allow = [
|
||
"abs",
|
||
"min",
|
||
"max",
|
||
"sum",
|
||
"len",
|
||
"range",
|
||
"enumerate",
|
||
"map",
|
||
"filter",
|
||
"zip",
|
||
"list",
|
||
"dict",
|
||
"set",
|
||
"tuple",
|
||
"str",
|
||
"int",
|
||
"float",
|
||
"bool",
|
||
"print",
|
||
"any",
|
||
"all",
|
||
"sorted",
|
||
]
|
||
return {k: base[k] for k in allow if k in base}
|
||
|
||
|
||
def run_user_script(script_code: str, ids_snapshot):
|
||
"""
|
||
非流式执行用户脚本,捕获标准输出并返回(zip_path, logs)。
|
||
"""
|
||
api = ScriptAPI(ids_snapshot)
|
||
ns = {
|
||
"__builtins__": _safe_builtins(),
|
||
"api": api,
|
||
"json": json,
|
||
"re": re,
|
||
"math": math,
|
||
"datetime": datetime,
|
||
"Path": Path,
|
||
"io": io,
|
||
"ExportBuilder": ExportBuilder,
|
||
}
|
||
import contextlib
|
||
from io import StringIO
|
||
|
||
buf = StringIO()
|
||
zip_path = None
|
||
try:
|
||
code = script_code or ""
|
||
with contextlib.redirect_stdout(buf):
|
||
exec(code, ns, ns)
|
||
result = None
|
||
main_fn = ns.get("main")
|
||
if callable(main_fn):
|
||
result = main_fn(api)
|
||
else:
|
||
result = ns.get("RESULT") or ns.get("OUTPUT_PATH")
|
||
if isinstance(result, ExportBuilder):
|
||
zip_path = result.finalize()
|
||
elif isinstance(result, str) and result:
|
||
if os.path.isdir(result):
|
||
eb = ExportBuilder("script_dir_export")
|
||
for rt, _, files in os.walk(result):
|
||
for f in files:
|
||
src = os.path.join(rt, f)
|
||
rel = os.path.relpath(src, result)
|
||
eb.add_file(src, rel)
|
||
zip_path = eb.finalize()
|
||
elif os.path.exists(result):
|
||
zip_path = result
|
||
if not zip_path:
|
||
exp = ns.get("export")
|
||
if isinstance(exp, ExportBuilder):
|
||
zip_path = exp.finalize()
|
||
except Exception as e:
|
||
err = f"[Script Error] {type(e).__name__}: {e}"
|
||
return None, (buf.getvalue() + "\n" + err)
|
||
return (
|
||
zip_path if (zip_path and os.path.exists(zip_path)) else None
|
||
), buf.getvalue()
|
||
|
||
|
||
def run_user_script_stream(script_code: str, ids_snapshot):
|
||
"""生成器:实时输出日志,并在结束时返回下载地址与完成状态。"""
|
||
# 日志队列
|
||
log_q = queue.Queue()
|
||
|
||
def _emit(kind, payload=None):
|
||
log_q.put((kind, payload))
|
||
|
||
def debug(*args, **kwargs):
|
||
text = " ".join(str(a) for a in args)
|
||
if text:
|
||
_emit("log", text)
|
||
|
||
# 准备脚本命名空间(与非流式版本一致,但覆盖 print/debug)
|
||
api = ScriptAPI(ids_snapshot)
|
||
ns = {
|
||
"__builtins__": _safe_builtins(),
|
||
"api": api,
|
||
"json": json,
|
||
"re": re,
|
||
"math": math,
|
||
"datetime": datetime,
|
||
"Path": Path,
|
||
"io": io,
|
||
"ExportBuilder": ExportBuilder,
|
||
# 专用日志函数
|
||
"debug": debug,
|
||
"print": debug,
|
||
}
|
||
|
||
result_holder = {"zip_path": None, "error": None}
|
||
|
||
def _worker():
|
||
try:
|
||
code = script_code or ""
|
||
exec(code, ns, ns)
|
||
res = None
|
||
main_fn = ns.get("main")
|
||
if callable(main_fn):
|
||
res = main_fn(api)
|
||
else:
|
||
res = ns.get("RESULT") or ns.get("OUTPUT_PATH")
|
||
zip_path = None
|
||
if isinstance(res, ExportBuilder):
|
||
zip_path = res.finalize()
|
||
elif isinstance(res, str) and res:
|
||
if os.path.isdir(res):
|
||
eb = ExportBuilder("script_dir_export")
|
||
for rt, _, files in os.walk(res):
|
||
for f in files:
|
||
src = os.path.join(rt, f)
|
||
rel = os.path.relpath(src, res)
|
||
eb.add_file(src, rel)
|
||
zip_path = eb.finalize()
|
||
elif os.path.exists(res):
|
||
zip_path = res
|
||
if not zip_path:
|
||
exp = ns.get("export")
|
||
if isinstance(exp, ExportBuilder):
|
||
zip_path = exp.finalize()
|
||
result_holder["zip_path"] = (
|
||
zip_path if (zip_path and os.path.exists(zip_path)) else None
|
||
)
|
||
except Exception as e:
|
||
result_holder["error"] = f"[Script Error] {type(e).__name__}: {e}"
|
||
finally:
|
||
_emit("done", None)
|
||
|
||
# 启动脚本线程
|
||
t = threading.Thread(target=_worker, daemon=True)
|
||
t.start()
|
||
|
||
# 初始状态显示
|
||
spinner_html = (
|
||
"<div style='display:flex;align-items:center;gap:8px;'>"
|
||
"<svg width='18' height='18' viewBox='0 0 50 50' style='animation:spin 1s linear infinite'>"
|
||
"<circle cx='25' cy='25' r='20' stroke='#FF576D' stroke-width='4' fill='none' stroke-linecap='round' "
|
||
"stroke-dasharray='31.4 31.4'>" # dash pattern for arc
|
||
"</circle></svg>"
|
||
"<style>@keyframes spin{0%{transform:rotate(0deg)}100%{transform:rotate(360deg)}}</style>"
|
||
"<span>脚本运行中…</span></div>"
|
||
)
|
||
log_buf_lines = []
|
||
# 初始仅显示运行中动画,日志区域留空
|
||
yield None, spinner_html, ""
|
||
|
||
# 实时拉取日志并渲染
|
||
while True:
|
||
try:
|
||
kind, payload = log_q.get(timeout=0.2)
|
||
except queue.Empty:
|
||
if not t.is_alive():
|
||
# 线程已结束但没有新的事件,跳到收尾
|
||
break
|
||
else:
|
||
continue
|
||
|
||
if kind == "log":
|
||
# 追加日志并推送更新
|
||
if isinstance(payload, str):
|
||
for line in payload.splitlines() or [payload]:
|
||
if line.strip() == "":
|
||
continue
|
||
log_buf_lines.append(line)
|
||
yield None, spinner_html, "```\n" + "\n".join(
|
||
log_buf_lines[-200:]
|
||
) + "\n```" # 限制最后200行
|
||
elif kind == "done":
|
||
break
|
||
|
||
# 收尾:根据结果/错误输出最终状态
|
||
if result_holder.get("error"):
|
||
log_buf_lines.append(result_holder["error"])
|
||
status_html = (
|
||
"<div style='display:flex;align-items:center;gap:8px;color:#fca5a5'>"
|
||
"<span>❌ 脚本执行失败</span></div>"
|
||
)
|
||
yield None, status_html, "```\n" + "\n".join(log_buf_lines[-500:]) + "\n```"
|
||
else:
|
||
status_html = (
|
||
"<div style='display:flex;align-items:center;gap:8px;color:#86efac'>"
|
||
"<span>✅ 脚本执行完成</span></div>"
|
||
)
|
||
if result_holder.get("zip_path"):
|
||
yield result_holder["zip_path"], status_html, "```\n" + "\n".join(
|
||
log_buf_lines[-500:]
|
||
) + "\n```"
|
||
else:
|
||
log_buf_lines.append(
|
||
"(无可下载文件返回,若需导出请返回 ExportBuilder 或目录/文件路径)"
|
||
)
|
||
yield None, status_html, "```\n" + "\n".join(log_buf_lines[-500:]) + "\n```"
|
||
"""
|
||
执行用户脚本,返回 (zip_path or None, log_text)
|
||
"""
|
||
api = ScriptAPI(ids_snapshot)
|
||
ns = {
|
||
"__builtins__": _safe_builtins(),
|
||
"api": api,
|
||
# 常用库(只读注入)
|
||
"json": json,
|
||
"re": re,
|
||
"math": math,
|
||
"datetime": datetime,
|
||
"Path": Path,
|
||
"io": io,
|
||
# 导出构建器类型(如需构造)
|
||
"ExportBuilder": ExportBuilder,
|
||
}
|
||
import contextlib
|
||
from io import StringIO
|
||
|
||
buf = StringIO()
|
||
zip_path = None
|
||
try:
|
||
code = script_code or ""
|
||
with contextlib.redirect_stdout(buf):
|
||
exec(code, ns, ns)
|
||
result = None
|
||
main_fn = ns.get("main")
|
||
if callable(main_fn):
|
||
result = main_fn(api)
|
||
else:
|
||
result = ns.get("RESULT") or ns.get("OUTPUT_PATH")
|
||
# 结果归档处理
|
||
if isinstance(result, ExportBuilder):
|
||
zip_path = result.finalize()
|
||
elif isinstance(result, str) and result:
|
||
if os.path.isdir(result):
|
||
eb = ExportBuilder("script_dir_export")
|
||
for rt, _, files in os.walk(result):
|
||
for f in files:
|
||
src = os.path.join(rt, f)
|
||
rel = os.path.relpath(src, result)
|
||
eb.add_file(src, rel)
|
||
zip_path = eb.finalize()
|
||
elif os.path.exists(result):
|
||
zip_path = result
|
||
if not zip_path:
|
||
exp = ns.get("export")
|
||
if isinstance(exp, ExportBuilder):
|
||
zip_path = exp.finalize()
|
||
except Exception as e:
|
||
err = f"[Script Error] {type(e).__name__}: {e}"
|
||
return None, (buf.getvalue() + "\n" + err)
|
||
return (
|
||
zip_path if (zip_path and os.path.exists(zip_path)) else None
|
||
), buf.getvalue()
|
||
|
||
|
||
def export_selected_rids(ids, selected_labels):
|
||
"""
|
||
Build a combined zip for multiple selected results based on their current images (no reupload).
|
||
Only includes items with status == 'done'.
|
||
"""
|
||
if not ids or not selected_labels:
|
||
return None
|
||
# Map labels "Result N" -> indices
|
||
sel_indices = []
|
||
for label in selected_labels:
|
||
try:
|
||
idx = int(str(label).split()[-1]) - 1
|
||
if 0 <= idx < len(ids):
|
||
sel_indices.append(idx)
|
||
except Exception:
|
||
continue
|
||
if not sel_indices:
|
||
return None
|
||
|
||
out_dir, session_id = create_temp_session_dir()
|
||
zip_path = os.path.join(out_dir, f"export_selected_{session_id}.zip")
|
||
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
|
||
for i in sel_indices:
|
||
rid = ids[i]
|
||
st = RESULTS_CACHE.get(rid) or {}
|
||
if st.get("status") != "done":
|
||
continue
|
||
temp_dir = st.get("temp_dir")
|
||
if not temp_dir or not os.path.isdir(temp_dir):
|
||
# fallback: ensure individual export then include that zip
|
||
single_zip = ensure_export_ready(rid)
|
||
if single_zip and os.path.exists(single_zip):
|
||
zf.write(single_zip, os.path.join(f"result_{i+1}_{rid}.zip"))
|
||
continue
|
||
base_dir = f"result_{i+1}_{rid}"
|
||
for rt, _, files in os.walk(temp_dir):
|
||
for f in files:
|
||
src = os.path.join(rt, f)
|
||
rel = os.path.relpath(src, temp_dir)
|
||
zf.write(src, os.path.join(base_dir, rel))
|
||
return zip_path if os.path.exists(zip_path) else None
|
||
|
||
|
||
# --------- Edited sources helpers ----------
|
||
def _get_base_name_from_result(st: dict):
|
||
"""Infer base filename like 'demo_xxx' from result paths or session id."""
|
||
rp = st.get("result_paths") or {}
|
||
for key in ("md_content_path", "md_content_nohf_path", "layout_info_path"):
|
||
p = rp.get(key)
|
||
if p and isinstance(p, str):
|
||
base = os.path.splitext(os.path.basename(p))[0]
|
||
if key == "md_content_nohf_path" and base.endswith("_nohf"):
|
||
base = base[: -len("_nohf")]
|
||
return base
|
||
sid = st.get("session_id")
|
||
if sid:
|
||
return f"demo_{sid}"
|
||
return f"demo_{uuid.uuid4().hex[:8]}"
|
||
|
||
|
||
def _edited_dir_for(st: dict):
|
||
temp_dir = st.get("temp_dir")
|
||
if not temp_dir:
|
||
temp_dir, _ = create_temp_session_dir()
|
||
st["temp_dir"] = temp_dir
|
||
d = os.path.join(temp_dir, "edited")
|
||
os.makedirs(d, exist_ok=True)
|
||
return d
|
||
|
||
|
||
def _edited_filepath(st: dict, which: str):
|
||
"""
|
||
which in {'md','nohf','json'}
|
||
"""
|
||
base = _get_base_name_from_result(st)
|
||
if which == "md":
|
||
name = f"{base}.md"
|
||
elif which == "nohf":
|
||
name = f"{base}_nohf.md"
|
||
elif which == "json":
|
||
name = f"{base}.json"
|
||
else:
|
||
raise ValueError(f"unknown edited type: {which}")
|
||
return os.path.join(_edited_dir_for(st), name)
|
||
|
||
|
||
def _save_edited_to_disk(st: dict, which: str, content: str):
|
||
path = _edited_filepath(st, which)
|
||
with open(path, "w", encoding="utf-8") as f:
|
||
f.write(content if content is not None else "")
|
||
return path
|
||
|
||
|
||
def _delete_edited_from_disk(st: dict, which: str):
|
||
try:
|
||
path = _edited_filepath(st, which)
|
||
if os.path.exists(path):
|
||
os.remove(path)
|
||
except Exception:
|
||
pass
|
||
|
||
|
||
def _invalidate_export_zip(rid: str):
|
||
st = RESULTS_CACHE.get(rid) or {}
|
||
old = st.get("export_path")
|
||
if old and isinstance(old, str) and os.path.exists(old):
|
||
try:
|
||
os.remove(old)
|
||
except Exception:
|
||
pass
|
||
if "export_path" in st:
|
||
st["export_path"] = None
|
||
RESULTS_CACHE[rid] = st
|
||
|
||
|
||
# ---------------- UI state helpers (per-card) ----------------
|
||
def _default_ui_state():
|
||
# 增加 source: '源码' 或 '编辑源码'
|
||
return {"preview": True, "nohf": False, "tab": "md", "source": "源码"}
|
||
|
||
|
||
def _ensure_ui_state(rid):
|
||
st = RESULTS_CACHE.get(rid) or {}
|
||
ui = st.get("ui")
|
||
if not isinstance(ui, dict):
|
||
ui = _default_ui_state()
|
||
st["ui"] = ui
|
||
RESULTS_CACHE[rid] = st
|
||
else:
|
||
# 兼容旧状态缺少新字段
|
||
if "source" not in ui:
|
||
ui["source"] = "源码"
|
||
if "tab" not in ui:
|
||
ui["tab"] = "md"
|
||
if "preview" not in ui:
|
||
ui["preview"] = True
|
||
if "nohf" not in ui:
|
||
ui["nohf"] = False
|
||
RESULTS_CACHE[rid] = st
|
||
return ui
|
||
|
||
|
||
# ---------------- Background worker ----------------
|
||
def background_processor():
|
||
while True:
|
||
try:
|
||
task = TASK_QUEUE.get(timeout=1)
|
||
except queue.Empty:
|
||
continue
|
||
if task is None:
|
||
# Important: mark done for sentinel to keep queue counters balanced
|
||
try:
|
||
TASK_QUEUE.task_done()
|
||
finally:
|
||
pass
|
||
break
|
||
rid, filepath, prompt_mode, server_ip, server_port, min_p, max_p, fitz_flag = (
|
||
task
|
||
)
|
||
image = None
|
||
try:
|
||
# Build parser instance for this task
|
||
local_parser = DotsOCRParser(
|
||
ip=server_ip,
|
||
port=int(server_port),
|
||
dpi=200,
|
||
min_pixels=min_p,
|
||
max_pixels=max_p,
|
||
)
|
||
|
||
# Read image
|
||
try:
|
||
fp_lower = str(filepath).lower() if isinstance(filepath, str) else ""
|
||
if fitz_flag or fp_lower.endswith(".pdf"):
|
||
try:
|
||
import fitz as _fitz
|
||
|
||
doc = _fitz.open(filepath)
|
||
page = doc.load_page(0)
|
||
pix = page.get_pixmap()
|
||
mode = "RGBA" if pix.alpha else "RGB"
|
||
image = Image.frombytes(
|
||
mode, (pix.width, pix.height), pix.samples
|
||
)
|
||
doc.close()
|
||
except Exception:
|
||
image = read_image_v2(filepath)
|
||
else:
|
||
image = read_image_v2(filepath)
|
||
except Exception as e:
|
||
raise RuntimeError(f"Failed to read input {filepath}: {e}")
|
||
|
||
# Parse
|
||
result = parse_image_with_high_level_api(
|
||
local_parser, image, prompt_mode, fitz_preprocess=fitz_flag
|
||
)
|
||
result["status"] = "done"
|
||
|
||
# Preserve source/input path but prefer prev.source_path if available
|
||
prev = RESULTS_CACHE.get(rid) or {}
|
||
|
||
# Preserve UI state across re-parses/results
|
||
prev_ui = prev.get("ui") if isinstance(prev, dict) else None
|
||
result["ui"] = prev_ui if isinstance(prev_ui, dict) else _default_ui_state()
|
||
|
||
if isinstance(prev, dict) and isinstance(prev.get("edits"), dict):
|
||
result["edits"] = dict(prev.get("edits"))
|
||
|
||
if isinstance(prev, dict) and prev.get("source_path"):
|
||
result["source_path"] = prev.get("source_path")
|
||
else:
|
||
if isinstance(filepath, str) and os.path.exists(filepath):
|
||
result["source_path"] = filepath
|
||
else:
|
||
result["source_path"] = result.get("input_temp_path")
|
||
|
||
if isinstance(prev, dict) and prev.get("input_path"):
|
||
result["input_path"] = prev.get("input_path")
|
||
|
||
# Commit result
|
||
RESULTS_CACHE[rid] = result
|
||
|
||
# Pre-build export zip for first-click download
|
||
try:
|
||
zip_path = ensure_export_ready(rid)
|
||
if zip_path:
|
||
result = RESULTS_CACHE.get(rid, result)
|
||
result["export_path"] = zip_path
|
||
RESULTS_CACHE[rid] = result
|
||
except Exception:
|
||
pass
|
||
|
||
except Exception as e:
|
||
# Auto-retry for transient backend errors (e.g., server down temporarily)
|
||
if _is_transient_backend_error(e):
|
||
attempts = RETRY_COUNTS.get(rid, 0)
|
||
if attempts < MAX_AUTO_RETRIES:
|
||
RETRY_COUNTS[rid] = attempts + 1
|
||
delay = min(10.0, (RETRY_BACKOFF_BASE**attempts))
|
||
# keep state pending, annotate attempts
|
||
prev = RESULTS_CACHE.get(rid, {}) or {}
|
||
pend_state = dict(prev)
|
||
pend_state.update(
|
||
{
|
||
"status": "pending",
|
||
"retry_attempts": attempts + 1,
|
||
}
|
||
)
|
||
RESULTS_CACHE[rid] = pend_state
|
||
|
||
# Re-enqueue after delay on a timer to avoid blocking worker
|
||
def _requeue_later():
|
||
TASK_QUEUE.put(
|
||
(
|
||
rid,
|
||
filepath,
|
||
prompt_mode,
|
||
server_ip,
|
||
int(server_port),
|
||
min_p,
|
||
max_p,
|
||
fitz_flag,
|
||
)
|
||
)
|
||
|
||
threading.Timer(delay, _requeue_later).start()
|
||
# Do not mark error; move on
|
||
continue
|
||
|
||
# Build a rich error state that preserves re-parse materials
|
||
prev = RESULTS_CACHE.get(rid, {}) or {}
|
||
err_state = dict(prev) # preserve input_path etc.
|
||
err_state["status"] = "error"
|
||
err_state["md_content"] = classify_parse_failure(e, min_p, max_p)
|
||
|
||
# Save a temporary PNG for re-parse if we have an image in memory
|
||
if isinstance(image, Image.Image):
|
||
try:
|
||
tmp_dir, _sid = create_temp_session_dir()
|
||
tmp_path = os.path.join(tmp_dir, f"error_input_{rid}.png")
|
||
image.save(tmp_path, "PNG")
|
||
err_state["original_image"] = image
|
||
err_state["input_temp_path"] = tmp_path
|
||
err_state["temp_dir"] = tmp_dir
|
||
except Exception:
|
||
err_state["original_image"] = image
|
||
if isinstance(filepath, str) and filepath:
|
||
err_state.setdefault("source_path", filepath)
|
||
|
||
# Preserve UI state if missing
|
||
if not isinstance(err_state.get("ui"), dict):
|
||
err_state["ui"] = _default_ui_state()
|
||
|
||
RESULTS_CACHE[rid] = err_state
|
||
finally:
|
||
# Mark the non-sentinel task as done
|
||
try:
|
||
# If previous branch already marked sentinel done, skip double mark
|
||
if task is not None:
|
||
TASK_QUEUE.task_done()
|
||
except Exception:
|
||
pass
|
||
|
||
|
||
def _stop_all_workers():
|
||
"""Stop all worker threads gracefully by sending sentinels and joining."""
|
||
global WORKER_THREADS
|
||
with THREAD_LOCK:
|
||
n = len(WORKER_THREADS)
|
||
if n == 0:
|
||
return
|
||
# Send one sentinel per worker
|
||
for _ in range(n):
|
||
TASK_QUEUE.put(None)
|
||
# Join all workers
|
||
for t in WORKER_THREADS:
|
||
try:
|
||
t.join(timeout=5.0)
|
||
except Exception:
|
||
pass
|
||
WORKER_THREADS = []
|
||
|
||
|
||
def _start_workers(count: int):
|
||
"""Start exactly `count` worker threads if not already running."""
|
||
global WORKER_THREADS
|
||
with THREAD_LOCK:
|
||
running = len(WORKER_THREADS)
|
||
need = max(0, int(count) - running)
|
||
for _ in range(need):
|
||
t = threading.Thread(target=background_processor, daemon=True)
|
||
t.start()
|
||
WORKER_THREADS.append(t)
|
||
|
||
|
||
def start_background_processor():
|
||
"""Ensure at least one worker is running (used by legacy calls)."""
|
||
_start_workers(max(1, MAX_CONCURRENCY))
|
||
|
||
|
||
def set_max_concurrency(n: int):
|
||
"""Restart worker pool to match desired concurrency."""
|
||
global MAX_CONCURRENCY
|
||
n = int(n) if isinstance(n, (int, float)) else 1
|
||
if n <= 0:
|
||
n = 1
|
||
MAX_CONCURRENCY = n
|
||
# Restart workers to apply new concurrency
|
||
_stop_all_workers()
|
||
_start_workers(MAX_CONCURRENCY)
|
||
|
||
|
||
# ---------------- Queueing / task helpers ----------------
|
||
def _pixel_reasons(min_p, max_p):
|
||
reasons = []
|
||
if min_p < ABS_MIN_PIXELS:
|
||
reasons.append(f"Min Pixels 过小:{min_p},必须 >= {ABS_MIN_PIXELS}。")
|
||
if max_p > ABS_MAX_PIXELS:
|
||
reasons.append(f"Max Pixels 过大:{max_p},必须 <= {ABS_MAX_PIXELS}。")
|
||
if min_p >= max_p:
|
||
reasons.append(
|
||
f"像素参数不合法:Min Pixels({min_p}) >= Max Pixels({max_p}),必须满足 Min Pixels < Max Pixels。"
|
||
)
|
||
return reasons
|
||
|
||
|
||
def add_tasks_to_queue(
|
||
file_list, prompt_mode, server_ip, server_port, min_p, max_p, fitz, cur_ids
|
||
):
|
||
"""Queue uploaded file paths (expects file_list of local file paths or tuples (parse_path, source_path))."""
|
||
if not file_list:
|
||
return cur_ids, "No images uploaded."
|
||
|
||
min_p, max_p = _validate_pixels(min_p, max_p)
|
||
start_background_processor()
|
||
|
||
ids = list(cur_ids or [])
|
||
skipped = 0
|
||
queued = 0
|
||
|
||
for fp in file_list:
|
||
# Normalize: support tuple (parse_path, source_path)
|
||
parse_fp = None
|
||
source_fp = None
|
||
if isinstance(fp, (list, tuple)) and len(fp) >= 1:
|
||
parse_fp = fp[0]
|
||
# If tuple contains original source as second element, use it
|
||
source_fp = fp[1] if len(fp) >= 2 else fp[0]
|
||
else:
|
||
parse_fp = fp
|
||
source_fp = fp
|
||
|
||
if isinstance(parse_fp, (list, tuple)):
|
||
parse_fp = parse_fp[0] if len(parse_fp) > 0 else None
|
||
|
||
rid = uuid.uuid4().hex[:8]
|
||
ids.append(rid)
|
||
|
||
# placeholder with input_path so re-parse works even before parse
|
||
RESULTS_CACHE[rid] = {
|
||
"status": "pending",
|
||
"input_path": parse_fp,
|
||
"source_path": source_fp,
|
||
"ui": _default_ui_state(), # 初始化每项的独立 UI 状态
|
||
}
|
||
|
||
reason = _pixel_reasons(min_p, max_p)
|
||
if reason:
|
||
RESULTS_CACHE[rid] = {
|
||
"status": "error",
|
||
"md_content": "参数越界,未开始解析:\n"
|
||
+ "\n".join(f"- {r}" for r in reason)
|
||
+ f"\n(当前参数:min_pixels={min_p}, max_pixels={max_p})",
|
||
"input_path": parse_fp,
|
||
"source_path": source_fp,
|
||
"ui": _default_ui_state(),
|
||
}
|
||
skipped += 1
|
||
continue
|
||
|
||
TASK_QUEUE.put(
|
||
(
|
||
rid,
|
||
parse_fp,
|
||
prompt_mode,
|
||
server_ip,
|
||
int(server_port),
|
||
min_p,
|
||
max_p,
|
||
fitz,
|
||
)
|
||
)
|
||
queued += 1
|
||
|
||
info = f"Queued {queued} item(s)."
|
||
if skipped:
|
||
info += f" Skipped {skipped} due to invalid pixel limits."
|
||
return ids, info
|
||
|
||
|
||
def enqueue_single_reparse(
|
||
rid, reupload_path, prompt_mode, server_ip, server_port, min_p, max_p, fitz
|
||
):
|
||
"""
|
||
Enqueue a reparse for single result id.
|
||
Path selection priority:
|
||
reupload_path -> result.source_path -> result.input_temp_path -> result.input_path -> result.original_image (dump to temp PNG)
|
||
"""
|
||
min_p, max_p = _validate_pixels(min_p, max_p)
|
||
start_background_processor()
|
||
st = RESULTS_CACHE.get(rid, {}) or {}
|
||
|
||
# Pixel constraints: if invalid, set error state and return (do not enqueue)
|
||
reason = _pixel_reasons(min_p, max_p)
|
||
if reason:
|
||
new_state = st.copy()
|
||
new_state.update(
|
||
{
|
||
"status": "error",
|
||
"md_content": "参数越界,未开始解析:\n"
|
||
+ "\n".join(f"- {r}" for r in reason)
|
||
+ f"\n(当前参数:min_pixels={min_p}, max_pixels={max_p})",
|
||
}
|
||
)
|
||
# 保留 UI 状态
|
||
if "ui" not in new_state:
|
||
new_state["ui"] = _default_ui_state()
|
||
RESULTS_CACHE[rid] = new_state
|
||
return
|
||
|
||
if isinstance(reupload_path, (tuple, list)):
|
||
reupload_path = reupload_path[0] if len(reupload_path) > 0 else None
|
||
|
||
filepath = None
|
||
if reupload_path:
|
||
filepath = reupload_path
|
||
elif st.get("source_path"):
|
||
filepath = st.get("source_path")
|
||
elif st.get("input_temp_path"):
|
||
filepath = st.get("input_temp_path")
|
||
elif st.get("input_path"):
|
||
filepath = st.get("input_path")
|
||
else:
|
||
img = st.get("original_image")
|
||
if isinstance(img, Image.Image):
|
||
tmp_dir, _ = create_temp_session_dir()
|
||
tmp_path = os.path.join(tmp_dir, f"reparse_{rid}.png")
|
||
try:
|
||
img.save(tmp_path, "PNG")
|
||
filepath = tmp_path
|
||
except Exception:
|
||
filepath = None
|
||
|
||
if not filepath:
|
||
new_state = st.copy()
|
||
new_state.update(
|
||
{
|
||
"status": "error",
|
||
"md_content": "重解析失败:未找到可用的图片来源。请重新上传图片或检查缓存目录。",
|
||
}
|
||
)
|
||
if "ui" not in new_state:
|
||
new_state["ui"] = _default_ui_state()
|
||
RESULTS_CACHE[rid] = new_state
|
||
return
|
||
|
||
new_state = st.copy()
|
||
new_state.update(
|
||
{
|
||
"status": "pending",
|
||
"input_path": filepath,
|
||
"last_used_config": {
|
||
"ip": server_ip,
|
||
"port": int(server_port),
|
||
"min_pixels": min_p,
|
||
"max_pixels": max_p,
|
||
"prompt_mode": prompt_mode,
|
||
},
|
||
}
|
||
)
|
||
# 保留 UI 状态
|
||
if "ui" not in new_state:
|
||
new_state["ui"] = _default_ui_state()
|
||
RESULTS_CACHE[rid] = new_state
|
||
TASK_QUEUE.put(
|
||
(rid, filepath, prompt_mode, server_ip, int(server_port), min_p, max_p, fitz)
|
||
)
|
||
|
||
|
||
def delete_one(ids, rid, tick):
|
||
new_ids = [x for x in (ids or []) if x != rid]
|
||
st = RESULTS_CACHE.get(rid)
|
||
temp_dir = st.get("temp_dir") if st else None
|
||
if rid in RESULTS_CACHE:
|
||
del RESULTS_CACHE[rid]
|
||
if rid in RETRY_COUNTS:
|
||
del RETRY_COUNTS[rid]
|
||
purge_queue(rid)
|
||
if temp_dir and os.path.exists(temp_dir):
|
||
threading.Thread(
|
||
target=lambda: shutil.rmtree(temp_dir, ignore_errors=True), daemon=True
|
||
).start()
|
||
return new_ids, int(tick or 0) + 1
|
||
|
||
|
||
# ---------------- Gradio UI ----------------
|
||
def create_gradio_interface():
|
||
css = """
|
||
/* basic theme */
|
||
:root { --bg:#0b1220; --card:#111827; --muted:#9ca3af; --accent:#FF576D; --text:#e5e7eb; }
|
||
body, .gradio-container { background: var(--bg) !important; color: var(--text) !important; }
|
||
.result-card { background: var(--card); border:1px solid #1f2937; border-radius:8px; padding:10px; margin-bottom:12px; }
|
||
.muted { color: var(--muted); font-size:0.9em; }
|
||
|
||
/* skeleton shimmer */
|
||
.skeleton { position:relative; overflow:hidden; background:#0f172a; border-radius:6px; }
|
||
.skeleton::after {
|
||
content:""; position:absolute; inset:0; transform:translateX(-100%);
|
||
background:linear-gradient(90deg, rgba(255,255,255,0), rgba(255,255,255,0.06), rgba(255,255,255,0));
|
||
animation:shimmer 1.2s infinite;
|
||
}
|
||
@keyframes shimmer { 100% { transform:translateX(100%);} }
|
||
|
||
/* Hide unwanted footer/buttons (robust selectors) */
|
||
footer, .footer, #footer, footer[role="contentinfo"] { display:none !important; }
|
||
[aria-label="Use via API"], [aria-label*="API"], [title*="API"], a[href*="/api"], a[href*="api_docs"], a[href*="gradio.app"] { display:none !important; }
|
||
button[aria-label="Settings"], button[aria-label*="设置"], [aria-label="Built with Gradio"] { display:none !important; }
|
||
|
||
/* Script log area: single inner scrollbar on <pre>, outer container hidden overflow */
|
||
.script-log { max-height: 260px; overflow: hidden; border:1px solid #1f2937; border-radius:6px; padding:0; }
|
||
.script-log pre {
|
||
max-height: 260px;
|
||
overflow: auto;
|
||
margin: 0;
|
||
padding: 6px;
|
||
background: transparent;
|
||
scrollbar-width: thin; /* Firefox */
|
||
scrollbar-color: rgba(255,255,255,0.2) transparent;
|
||
}
|
||
.script-log pre::-webkit-scrollbar { width: 6px; height: 6px; }
|
||
.script-log pre::-webkit-scrollbar-track { background: transparent; }
|
||
.script-log pre::-webkit-scrollbar-thumb { background: rgba(255,255,255,0.12); border-radius: 4px; }
|
||
.script-log pre:hover::-webkit-scrollbar-thumb { background: rgba(255,255,255,0.25); }
|
||
"""
|
||
|
||
with gr.Blocks(css=css, title="dots.ocr") as demo:
|
||
# Left column controls
|
||
with gr.Row():
|
||
with gr.Column(scale=1):
|
||
file_input = gr.File(
|
||
label="Upload Multiple Images",
|
||
type="filepath",
|
||
file_count="multiple",
|
||
file_types=[".jpg", ".jpeg", ".png", ".pdf"],
|
||
)
|
||
# Filter out the unwanted 'prompt_grounding_ocr' mode
|
||
allowed_modes = [
|
||
m
|
||
for m in dict_promptmode_to_prompt.keys()
|
||
if m != "prompt_grounding_ocr"
|
||
]
|
||
if not allowed_modes:
|
||
allowed_modes = list(dict_promptmode_to_prompt.keys())
|
||
prompt_mode = gr.Dropdown(
|
||
label="Prompt Mode",
|
||
choices=allowed_modes,
|
||
value=allowed_modes[0],
|
||
)
|
||
prompt_display = gr.Textbox(
|
||
label="Prompt Preview",
|
||
value=dict_promptmode_to_prompt[allowed_modes[0]],
|
||
interactive=False,
|
||
lines=4,
|
||
)
|
||
|
||
with gr.Row():
|
||
parse_btn = gr.Button("🔍 Parse", variant="primary")
|
||
clear_btn = gr.Button("🗑️ Clear")
|
||
|
||
with gr.Accordion("Advanced Config", open=False):
|
||
fitz_preprocess = gr.Checkbox(label="fitz_preprocess", value=True)
|
||
server_ip = gr.Textbox(
|
||
label="Server IP", value=DEFAULT_CONFIG["ip"]
|
||
)
|
||
server_port = gr.Number(
|
||
label="Port", value=DEFAULT_CONFIG["port_vllm"], precision=0
|
||
)
|
||
min_pixels = gr.Number(
|
||
label="Min Pixels", value=DEFAULT_CONFIG["min_pixels"]
|
||
)
|
||
max_pixels = gr.Number(
|
||
label="Max Pixels", value=DEFAULT_CONFIG["max_pixels"]
|
||
)
|
||
concurrency = gr.Number(
|
||
label="Max Concurrency",
|
||
value=MAX_CONCURRENCY, # 与实际生效的后台并发保持一致(支持刷新后保持)
|
||
precision=0,
|
||
interactive=True,
|
||
)
|
||
confirm_delete = gr.Checkbox(
|
||
label="删除前确认(推荐)", value=True, interactive=True
|
||
)
|
||
|
||
# Right column: results & actions
|
||
with gr.Column(scale=5):
|
||
info_display = gr.Markdown("Waiting...", elem_id="info_box")
|
||
ids_state = gr.State(value=[])
|
||
store_tick = gr.State(value=0)
|
||
render_bump = gr.State(value=0) # 仅用于在状态变化时触发结果重渲染
|
||
confirm_delete_state = gr.State(value=True)
|
||
confirm_delete.change(
|
||
lambda v: v, inputs=[confirm_delete], outputs=[confirm_delete_state]
|
||
)
|
||
|
||
progress_timer = gr.Timer(1.0)
|
||
|
||
# Actions 面板(多选)
|
||
with gr.Accordion("Actions", open=False):
|
||
selected_group = gr.CheckboxGroup(
|
||
label="Select Items", choices=[], value=[], interactive=True
|
||
)
|
||
with gr.Row():
|
||
select_all_btn = gr.Button("全选")
|
||
clear_sel_btn = gr.Button("清空选择")
|
||
with gr.Row():
|
||
bulk_reparse_btn = gr.Button("🔁 重解析所选")
|
||
delete_selected_btn = gr.Button("🗑️ 删除所选", variant="stop")
|
||
export_selected_btn = gr.DownloadButton("📦 导出所选")
|
||
# 高级脚本导出
|
||
with gr.Accordion("高级脚本", open=False):
|
||
gr.Markdown(
|
||
"在下方编辑并运行自定义 Python 脚本以自由处理当前解析结果并导出为任意目录/文件结构。"
|
||
"<br/>脚本将在受限环境中执行,可通过 api 对象访问只读数据与构建导出压缩包。",
|
||
elem_classes=["muted"],
|
||
)
|
||
script_code = gr.Code(
|
||
label="Python 脚本",
|
||
language="python",
|
||
value=DEFAULT_SCRIPT_TEMPLATE,
|
||
lines=24,
|
||
interactive=True,
|
||
)
|
||
with gr.Row():
|
||
run_script_btn = gr.Button("▶ 运行脚本", variant="primary")
|
||
script_download_btn = gr.DownloadButton("📦 下载脚本输出")
|
||
script_status = gr.HTML("")
|
||
script_log = gr.Markdown(
|
||
"", elem_id="script_log", elem_classes=["script-log"]
|
||
)
|
||
|
||
# 流式执行脚本:实时打印日志与运行状态,并在完成后绑定下载按钮
|
||
run_script_btn.click(
|
||
run_user_script_stream,
|
||
inputs=[script_code, ids_state],
|
||
outputs=[script_download_btn, script_status, script_log],
|
||
show_progress="hidden",
|
||
)
|
||
# 批量删除确认面板
|
||
with gr.Row(visible=False) as bulk_delete_confirm_panel:
|
||
gr.Markdown(
|
||
"确认删除所选结果?该操作不可恢复。",
|
||
elem_classes=["muted"],
|
||
)
|
||
bulk_confirm_delete_btn = gr.Button("确认删除", variant="stop")
|
||
bulk_cancel_delete_btn = gr.Button("取消")
|
||
|
||
# Render results dynamically
|
||
@gr.render(inputs=[ids_state, render_bump])
|
||
def render_results(ids, _bump):
|
||
if not ids:
|
||
return gr.Markdown("No results yet.")
|
||
with gr.Column():
|
||
for idx, rid in enumerate(ids):
|
||
data = RESULTS_CACHE.get(rid, {}) or {}
|
||
status = data.get("status", "pending")
|
||
|
||
# 确保每张卡都有独立 UI 状态(并写回缓存,保证后续使用)
|
||
ui = _ensure_ui_state(rid)
|
||
preview_on = bool(ui.get("preview", True))
|
||
nohf_on = bool(ui.get("nohf", False))
|
||
active_tab = ui.get("tab", "md")
|
||
if active_tab not in ("md", "json"):
|
||
active_tab = "md"
|
||
source_sel = ui.get("source", "源码")
|
||
if source_sel not in ("源码", "编辑源码"):
|
||
source_sel = "源码"
|
||
|
||
with gr.Column(
|
||
elem_classes=["result-card"], elem_id=f"card-{rid}"
|
||
):
|
||
with gr.Row():
|
||
gr.Markdown(
|
||
f"### Result {idx+1} <span class='muted'>RID: {rid}</span>"
|
||
)
|
||
|
||
if status == "error":
|
||
gr.Markdown(
|
||
f"⚠️ 解析失败:\n\n{data.get('md_content','Unknown error')}",
|
||
elem_classes=["muted"],
|
||
)
|
||
|
||
if status == "done":
|
||
orig_img = data.get("original_image")
|
||
layout_img = data.get("layout_image")
|
||
with gr.Row():
|
||
gr.Image(
|
||
value=orig_img, label="Original", height=300
|
||
)
|
||
gr.Image(
|
||
value=layout_img, label="Layout", height=300
|
||
)
|
||
elif status == "pending":
|
||
with gr.Row():
|
||
gr.HTML(
|
||
"<div class='skeleton' style='width:100%;height:300px;'></div>"
|
||
)
|
||
gr.HTML(
|
||
"<div class='skeleton' style='width:100%;height:300px;'></div>"
|
||
)
|
||
|
||
# badges
|
||
with gr.Row():
|
||
badge_md = gr.HTML(
|
||
f"<span class='muted'>MD: {'Preview' if preview_on else 'Source'}</span>"
|
||
)
|
||
badge_nohf = gr.HTML(
|
||
f"<span class='muted'>NOHF: {'On' if nohf_on else 'Off'}</span>"
|
||
)
|
||
|
||
# controls
|
||
with gr.Row():
|
||
rid_box = gr.Textbox(value=rid, visible=False)
|
||
preview_cb = gr.Checkbox(
|
||
label="Preview Markdown",
|
||
value=preview_on,
|
||
)
|
||
nohf_cb = gr.Checkbox(label="NOHF", value=nohf_on)
|
||
|
||
# 视图切换
|
||
selected_label = (
|
||
"Markdown" if active_tab == "md" else "JSON"
|
||
)
|
||
with gr.Row():
|
||
view_radio = gr.Radio(
|
||
label="视图",
|
||
choices=["Markdown", "JSON"],
|
||
value=selected_label,
|
||
)
|
||
|
||
# 内容来源(仅完成状态可用)
|
||
with gr.Row():
|
||
source_radio = gr.Radio(
|
||
label="内容来源",
|
||
choices=["源码", "编辑源码"],
|
||
value=source_sel,
|
||
interactive=True,
|
||
visible=(status == "done"),
|
||
)
|
||
|
||
# 内容获取助手
|
||
def _get_texts(rid_value, nohf_flag):
|
||
st = RESULTS_CACHE.get(rid_value, {}) or {}
|
||
md_orig = st.get("md_content") or ""
|
||
md_nohf_orig = st.get("md_content_nohf") or ""
|
||
md_current_orig = (
|
||
md_nohf_orig if nohf_flag else md_orig
|
||
)
|
||
edits = st.get("edits") or {}
|
||
md_edit = (
|
||
edits.get("nohf")
|
||
if nohf_flag
|
||
else edits.get("md")
|
||
)
|
||
if md_edit is None:
|
||
md_edit = md_current_orig
|
||
json_orig = st.get("json_code") or ""
|
||
json_edit = edits.get("json")
|
||
if json_edit is None:
|
||
json_edit = json_orig
|
||
return (
|
||
md_current_orig,
|
||
md_edit,
|
||
json_orig,
|
||
json_edit,
|
||
)
|
||
|
||
(
|
||
md_orig_val,
|
||
md_edit_val,
|
||
json_orig_val,
|
||
json_edit_val,
|
||
) = _get_texts(rid, nohf_on)
|
||
is_md_init = selected_label == "Markdown"
|
||
use_edit_init = source_sel == "编辑源码"
|
||
|
||
# 单一预览组件(Markdown 用)
|
||
md_preview = gr.Markdown(
|
||
value=(
|
||
md_edit_val if use_edit_init else md_orig_val
|
||
),
|
||
visible=(
|
||
status == "done" and is_md_init and preview_on
|
||
),
|
||
)
|
||
# 原始源码(只读)
|
||
md_code_orig = gr.Code(
|
||
language="markdown",
|
||
value=md_orig_val,
|
||
interactive=False,
|
||
visible=(
|
||
status == "done"
|
||
and is_md_init
|
||
and (not preview_on)
|
||
and (not use_edit_init)
|
||
),
|
||
)
|
||
# 编辑源码(可编辑、自动保存)
|
||
md_code_edit = gr.Code(
|
||
language="markdown",
|
||
value=md_edit_val,
|
||
interactive=True,
|
||
visible=(
|
||
status == "done"
|
||
and is_md_init
|
||
and (not preview_on)
|
||
and use_edit_init
|
||
),
|
||
)
|
||
|
||
# JSON(原始与编辑)
|
||
json_code_orig = gr.Code(
|
||
language="json",
|
||
value=json_orig_val,
|
||
interactive=False,
|
||
visible=(
|
||
status == "done"
|
||
and (not is_md_init)
|
||
and (not use_edit_init)
|
||
),
|
||
)
|
||
json_code_edit = gr.Code(
|
||
language="json",
|
||
value=json_edit_val,
|
||
interactive=True,
|
||
visible=(
|
||
status == "done"
|
||
and (not is_md_init)
|
||
and use_edit_init
|
||
),
|
||
)
|
||
|
||
# 仅编辑模式显示
|
||
restore_btn = gr.Button(
|
||
"还原当前内容",
|
||
visible=(status == "done" and use_edit_init),
|
||
)
|
||
|
||
# 统一可见性/内容更新
|
||
def _apply_all(
|
||
preview, use_nohf, view_label, src_label, rid_value
|
||
):
|
||
preview = bool(preview)
|
||
use_nohf = bool(use_nohf)
|
||
is_md = str(view_label) == "Markdown"
|
||
use_edit = str(src_label) == "编辑源码"
|
||
|
||
# 写回 UI 状态
|
||
st = RESULTS_CACHE.get(rid_value, {}) or {}
|
||
ui0 = dict(st.get("ui") or _default_ui_state())
|
||
ui0["preview"] = preview
|
||
ui0["nohf"] = use_nohf
|
||
ui0["tab"] = "md" if is_md else "json"
|
||
ui0["source"] = "编辑源码" if use_edit else "源码"
|
||
st["ui"] = ui0
|
||
RESULTS_CACHE[rid_value] = st
|
||
|
||
md_o, md_e, j_o, j_e = _get_texts(
|
||
rid_value, use_nohf
|
||
)
|
||
return (
|
||
gr.update(
|
||
value=f"<span class='muted'>MD: {'Preview' if preview else 'Source'}</span>"
|
||
),
|
||
gr.update(
|
||
value=f"<span class='muted'>NOHF: {'On' if use_nohf else 'Off'}</span>"
|
||
),
|
||
gr.update(
|
||
value=(md_e if use_edit else md_o),
|
||
visible=(is_md and preview),
|
||
),
|
||
gr.update(
|
||
value=md_o,
|
||
visible=(
|
||
is_md
|
||
and (not preview)
|
||
and (not use_edit)
|
||
),
|
||
),
|
||
gr.update(
|
||
value=md_e,
|
||
visible=(
|
||
is_md and (not preview) and use_edit
|
||
),
|
||
),
|
||
gr.update(
|
||
value=j_o,
|
||
visible=(not is_md and (not use_edit)),
|
||
),
|
||
gr.update(
|
||
value=j_e, visible=(not is_md and use_edit)
|
||
),
|
||
gr.update(visible=use_edit),
|
||
)
|
||
|
||
# 绑定控制项变化:预览、NOHF、视图、来源
|
||
preview_cb.change(
|
||
_apply_all,
|
||
inputs=[
|
||
preview_cb,
|
||
nohf_cb,
|
||
view_radio,
|
||
source_radio,
|
||
rid_box,
|
||
],
|
||
outputs=[
|
||
badge_md,
|
||
badge_nohf,
|
||
md_preview,
|
||
md_code_orig,
|
||
md_code_edit,
|
||
json_code_orig,
|
||
json_code_edit,
|
||
restore_btn,
|
||
],
|
||
show_progress="hidden",
|
||
)
|
||
nohf_cb.change(
|
||
_apply_all,
|
||
inputs=[
|
||
preview_cb,
|
||
nohf_cb,
|
||
view_radio,
|
||
source_radio,
|
||
rid_box,
|
||
],
|
||
outputs=[
|
||
badge_md,
|
||
badge_nohf,
|
||
md_preview,
|
||
md_code_orig,
|
||
md_code_edit,
|
||
json_code_orig,
|
||
json_code_edit,
|
||
restore_btn,
|
||
],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
def _on_view_change(
|
||
view_label,
|
||
rid_value,
|
||
preview_flag,
|
||
nohf_flag,
|
||
src_label,
|
||
):
|
||
st = RESULTS_CACHE.get(rid_value, {}) or {}
|
||
ui0 = dict(st.get("ui") or _default_ui_state())
|
||
ui0["tab"] = (
|
||
"md"
|
||
if str(view_label) == "Markdown"
|
||
else "json"
|
||
)
|
||
st["ui"] = ui0
|
||
RESULTS_CACHE[rid_value] = st
|
||
return _apply_all(
|
||
preview_flag,
|
||
nohf_flag,
|
||
view_label,
|
||
src_label,
|
||
rid_value,
|
||
)
|
||
|
||
view_radio.change(
|
||
_on_view_change,
|
||
inputs=[
|
||
view_radio,
|
||
rid_box,
|
||
preview_cb,
|
||
nohf_cb,
|
||
source_radio,
|
||
],
|
||
outputs=[
|
||
badge_md,
|
||
badge_nohf,
|
||
md_preview,
|
||
md_code_orig,
|
||
md_code_edit,
|
||
json_code_orig,
|
||
json_code_edit,
|
||
restore_btn,
|
||
],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
def _on_source_change(
|
||
src_label,
|
||
rid_value,
|
||
preview_flag,
|
||
nohf_flag,
|
||
view_label,
|
||
):
|
||
st = RESULTS_CACHE.get(rid_value, {}) or {}
|
||
ui0 = dict(st.get("ui") or _default_ui_state())
|
||
ui0["source"] = (
|
||
"编辑源码"
|
||
if str(src_label) == "编辑源码"
|
||
else "源码"
|
||
)
|
||
st["ui"] = ui0
|
||
RESULTS_CACHE[rid_value] = st
|
||
return _apply_all(
|
||
preview_flag,
|
||
nohf_flag,
|
||
view_label,
|
||
src_label,
|
||
rid_value,
|
||
)
|
||
|
||
source_radio.change(
|
||
_on_source_change,
|
||
inputs=[
|
||
source_radio,
|
||
rid_box,
|
||
preview_cb,
|
||
nohf_cb,
|
||
view_radio,
|
||
],
|
||
outputs=[
|
||
badge_md,
|
||
badge_nohf,
|
||
md_preview,
|
||
md_code_orig,
|
||
md_code_edit,
|
||
json_code_orig,
|
||
json_code_edit,
|
||
restore_btn,
|
||
],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
# Action buttons per-card
|
||
with gr.Row():
|
||
reparse_btn = gr.Button(
|
||
"🔁 重新解析",
|
||
interactive=(status in ("done", "error")),
|
||
)
|
||
export_btn = gr.DownloadButton(
|
||
"📦 导出",
|
||
interactive=(status == "done"),
|
||
value=(
|
||
data.get("export_path")
|
||
if status == "done"
|
||
else None
|
||
),
|
||
)
|
||
delete_btn = gr.Button("🗑️ 删除", variant="stop")
|
||
|
||
# 自动保存(编辑器变更即写盘 + 刷新导出 + 可能的 Markdown 预览)
|
||
def _save_md_edit(
|
||
val,
|
||
rid_value,
|
||
nohf_flag,
|
||
preview_flag,
|
||
view_label,
|
||
src_label,
|
||
ids,
|
||
selected_labels,
|
||
):
|
||
st = RESULTS_CACHE.get(rid_value, {}) or {}
|
||
if st.get("status") != "done":
|
||
# 同步“导出所选”以防其它项在编辑(极少见)
|
||
path_sel = export_selected_rids(
|
||
ids, selected_labels
|
||
)
|
||
return (
|
||
gr.update(),
|
||
gr.update(),
|
||
gr.update(value=path_sel),
|
||
)
|
||
which = "nohf" if bool(nohf_flag) else "md"
|
||
edits = dict(st.get("edits") or {})
|
||
edits[which] = val or ""
|
||
st["edits"] = edits
|
||
RESULTS_CACHE[rid_value] = st
|
||
try:
|
||
_save_edited_to_disk(st, which, val or "")
|
||
except Exception:
|
||
pass
|
||
_invalidate_export_zip(rid_value)
|
||
new_zip = ensure_export_ready(rid_value)
|
||
|
||
# 刷新“导出所选”
|
||
path_sel = export_selected_rids(
|
||
ids, selected_labels
|
||
)
|
||
|
||
# 若当前正处于 Markdown/预览/编辑模式,则更新预览内容
|
||
is_md = str(view_label) == "Markdown"
|
||
use_edit = str(src_label) == "编辑源码"
|
||
if is_md and use_edit and bool(preview_flag):
|
||
return (
|
||
gr.update(value=val or ""),
|
||
gr.update(value=new_zip),
|
||
gr.update(value=path_sel),
|
||
)
|
||
return (
|
||
gr.update(),
|
||
gr.update(value=new_zip),
|
||
gr.update(value=path_sel),
|
||
)
|
||
|
||
md_code_edit.change(
|
||
_save_md_edit,
|
||
inputs=[
|
||
md_code_edit,
|
||
rid_box,
|
||
nohf_cb,
|
||
preview_cb,
|
||
view_radio,
|
||
source_radio,
|
||
ids_state,
|
||
selected_group,
|
||
],
|
||
outputs=[
|
||
md_preview,
|
||
export_btn,
|
||
export_selected_btn,
|
||
],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
def _save_json_edit(
|
||
val, rid_value, ids, selected_labels
|
||
):
|
||
st = RESULTS_CACHE.get(rid_value, {}) or {}
|
||
if st.get("status") != "done":
|
||
path_sel = export_selected_rids(
|
||
ids, selected_labels
|
||
)
|
||
return gr.update(), gr.update(value=path_sel)
|
||
edits = dict(st.get("edits") or {})
|
||
edits["json"] = val or ""
|
||
st["edits"] = edits
|
||
RESULTS_CACHE[rid_value] = st
|
||
try:
|
||
_save_edited_to_disk(st, "json", val or "")
|
||
except Exception:
|
||
pass
|
||
_invalidate_export_zip(rid_value)
|
||
new_zip = ensure_export_ready(rid_value)
|
||
path_sel = export_selected_rids(
|
||
ids, selected_labels
|
||
)
|
||
return gr.update(value=new_zip), gr.update(
|
||
value=path_sel
|
||
)
|
||
|
||
json_code_edit.change(
|
||
_save_json_edit,
|
||
inputs=[
|
||
json_code_edit,
|
||
rid_box,
|
||
ids_state,
|
||
selected_group,
|
||
],
|
||
outputs=[export_btn, export_selected_btn],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
# 还原当前内容
|
||
def _restore_current(
|
||
src_label,
|
||
rid_value,
|
||
nohf_flag,
|
||
preview_flag,
|
||
view_label,
|
||
ids,
|
||
selected_labels,
|
||
):
|
||
st = RESULTS_CACHE.get(rid_value, {}) or {}
|
||
which = (
|
||
"json"
|
||
if str(view_label) == "JSON"
|
||
else ("nohf" if bool(nohf_flag) else "md")
|
||
)
|
||
# 删除编辑版
|
||
edits = dict(st.get("edits") or {})
|
||
if which in edits:
|
||
edits.pop(which, None)
|
||
st["edits"] = edits
|
||
RESULTS_CACHE[rid_value] = st
|
||
try:
|
||
_delete_edited_from_disk(st, which)
|
||
except Exception:
|
||
pass
|
||
# 重新取原始内容
|
||
md_o, md_e, j_o, j_e = _get_texts(
|
||
rid_value, bool(nohf_flag)
|
||
)
|
||
# 刷新导出
|
||
_invalidate_export_zip(rid_value)
|
||
new_zip = ensure_export_ready(rid_value)
|
||
path_sel = export_selected_rids(
|
||
ids, selected_labels
|
||
)
|
||
# 更新编辑器与预览
|
||
up_md_editor = (
|
||
gr.update(value=md_o)
|
||
if which in ("md", "nohf")
|
||
else gr.update()
|
||
)
|
||
up_json_editor = (
|
||
gr.update(value=j_o)
|
||
if which == "json"
|
||
else gr.update()
|
||
)
|
||
is_md = str(view_label) == "Markdown"
|
||
use_edit = str(src_label) == "编辑源码"
|
||
up_preview = (
|
||
gr.update(value=(md_e if use_edit else md_o))
|
||
if is_md and bool(preview_flag)
|
||
else gr.update()
|
||
)
|
||
return (
|
||
up_md_editor,
|
||
up_json_editor,
|
||
up_preview,
|
||
gr.update(value=new_zip),
|
||
gr.update(value=path_sel),
|
||
)
|
||
|
||
restore_btn.click(
|
||
_restore_current,
|
||
inputs=[
|
||
source_radio,
|
||
rid_box,
|
||
nohf_cb,
|
||
preview_cb,
|
||
view_radio,
|
||
ids_state,
|
||
selected_group,
|
||
],
|
||
outputs=[
|
||
md_code_edit,
|
||
json_code_edit,
|
||
md_preview,
|
||
export_btn,
|
||
export_selected_btn,
|
||
],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
# Reparse panel (collapsed)
|
||
with gr.Column(visible=False) as reparse_panel:
|
||
gr.Markdown("**重解析**")
|
||
with gr.Row():
|
||
reparse_current_btn = gr.Button(
|
||
"基于当前图片直接重解析", variant="primary"
|
||
)
|
||
|
||
# Delete confirm panel (collapsed)
|
||
with gr.Row(visible=False) as delete_confirm_panel:
|
||
gr.Markdown(
|
||
"确认删除该结果?该操作不可恢复。",
|
||
elem_classes=["muted"],
|
||
)
|
||
confirm_delete_btn = gr.Button(
|
||
"确认删除", variant="stop"
|
||
)
|
||
cancel_delete_btn = gr.Button("取消")
|
||
|
||
# 绑定其他交互
|
||
reparse_btn.click(
|
||
lambda: gr.update(visible=True),
|
||
outputs=[reparse_panel],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
def _start_reparse_current(
|
||
rid_value,
|
||
p_mode,
|
||
ip_addr,
|
||
port_val,
|
||
minp,
|
||
maxp,
|
||
fitz_flag,
|
||
tick,
|
||
ids,
|
||
selected_labels,
|
||
):
|
||
try:
|
||
enqueue_single_reparse(
|
||
rid_value,
|
||
None,
|
||
p_mode,
|
||
ip_addr,
|
||
int(port_val),
|
||
int(minp),
|
||
int(maxp),
|
||
fitz_flag,
|
||
)
|
||
# 重建“导出所选”
|
||
path_sel = export_selected_rids(
|
||
ids, selected_labels
|
||
)
|
||
return (
|
||
int(tick or 0) + 1,
|
||
gr.update(visible=False),
|
||
gr.update(value=path_sel),
|
||
)
|
||
except Exception as e:
|
||
RESULTS_CACHE[rid_value] = {
|
||
"status": "error",
|
||
"md_content": f"Reparse error: {e}",
|
||
# 保留 UI 状态
|
||
"ui": _ensure_ui_state(rid_value),
|
||
}
|
||
path_sel = export_selected_rids(
|
||
ids, selected_labels
|
||
)
|
||
return (
|
||
int(tick or 0) + 1,
|
||
gr.update(visible=False),
|
||
gr.update(value=path_sel),
|
||
)
|
||
|
||
reparse_current_btn.click(
|
||
_start_reparse_current,
|
||
inputs=[
|
||
rid_box,
|
||
prompt_mode,
|
||
server_ip,
|
||
server_port,
|
||
min_pixels,
|
||
max_pixels,
|
||
fitz_preprocess,
|
||
store_tick,
|
||
ids_state,
|
||
selected_group,
|
||
],
|
||
outputs=[
|
||
store_tick,
|
||
reparse_panel,
|
||
export_selected_btn,
|
||
],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
def _on_delete_click(
|
||
rid_value, ids, need_confirm, tick
|
||
):
|
||
# 如果需要确认,仅展开确认面板,不修改选择框/导出按钮
|
||
if need_confirm:
|
||
return (
|
||
gr.update(visible=True),
|
||
ids,
|
||
tick,
|
||
gr.update(), # selected_group 不变
|
||
gr.update(), # export button 不变
|
||
)
|
||
# 直接删除:更新 ids/tick,并同步 Actions 的选择项与导出按钮
|
||
new_ids, new_tick = delete_one(ids, rid_value, tick)
|
||
choices = [
|
||
f"Result {i+1}"
|
||
for i in range(len(new_ids or []))
|
||
]
|
||
return (
|
||
gr.update(visible=False),
|
||
new_ids,
|
||
new_tick,
|
||
gr.update(choices=choices, value=[]),
|
||
gr.update(value=None), # 清空导出
|
||
)
|
||
|
||
# 单卡删除输出同步 selected_group 与 export_selected_btn
|
||
delete_btn.click(
|
||
_on_delete_click,
|
||
inputs=[
|
||
rid_box,
|
||
ids_state,
|
||
confirm_delete_state,
|
||
store_tick,
|
||
],
|
||
outputs=[
|
||
delete_confirm_panel,
|
||
ids_state,
|
||
store_tick,
|
||
selected_group,
|
||
export_selected_btn,
|
||
],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
def _confirm_delete(rid_value, ids, tick):
|
||
new_ids, new_tick = delete_one(ids, rid_value, tick)
|
||
choices = [
|
||
f"Result {i+1}"
|
||
for i in range(len(new_ids or []))
|
||
]
|
||
return (
|
||
new_ids,
|
||
new_tick,
|
||
gr.update(visible=False),
|
||
gr.update(choices=choices, value=[]),
|
||
gr.update(value=None),
|
||
)
|
||
|
||
# 确认删除后同步 selected_group 与 export_selected_btn
|
||
confirm_delete_btn.click(
|
||
_confirm_delete,
|
||
inputs=[rid_box, ids_state, store_tick],
|
||
outputs=[
|
||
ids_state,
|
||
store_tick,
|
||
delete_confirm_panel,
|
||
selected_group,
|
||
export_selected_btn,
|
||
],
|
||
show_progress="hidden",
|
||
)
|
||
cancel_delete_btn.click(
|
||
lambda: gr.update(visible=False),
|
||
outputs=[delete_confirm_panel],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
# Top-level events
|
||
def _on_prompt_mode_change(m):
|
||
return dict_promptmode_to_prompt.get(m, "")
|
||
|
||
prompt_mode.change(
|
||
fn=_on_prompt_mode_change,
|
||
inputs=[prompt_mode],
|
||
outputs=[prompt_display],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
def process_images_simple(
|
||
file_list,
|
||
p_mode,
|
||
server_ip_val,
|
||
server_port_val,
|
||
min_p_val,
|
||
max_p_val,
|
||
fitz_val,
|
||
cur_ids,
|
||
tick,
|
||
):
|
||
"""
|
||
Process images with selected prompt mode. Grounding mode is removed; all files go through normal path.
|
||
"""
|
||
minp, maxp = _validate_pixels(min_p_val, max_p_val)
|
||
_set_parser_config(server_ip_val, server_port_val, minp, maxp)
|
||
|
||
# normalize file_list (gradio file element may pass nested lists)
|
||
files = []
|
||
if not file_list:
|
||
return (
|
||
gr.update(value=None),
|
||
gr.update(value="No files uploaded."),
|
||
cur_ids,
|
||
tick,
|
||
gr.update(choices=[], value=[]),
|
||
gr.update(value=None), # 清空导出
|
||
)
|
||
|
||
# build normalized list
|
||
for f in file_list:
|
||
if isinstance(f, (list, tuple)):
|
||
files.append(f[0] if len(f) > 0 else None)
|
||
else:
|
||
files.append(f)
|
||
|
||
# Normal path: queue originals
|
||
new_ids, info = add_tasks_to_queue(
|
||
files,
|
||
p_mode,
|
||
server_ip_val,
|
||
server_port_val,
|
||
minp,
|
||
maxp,
|
||
fitz_val,
|
||
cur_ids,
|
||
)
|
||
# Update checkbox group choices
|
||
choices = [f"Result {i+1}" for i in range(len(new_ids or []))]
|
||
return (
|
||
gr.update(value=None),
|
||
gr.update(value=info),
|
||
new_ids,
|
||
int(tick or 0) + 1,
|
||
gr.update(choices=choices, value=[]),
|
||
gr.update(value=None), # 清空导出
|
||
)
|
||
|
||
parse_btn.click(
|
||
fn=process_images_simple,
|
||
inputs=[
|
||
file_input,
|
||
prompt_mode,
|
||
server_ip,
|
||
server_port,
|
||
min_pixels,
|
||
max_pixels,
|
||
fitz_preprocess,
|
||
ids_state,
|
||
store_tick,
|
||
],
|
||
outputs=[
|
||
file_input,
|
||
info_display,
|
||
ids_state,
|
||
store_tick,
|
||
selected_group,
|
||
export_selected_btn,
|
||
],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
# Concurrency change handler: apply immediately
|
||
def _on_concurrency_change(n):
|
||
try:
|
||
set_max_concurrency(int(n))
|
||
return gr.update(value=f"并发已设置为 {int(n)}。")
|
||
except Exception as e:
|
||
return gr.update(value=f"设置并发失败:{e}")
|
||
|
||
concurrency.change(
|
||
_on_concurrency_change,
|
||
inputs=[concurrency],
|
||
outputs=[info_display],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
# 会话加载时同步 UI 与当前真实并发(解决刷新后 UI 值与实际不一致)
|
||
def _sync_concurrency_on_session_load():
|
||
try:
|
||
# 如有需要,补齐 worker 到目标并发数(不会减少已有线程)
|
||
_start_workers(max(1, MAX_CONCURRENCY))
|
||
return (
|
||
gr.update(value=int(MAX_CONCURRENCY)),
|
||
gr.update(
|
||
value=f"已同步当前并发为 {int(MAX_CONCURRENCY)}。"
|
||
),
|
||
)
|
||
except Exception as e:
|
||
return (
|
||
gr.update(value=int(MAX_CONCURRENCY)),
|
||
gr.update(value=f"同步并发时发生异常:{e}"),
|
||
)
|
||
|
||
demo.load(
|
||
_sync_concurrency_on_session_load,
|
||
inputs=None,
|
||
outputs=[concurrency, info_display],
|
||
)
|
||
|
||
# 生成导出 ZIP(基于当前选择),用于首次点击即可下载
|
||
def _update_export_for_selection(ids, selected_labels):
|
||
path = export_selected_rids(ids, selected_labels)
|
||
return gr.update(
|
||
value=path if path and os.path.exists(path) else None
|
||
)
|
||
|
||
# Actions: 全选/清空
|
||
def _select_all(ids):
|
||
choices = [f"Result {i+1}" for i in range(len(ids or []))]
|
||
# 预生成 zip
|
||
path = export_selected_rids(ids, choices)
|
||
return (
|
||
gr.update(choices=choices, value=choices),
|
||
gr.update(
|
||
value=path if path and os.path.exists(path) else None
|
||
),
|
||
)
|
||
|
||
def _clear_selection(ids):
|
||
choices = [f"Result {i+1}" for i in range(len(ids or []))]
|
||
return (
|
||
gr.update(choices=choices, value=[]),
|
||
gr.update(value=None),
|
||
)
|
||
|
||
select_all_btn.click(
|
||
_select_all,
|
||
inputs=[ids_state],
|
||
outputs=[selected_group, export_selected_btn],
|
||
show_progress="hidden",
|
||
)
|
||
clear_sel_btn.click(
|
||
_clear_selection,
|
||
inputs=[ids_state],
|
||
outputs=[selected_group, export_selected_btn],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
# 当用户手动变更选择时,预构建导出 zip 并绑定到按钮
|
||
selected_group.change(
|
||
_update_export_for_selection,
|
||
inputs=[ids_state, selected_group],
|
||
outputs=[export_selected_btn],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
# Actions: 批量重解析(基于当前图片)
|
||
def bulk_reparse(
|
||
selected_labels, ids, p_mode, ip, port, minp, maxp, fitz, tick
|
||
):
|
||
if not ids or not selected_labels:
|
||
path_sel = export_selected_rids(ids, selected_labels)
|
||
return (
|
||
gr.update(value="未选择任何结果。"),
|
||
int(tick or 0),
|
||
gr.update(value=path_sel),
|
||
)
|
||
# Map labels -> rids
|
||
count = 0
|
||
for label in selected_labels:
|
||
try:
|
||
idx = int(str(label).split()[-1]) - 1
|
||
rid = ids[idx]
|
||
enqueue_single_reparse(
|
||
rid,
|
||
None,
|
||
p_mode,
|
||
ip,
|
||
int(port),
|
||
int(minp),
|
||
int(maxp),
|
||
fitz,
|
||
)
|
||
count += 1
|
||
except Exception:
|
||
continue
|
||
path_sel = export_selected_rids(ids, selected_labels)
|
||
return (
|
||
gr.update(value=f"已触发 {count} 个重解析任务。"),
|
||
int(tick or 0) + 1,
|
||
gr.update(value=path_sel),
|
||
)
|
||
|
||
bulk_reparse_btn.click(
|
||
bulk_reparse,
|
||
inputs=[
|
||
selected_group,
|
||
ids_state,
|
||
prompt_mode,
|
||
server_ip,
|
||
server_port,
|
||
min_pixels,
|
||
max_pixels,
|
||
fitz_preprocess,
|
||
store_tick,
|
||
],
|
||
outputs=[info_display, store_tick, export_selected_btn],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
# Actions: 删除所选(尊重“删除前确认”)
|
||
def delete_selected_action(ids, selected_labels, tick):
|
||
# 先从“原始 ids 列表”解析出要删除的 rid 列表,避免索引随删除而错位
|
||
if not ids or not selected_labels:
|
||
choices = [f"Result {i+1}" for i in range(len(ids or []))]
|
||
return (
|
||
ids,
|
||
int(tick or 0),
|
||
gr.update(choices=choices, value=[]),
|
||
gr.update(value=None),
|
||
)
|
||
# 解析 label -> index(去重、过滤非法)
|
||
sel_indices = []
|
||
for label in selected_labels:
|
||
try:
|
||
idx = int(str(label).split()[-1]) - 1
|
||
if 0 <= idx < len(ids):
|
||
sel_indices.append(idx)
|
||
except Exception:
|
||
continue
|
||
if not sel_indices:
|
||
choices = [f"Result {i+1}" for i in range(len(ids or []))]
|
||
return (
|
||
ids,
|
||
int(tick or 0),
|
||
gr.update(choices=choices, value=[]),
|
||
gr.update(value=None),
|
||
)
|
||
sel_indices = sorted(set(sel_indices))
|
||
rids_to_delete = [ids[i] for i in sel_indices]
|
||
|
||
new_ids = list(ids)
|
||
new_tick = int(tick or 0)
|
||
# 基于 rid 删除,避免受索引变化影响
|
||
for rid in rids_to_delete:
|
||
new_ids, new_tick = delete_one(new_ids, rid, new_tick)
|
||
|
||
choices = [f"Result {i+1}" for i in range(len(new_ids or []))]
|
||
return (
|
||
new_ids,
|
||
new_tick,
|
||
gr.update(choices=choices, value=[]),
|
||
gr.update(value=None),
|
||
)
|
||
|
||
def _on_bulk_delete_click(ids, selected_labels, need_confirm, tick):
|
||
if need_confirm:
|
||
# 展示确认面板,不改动任何选择与导出
|
||
return (
|
||
gr.update(visible=True),
|
||
ids,
|
||
tick,
|
||
gr.update(),
|
||
gr.update(),
|
||
)
|
||
# 直接删除并隐藏确认面板
|
||
new_ids, new_tick, sel_update, export_update = (
|
||
delete_selected_action(ids, selected_labels, tick)
|
||
)
|
||
return (
|
||
gr.update(visible=False),
|
||
new_ids,
|
||
new_tick,
|
||
sel_update,
|
||
export_update,
|
||
)
|
||
|
||
delete_selected_btn.click(
|
||
_on_bulk_delete_click,
|
||
inputs=[
|
||
ids_state,
|
||
selected_group,
|
||
confirm_delete_state,
|
||
store_tick,
|
||
],
|
||
outputs=[
|
||
bulk_delete_confirm_panel,
|
||
ids_state,
|
||
store_tick,
|
||
selected_group,
|
||
export_selected_btn,
|
||
],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
def _bulk_confirm_delete(ids, selected_labels, tick):
|
||
new_ids, new_tick, sel_update, export_update = (
|
||
delete_selected_action(ids, selected_labels, tick)
|
||
)
|
||
return (
|
||
new_ids,
|
||
new_tick,
|
||
sel_update,
|
||
export_update,
|
||
gr.update(visible=False),
|
||
)
|
||
|
||
bulk_confirm_delete_btn.click(
|
||
_bulk_confirm_delete,
|
||
inputs=[ids_state, selected_group, store_tick],
|
||
outputs=[
|
||
ids_state,
|
||
store_tick,
|
||
selected_group,
|
||
export_selected_btn,
|
||
bulk_delete_confirm_panel,
|
||
],
|
||
show_progress="hidden",
|
||
)
|
||
bulk_cancel_delete_btn.click(
|
||
lambda: gr.update(visible=False),
|
||
outputs=[bulk_delete_confirm_panel],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
# 进度信息
|
||
def update_progress_info(ids, tick, bump):
|
||
if not ids:
|
||
return (
|
||
gr.update(value="Waiting..."),
|
||
tick,
|
||
int(bump or 0),
|
||
)
|
||
pending = 0
|
||
done = 0
|
||
errors = 0
|
||
status_signature = []
|
||
for rid in ids:
|
||
st = RESULTS_CACHE.get(rid, {})
|
||
status = st.get("status", "pending")
|
||
status_signature.append((rid, status))
|
||
if status == "done":
|
||
done += 1
|
||
elif status == "error":
|
||
errors += 1
|
||
else:
|
||
pending += 1
|
||
qsize = TASK_QUEUE.qsize()
|
||
running = max(0, pending - qsize)
|
||
|
||
# Info text
|
||
if pending == 0:
|
||
info = (
|
||
f"进度:完成 {done}"
|
||
+ ("" if errors == 0 else f",错误 {errors}")
|
||
+ "。"
|
||
)
|
||
else:
|
||
info = f"进度:完成 {done},错误 {errors},正在解析 {running},排队 {qsize},待处理合计 {pending}。"
|
||
|
||
# Only bump render when any item's status changed
|
||
sig_tuple = tuple(status_signature)
|
||
last_sig = getattr(update_progress_info, "_last_status_sig", None)
|
||
bump_out = int(bump or 0)
|
||
if last_sig != sig_tuple:
|
||
setattr(update_progress_info, "_last_status_sig", sig_tuple)
|
||
bump_out = bump_out + 1
|
||
|
||
# Only tick when coarse counts change (avoid unnecessary churn)
|
||
key = f"{done}_{errors}_{pending}"
|
||
last_key = getattr(update_progress_info, "_last_counts_key", None)
|
||
new_tick = int(tick or 0)
|
||
if last_key != key:
|
||
setattr(update_progress_info, "_last_counts_key", key)
|
||
new_tick = new_tick + 1
|
||
|
||
return (
|
||
gr.update(value=info),
|
||
new_tick,
|
||
bump_out,
|
||
)
|
||
|
||
# 计时器不再触达 selected_group,杜绝与用户交互竞争导致选择重置/计时停止
|
||
progress_timer.tick(
|
||
fn=update_progress_info,
|
||
inputs=[ids_state, store_tick, render_bump],
|
||
outputs=[info_display, store_tick, render_bump],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
# Clear all
|
||
def clear_all():
|
||
global RESULTS_CACHE
|
||
while not TASK_QUEUE.empty():
|
||
try:
|
||
TASK_QUEUE.get_nowait()
|
||
TASK_QUEUE.task_done()
|
||
except queue.Empty:
|
||
break
|
||
RESULTS_CACHE = {}
|
||
RETRY_COUNTS.clear()
|
||
# Do not stop workers; keep them alive
|
||
return (
|
||
[],
|
||
0,
|
||
gr.update(value="Waiting..."),
|
||
0,
|
||
gr.update(choices=[], value=[]),
|
||
gr.update(value=None),
|
||
)
|
||
|
||
clear_btn.click(
|
||
clear_all,
|
||
inputs=None,
|
||
outputs=[
|
||
ids_state,
|
||
store_tick,
|
||
info_display,
|
||
render_bump,
|
||
selected_group,
|
||
export_selected_btn,
|
||
],
|
||
show_progress="hidden",
|
||
)
|
||
|
||
return demo
|
||
|
||
|
||
# ---------------- main ----------------
|
||
def _queue_compat(blocks: gr.Blocks):
|
||
"""
|
||
Gradio version compatibility layer for Blocks.queue:
|
||
- Try Gradio 4.x: default_concurrency_limit + status_update_rate
|
||
- Fallback to Gradio 3.x: concurrency_count + status_update_rate
|
||
- Final fallback: no-arg queue()
|
||
"""
|
||
try:
|
||
# Gradio 4.x path
|
||
return blocks.queue(default_concurrency_limit=20, status_update_rate=0.2)
|
||
except TypeError:
|
||
try:
|
||
# Gradio 3.x path
|
||
return blocks.queue(concurrency_count=16, status_update_rate=0.2)
|
||
except TypeError:
|
||
# Minimal fallback
|
||
return blocks.queue()
|
||
|
||
|
||
def _launch_compat(app: gr.Blocks, port: int):
|
||
"""
|
||
Gradio version compatibility for launch parameters.
|
||
"""
|
||
try:
|
||
app.launch(
|
||
server_name="0.0.0.0",
|
||
server_port=port,
|
||
debug=True,
|
||
show_api=False, # 3.x/部分4.x可用
|
||
)
|
||
except TypeError:
|
||
# Fallback without show_api
|
||
app.launch(
|
||
server_name="0.0.0.0",
|
||
server_port=port,
|
||
debug=True,
|
||
)
|
||
|
||
|
||
if __name__ == "__main__":
|
||
import sys
|
||
|
||
port = int(sys.argv[1]) if len(sys.argv) > 1 else 7860
|
||
demo = create_gradio_interface()
|
||
app = _queue_compat(demo)
|
||
_launch_compat(app, port)
|