# coding:utf-8
"""Client helpers for sending images to a remote "tr-run" OCR endpoint.

The service host is looked up at call time by issuing a GET request to
``getOcrUrl`` (imported from ``docs.config``); the recognised rows returned by
the service are then merged into plain text.
"""
import requests
from concurrent.futures import ThreadPoolExecutor

from docs.config import getOcrUrl


def combine_text(rows):
    """Merge recognised OCR rows into a single string.

    Each row is indexed as ``row[0][1]``, ``row[0][3]`` and ``row[1]``.
    NOTE(review): presumably ``row[0]`` is the text box with index 1 being its
    vertical position and index 3 its height, and ``row[1]`` the recognised
    text -- confirm against the OCR service's response format.

    A line break is inserted between two consecutive rows when their
    ``row[0][1]`` values differ by more than half of the current row's
    ``row[0][3]``.

    :param rows: sequence of rows as described above
    :return: the concatenated text ("" for an empty sequence)
    """
    pieces = []
    last_index = len(rows) - 1
    for ind, row in enumerate(rows):
        pieces.append(row[1])
        if ind < last_index:
            next_pos = rows[ind + 1][0][1]
            # The separator belongs *after* the current row's text, i.e.
            # between this row and the next one.
            if abs(row[0][1] - next_pos) > row[0][3] / 2:
                pieces.append("\n")
    return "".join(pieces)


def tr_ocr(*args):
    """Run OCR on one picture through the remote "tr-run" API.

    :param args: ``args[0]`` must be an ``(index, picture_path)`` pair
    :return: ``{index: text}`` on success, ``{index: ""}`` when no service
        address was obtained or the response had no ``raw_out`` rows, and
        ``{-1: ""}`` when any exception was raised (the exception is printed)
    """
    try:
        ind, picture_path = args[0]
        # The context manager guarantees the image handle is closed on every
        # exit path, including exceptions raised by the HTTP calls.
        with open(picture_path, 'rb') as image_file:
            img_file = {
                'file': image_file
            }
            url = ""
            print(getOcrUrl)
            # Ask for the service address at most twice; stop at the first
            # non-empty answer.
            for _ in range(2):
                ip = requests.get(getOcrUrl).text
                print(ip)
                if ip:
                    url = f"http://{ip}/api/tr-run/"
                    break
            print("url-->", url)
            if url:
                res = requests.post(url=url, data={"is_draw": 0}, files=img_file)
                res_json = res.json()
                data = res_json.get("data", {})
                raw_out = data.get("raw_out", [])
                if raw_out:
                    text = combine_text(raw_out)
                    return {ind: text}
        return {ind: ""}
    except Exception as e:
        # Best-effort: report the failure and hand back a sentinel entry
        # instead of propagating into the worker pool.
        print(e)
        return {-1: ""}


def tr_ocr_patch(picture_paths):
    """Run :func:`tr_ocr` over several pictures using a pool of 10 threads.

    :param picture_paths: iterable of ``(index, picture_path)`` pairs
    :return: dict merging every per-picture result; because failed pictures
        all report under key ``-1``, they collapse into a single entry
    """
    total_result = {}
    # Using the executor as a context manager shuts the worker threads down
    # once all results have been collected.
    with ThreadPoolExecutor(max_workers=10) as thread_pool:
        for res in thread_pool.map(tr_ocr, picture_paths):
            total_result.update(res)
    return total_result


if __name__ == '__main__':
    import time

    paths = [(0, "../docs/2.png"), (1, "../docs/1.png")]
    start = time.time()
    content = tr_ocr_patch(paths)
    print(content)
    print(time.time() - start)