12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273 |
- # coding:utf-8
- import requests
- from concurrent.futures import ThreadPoolExecutor
- from docs.config import getOcrUrl
def combine_text(rows):
    """Merge recognised text fragments into a single string.

    Fragments are concatenated in order. A newline is inserted *between* a
    fragment and its successor when their ``row[0][1]`` values differ by more
    than half of the current fragment's ``row[0][3]``.

    :param rows: sequence of entries where ``row[0]`` is an indexable box
        description and ``row[1]`` is the fragment's text.
        NOTE(review): ``row[0][1]`` / ``row[0][3]`` look like the vertical
        position and the height of the box - confirm against the OCR service.
    :return: the merged text; an empty string when ``rows`` is empty.
    """
    pieces = []
    last_index = len(rows) - 1
    for ind, row in enumerate(rows):
        # Emit the fragment first so that a line break lands after it,
        # i.e. between this fragment and the next one.
        pieces.append(row[1])
        if ind < last_index:
            box = row[0]
            next_pos = rows[ind + 1][0][1]
            if abs(box[1] - next_pos) > box[3] / 2:
                pieces.append("\n")
    return "".join(pieces)
def tr_ocr(*args):
    """Run OCR on one picture through the remote tr-run service.

    The service address is fetched from ``getOcrUrl`` (up to two attempts);
    the picture is then posted to ``http://<ip>/api/tr-run/`` and the
    ``data.raw_out`` field of the JSON reply is merged with ``combine_text``.

    :param args: ``args[0]`` must be an ``(ind, picture_path)`` pair, which is
        the shape produced when this function is mapped over a list of pairs.
    :return: ``{ind: text}`` on success, ``{ind: ""}`` when no address could
        be obtained or the reply carries no ``raw_out``, and ``{-1: ""}`` when
        any exception is raised (the exception is printed, not propagated).
    """
    try:
        ind, picture_path = args[0]
        # The context manager guarantees the file handle is closed on every
        # exit path (early return or exception) instead of leaking it.
        with open(picture_path, 'rb') as picture_file:
            img_file = {'file': picture_file}
            url = ""
            print(getOcrUrl)
            # Ask for a service address at most twice; stop at the first
            # non-empty answer.
            for _ in range(2):
                ip = requests.get(getOcrUrl).text
                print(ip)
                if ip:
                    url = f"http://{ip}/api/tr-run/"
                    break
            print("url-->", url)
            if url:
                res = requests.post(url=url, data={"is_draw": 0}, files=img_file)
                res_json = res.json()
                data = res_json.get("data", {})
                raw_out = data.get("raw_out", [])
                if raw_out:
                    text = combine_text(raw_out)
                    return {ind: text}
            return {ind: ""}
    except Exception as e:
        # Best-effort by design: report the failure and return the sentinel
        # entry so a batch run keeps going.
        print(e)
        return {-1: ""}
def tr_ocr_patch(picture_paths):
    """Run ``tr_ocr`` over many pictures concurrently and merge the results.

    :param picture_paths: iterable of ``(ind, picture_path)`` pairs; each pair
        is passed to ``tr_ocr`` as its single argument.
    :return: one dict combining every per-picture result dict returned by
        ``tr_ocr``; later entries overwrite earlier ones with the same key.
    """
    total_result = {}
    # Using the executor as a context manager shuts the pool down once all
    # results are consumed, so worker threads do not outlive the call.
    with ThreadPoolExecutor(max_workers=10) as thread_pool:
        for res in thread_pool.map(tr_ocr, picture_paths):
            total_result.update(res)
    return total_result
if __name__ == '__main__':
    # Manual smoke run: OCR two sample pictures, then print the merged
    # result followed by the elapsed wall-clock time in seconds.
    import time

    sample_jobs = [(0, "../docs/2.png"), (1, "../docs/1.png")]
    began_at = time.time()
    ocr_output = tr_ocr_patch(sample_jobs)
    print(ocr_output)
    print(time.time() - began_at)
|