tr_ocr.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. # coding:utf-8
  2. import requests
  3. from concurrent.futures import ThreadPoolExecutor
  4. from docs.config import getOcrUrl
  5. def combine_text(rows):
  6. '''
  7. 合并文字
  8. :param rows:
  9. :return:
  10. '''
  11. text = ""
  12. for ind, row in enumerate(rows):
  13. if ind < len(rows) - 1:
  14. next_len = rows[ind + 1][0][1]
  15. if abs(row[0][1] - next_len) > row[0][3] / 2:
  16. text += "\n"
  17. text += row[1]
  18. return text
  19. def tr_ocr(*args):
  20. '''
  21. 特定的ocr
  22. :param args:
  23. :return:
  24. '''
  25. try:
  26. ind, picture_path = args[0]
  27. img_file = {
  28. 'file': open(picture_path, 'rb')
  29. }
  30. url = ""
  31. print(getOcrUrl)
  32. for once in range(2):
  33. ip = requests.get(getOcrUrl).text
  34. print(ip)
  35. if ip:
  36. url = f"http://{ip}/api/tr-run/"
  37. break
  38. print("url-->",url)
  39. if url:
  40. res = requests.post(url=url, data={"is_draw": 0}, files=img_file)
  41. res_json = res.json()
  42. data = res_json.get("data", {})
  43. raw_out = data.get("raw_out", [])
  44. if raw_out:
  45. text = combine_text(raw_out)
  46. return {ind: text}
  47. return {ind: ""}
  48. except Exception as e:
  49. print(e)
  50. return {-1: ""}
  51. def tr_ocr_patch(picture_paths):
  52. total_result = {}
  53. thread_pool = ThreadPoolExecutor(max_workers=10)
  54. results = thread_pool.map(tr_ocr, picture_paths)
  55. for res in results:
  56. total_result.update(res)
  57. return total_result
  58. if __name__ == '__main__':
  59. import time
  60. paths = [(0, "../docs/2.png"), (1, "../docs/1.png")]
  61. start = time.time()
  62. content = tr_ocr_patch(paths)
  63. print(content)
  64. print(time.time() - start)