ai
/
nsq_convert2txt


			
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
							# coding:utf-8
import requests
from concurrent.futures import ThreadPoolExecutor
from docs.config import getOcrUrl


def combine_text(rows):
    '''
    Merge OCR rows into one string, breaking lines between vertically
    separated rows.

    Each row is indexed as ``row[0][1]``, ``row[0][3]`` and ``row[1]``;
    presumably ``row[0]`` is a box like ``(x, y, w, h, ...)`` and ``row[1]``
    is the recognised text -- TODO confirm against the OCR service output.

    A newline is inserted between two consecutive rows when their
    ``row[0][1]`` values differ by more than half of the first row's
    ``row[0][3]``.

    :param rows: sequence of OCR rows in reading order; may be empty.
    :return: the concatenated text ("" for empty input).
    '''
    if not rows:
        return ""

    parts = []
    for row, next_row in zip(rows, rows[1:]):
        parts.append(row[1])
        # The break belongs *after* the current row's text: it separates
        # this row from the next one, which sits on a different line.
        if abs(row[0][1] - next_row[0][1]) > row[0][3] / 2:
            parts.append("\n")
    # The last row has no successor, so it is appended without a check.
    parts.append(rows[-1][1])
    return "".join(parts)


def tr_ocr(*args):
    '''
    OCR a single picture through the remote "tr-run" HTTP service.

    The service address is fetched by GET-ing ``getOcrUrl`` (at most two
    attempts). The picture is then POSTed to ``http://<ip>/api/tr-run/``
    and the ``data.raw_out`` rows of the JSON reply are merged with
    ``combine_text``.

    :param args: only ``args[0]`` is used; it must be an
        ``(index, picture_path)`` pair.
    :return: ``{index: text}`` on success; ``{index: ""}`` when no service
        address was obtained or the reply carries no ``raw_out`` rows;
        ``{-1: ""}`` when any exception is raised (the exception is printed).
    '''
    try:
        ind, picture_path = args[0]
        # Open via a context manager so the file handle is released on every
        # path (success, empty result, or exception) instead of leaking.
        with open(picture_path, 'rb') as picture:
            url = ""
            print(getOcrUrl)
            # NOTE(review): neither request below sets a timeout, so a stalled
            # service blocks the calling thread indefinitely.
            for _ in range(2):
                ip = requests.get(getOcrUrl).text
                print(ip)
                if ip:
                    url = f"http://{ip}/api/tr-run/"
                    break
            print("url-->", url)
            if url:
                res = requests.post(
                    url=url,
                    data={"is_draw": 0},
                    files={'file': picture},
                )
                res_json = res.json()
                data = res_json.get("data", {})
                raw_out = data.get("raw_out", [])
                if raw_out:
                    text = combine_text(raw_out)
                    return {ind: text}
        return {ind: ""}
    except Exception as e:
        # Best-effort: report the failure and return the sentinel key -1 so
        # the caller is never interrupted by a single bad picture.
        print(e)
        return {-1: ""}


def tr_ocr_patch(picture_paths, max_workers=10):
    '''
    Run ``tr_ocr`` over many pictures concurrently and merge the results.

    :param picture_paths: iterable of ``(index, picture_path)`` pairs, each
        passed to ``tr_ocr`` as its single argument.
    :param max_workers: size of the thread pool (defaults to 10).
    :return: one dict combining every per-picture result dict. Because
        ``tr_ocr`` reports any failure under the key ``-1``, all failed
        pictures collapse into a single ``-1`` entry.
    '''
    total_result = {}
    # The context manager shuts the pool down once all results are consumed,
    # so worker threads do not outlive the call.
    with ThreadPoolExecutor(max_workers=max_workers) as thread_pool:
        # map() yields results in the same order as picture_paths.
        for res in thread_pool.map(tr_ocr, picture_paths):
            total_result.update(res)
    return total_result


if __name__ == '__main__':
    # Manual smoke run: OCR two sample pictures, then print the merged
    # result followed by the elapsed wall-clock time in seconds.
    import time

    started_at = time.time()
    sample_jobs = [
        (0, "../docs/2.png"),
        (1, "../docs/1.png"),
    ]
    ocr_output = tr_ocr_patch(sample_jobs)
    print(ocr_output)
    elapsed = time.time() - started_at
    print(elapsed)