# -*- coding: utf-8 -*-
"""
Created on 2024-10-31
---------
@summary: Parse the text shown by font glyph images
---------
@author: Dzr
"""
import io
import pathlib
import random
import re
import string
from pathlib import Path
from urllib.request import urlretrieve

import numpy as np
from PIL import Image, ImageOps
from ddddocr import DdddOcr
from ddddocr import base64_to_image, get_img_base64
from fontTools.misc.transform import Offset
from fontTools.pens.freetypePen import FreeTypePen  # pip install freetype-py
from fontTools.ttLib import TTFont

# On-disk cache layout, created next to this module at import time.
_root = Path(__file__).parent
_cache_dir = _root.joinpath('cache')
_cache_dir.mkdir(exist_ok=True)
_font_dir = _cache_dir.joinpath('font')
_font_dir.mkdir(exist_ok=True)
_image_dir = _cache_dir.joinpath('image')
_image_dir.mkdir(exist_ok=True)


def get_random(length=4):
    """Return ``length`` distinct random ASCII letters/digits as one string."""
    return ''.join(random.sample(string.ascii_letters + string.digits, length))


def parse_font_url(html):
    """Extract the 'icomoon' font URL from a page's inline CSS.

    @param html: page source containing the ``@font-face`` declaration
    @return: the URL captured from ``src:url('...')``
    @raise ValueError: when no matching declaration is found
    """
    result = re.search(r"'icomoon';src:url\('(.*?)'\)", html, re.S)
    if result is None:
        raise ValueError(f'字体库 url "{result}" ')

    return result.group(1)


def create_file(filename):
    """Create (if missing) an empty file in the font cache and return its path."""
    file = _font_dir.joinpath(filename)
    file.touch(exist_ok=True)
    return file


def download_font(html, font_type='ttf', to_local=False):
    """Download the font referenced by ``html``.

    @param html: page source passed to ``parse_font_url``
    @param font_type: file extension used for the temporary font file
    @param to_local: keep the downloaded file on disk and return its path
    @return: the font's path when ``to_local`` is true, otherwise its bytes
    """
    # Resolve the URL first so a page without a font leaves no stray file.
    url = parse_font_url(html)
    tmp = create_file(f'{get_random(6)}.{font_type}')
    try:
        urlretrieve(url, filename=tmp)
    except Exception:
        # Do not leave an empty/partial font behind after a failed download.
        tmp.unlink(missing_ok=True)
        raise

    if to_local:
        return tmp

    try:
        return tmp.read_bytes()
    finally:
        tmp.unlink(missing_ok=True)


def image_to_bytes(image, filetype='JPEG'):
    """Encode a PIL image in ``filetype`` format and return the raw bytes."""
    byte_stream = io.BytesIO()
    image.save(byte_stream, format=filetype)
    return byte_stream.getvalue()


def open_image(image_path):
    """Open an image from one of the supported sources.

    @param image_path: encoded image bytes, a tuple whose second item is the
        encoded image bytes, a ``str`` (handed to ddddocr's
        ``get_img_base64`` / ``base64_to_image``), or a ``pathlib`` path
    @return: a PIL image
    @raise AssertionError: for any other argument type
    """
    if isinstance(image_path, bytes):
        return Image.open(io.BytesIO(image_path))
    if isinstance(image_path, tuple):
        return Image.open(io.BytesIO(image_path[1]))
    if isinstance(image_path, str):
        return base64_to_image(get_img_base64(image_path))
    # Raised explicitly (rather than via ``assert``) so the check also runs
    # under ``python -O``; the exception type is unchanged.
    if not isinstance(image_path, pathlib.PurePath):
        raise AssertionError(f'{type(image_path).__name__} is not supported')
    return Image.open(image_path)


def _all_pixels_equal(image_path, mode, value):
    """Return True when every pixel of the image, converted to ``mode``, equals ``value``."""
    img = open_image(image_path)
    if img.mode != mode:
        img = img.convert(mode)
    # Lazy scan: stops at the first non-matching pixel.
    return all(pixel == value for pixel in img.getdata())


def rgb_image_is_pure_white(image_path):
    """Return True when every RGB pixel is (255, 255, 255)."""
    return _all_pixels_equal(image_path, 'RGB', (255, 255, 255))


def grey_image_is_pure_white(image_path):
    """Return True when every greyscale pixel has the maximum value 255."""
    return _all_pixels_equal(image_path, 'L', 255)


def is_pure_white(image_path, mode='L'):
    """Check whether an image is entirely white.

    @param image_path: any source accepted by ``open_image``
    @param mode: ``'L'`` (greyscale) or ``'RGB'``
    @raise AssertionError: for any other ``mode``
    """
    if mode == 'L':
        return grey_image_is_pure_white(image_path)
    if mode == 'RGB':
        return rgb_image_is_pure_white(image_path)
    raise AssertionError(f'{mode} is not supported')


class ImageToText:
    """Map the code points of a font to glyph names and, optionally, OCR text."""

    def __init__(self, file, cache=False, ocr=False, callback=None, image_scale=5, auto_delete=True):
        """
        @param file: font file (bytes, path string or ``pathlib`` path)
        @param cache: save the glyph images to the local disk cache
        @param ocr: run OCR on the glyph images
        @param image_scale: factor by which glyph images are scaled down
        @param callback: callable used to recognise the text of a glyph image
        @param auto_delete: automatically remove cached font/image files
        """
        if not isinstance(file, (bytes, str, pathlib.PurePath)):
            raise TypeError("未知文件类型")

        if isinstance(file, bytes):
            self._font = TTFont(io.BytesIO(file))
        else:
            self._font = TTFont(file)

        # Mapping: entity code -> {'name': glyph name, 'code': hex, 'zh': text}
        self._font_maps = {}
        self._image_scale = image_scale

        # Cache: glyph images kept in memory unless ``cache`` asks for disk.
        self._cache_images = {}
        self._to_local = cache
        # Images the caller asked to keep on disk must not be auto-deleted.
        self._auto_delete = False if cache else auto_delete

        # OCR: truthiness (not identity with ``True``) decides, so that a
        # truthy ``ocr`` can never leave ``_callback`` unset.
        self._callback = None
        self._enable_ocr = bool(ocr)
        if self._enable_ocr:
            if callable(callback):
                self._callback = callback
            else:
                ddddocr = DdddOcr(beta=False, old=True, show_ad=False)

                def _classification(files):
                    # In-memory images arrive as (name, bytes, type) tuples.
                    img = files[1] if isinstance(files, tuple) else files
                    return ddddocr.classification(img)

                self._callback = _classification

    def to_xml(self):
        """Dump the font as XML next to its source file (same name, ``.xml``).

        NOTE(review): relies on the font reader's file object exposing
        ``name``; presumably unavailable for fonts loaded from bytes — confirm.
        """
        filename = self._font.reader.file.name
        font_f = Path(filename).with_suffix('.xml')
        self._font.saveXML(font_f)

    @property
    def font_maps(self):
        """The entity-code -> glyph-info mapping built by ``parse_font``."""
        return self._font_maps

    def parse_font(self):
        """Build the code mapping and, when OCR is enabled, fill in the text."""
        self._font_encode()
        if self._enable_ocr:
            self._font_draw()
            self._font_ocr()

    def _font_encode(self):
        """Populate the mapping from the font's best cmap."""
        for codepoint, name in self._font.getBestCmap().items():
            code = f'&#{str(hex(codepoint))[1:]}'  # 0x100c4 => &#x100c4
            self._font_maps[code] = {'name': name, 'code': hex(codepoint), 'zh': ''}

    def _font_draw(self):
        """Render every mapped glyph to a greyscale JPEG (in memory or on disk)."""
        if not self._font_maps:
            return

        glyph_set = self._font.getGlyphSet()
        # Recommended ascent/descent from the OS/2 table fix the image height;
        # they are the same for every glyph, so read them once.
        os2 = self._font['OS/2']
        ascender = os2.usWinAscent
        descender = -os2.usWinDescent
        height = ascender - descender

        for code, glyph_dict in self._font_maps.items():
            glyph = glyph_set[glyph_dict['name']]  # look up the glyph
            pen = FreeTypePen(None)
            glyph.draw(pen)  # draw the glyph outline into the pen

            # Rasterise the outline into an array.
            single_font_image = pen.array(
                width=glyph.width,
                height=height,
                transform=Offset(0, -descender),
                contain=False,
                evenOdd=False,
            )
            # Scale to a greyscale value range.
            single_font_image = np.array(single_font_image) * 255
            # Invert the colours (black becomes white, white becomes black).
            single_font_image = 255 - single_font_image
            # Build the PIL image and convert it to greyscale mode.
            single_font_image = Image.fromarray(single_font_image)
            single_font_image = single_font_image.convert("L")
            # Add a white border around the glyph.
            single_font_image = ImageOps.expand(single_font_image, border=6, fill=255)
            # Scaled-down size; never below 1px so a large scale cannot
            # produce a zero-sized (invalid) image.
            new_width = max(1, single_font_image.width // self._image_scale)
            new_height = max(1, single_font_image.height // self._image_scale)
            single_font_image = single_font_image.resize(
                (new_width, new_height),
                resample=Image.Resampling.LANCZOS
            )

            image_name = f'{glyph_dict["code"]}.jpg'
            if not self._to_local:
                image_bytes = image_to_bytes(single_font_image)
                self._cache_images[code] = (image_name, image_bytes, 'jpg')
            else:
                single_font_image.save(_image_dir.joinpath(image_name))  # greyscale image

    def _extract_text(self, files):
        """Return the recognised text, or '' for an all-white (blank) image."""
        if is_pure_white(files, mode='L'):
            return ''
        return self._callback(files)

    def _font_ocr(self):
        """Run text recognition on every rendered glyph and store it under 'zh'."""
        for code, glyph_dict in self._font_maps.items():
            if self._to_local:
                files = _image_dir.joinpath(f'{glyph_dict["code"]}.jpg')
            else:
                files = self._cache_images[code]
            glyph_dict['zh'] = self._extract_text(files)

    def __contains__(self, key):
        return key in self._font_maps

    def __getitem__(self, key):
        return self._font_maps[key]

    def get(self, key, default=None):
        """Return the glyph info for ``key``, or ``default`` when unknown."""
        try:
            return self[key]
        except KeyError:
            return default

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._del(missing_ok=True)

    def _del(self, missing_ok=False):
        """Remove cached files when auto-delete is on.

        Deletes every file in the image and font cache directories, then
        tries to remove the cache directory itself.

        @param missing_ok: swallow the ``OSError`` raised when the cache
            directory cannot be removed
        """
        # ``getattr``: the finalizer also runs for instances whose
        # ``__init__`` raised before ``_auto_delete`` was assigned.
        if not getattr(self, '_auto_delete', False):
            return

        for img_f in _image_dir.iterdir():
            img_f.unlink(missing_ok=True)

        for font_f in _font_dir.iterdir():
            font_f.unlink(missing_ok=True)

        try:
            _cache_dir.rmdir()
        except OSError:
            if not missing_ok:
                raise

    def __del__(self):
        self._del(missing_ok=True)


FontTranslator = ImageToText


def parse_font(font_file, *, ocr=False, ocr_extract=None, **kwargs):
    """Create an ``ImageToText`` for ``font_file`` and parse it.

    @param font_file: font file (bytes, path string or ``pathlib`` path)
    @param ocr: enable OCR; forced on when ``ocr_extract`` is callable
    @param ocr_extract: callable used to recognise the text of a glyph image
    @param kwargs: forwarded to ``ImageToText``
    @return: the parsed ``ImageToText`` instance
    """
    if callable(ocr_extract):
        ocr = True
    translator = ImageToText(font_file, ocr=ocr, callback=ocr_extract, **kwargs)
    translator.parse_font()
    return translator