picture_extract.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. # coding:utf-8
  2. from PIL import Image
  3. import os
  4. def extract_image(xObject, base_dir, image_name):
  5. try:
  6. if xObject.attrs["Subtype"].name == 'Image':
  7. size = (xObject.attrs["Width"], xObject.attrs["Height"])
  8. data = xObject.get_data()
  9. if xObject.attrs["ColorSpace"].name == 'DeviceRGB':
  10. mode = "RGB"
  11. else:
  12. mode = "P"
  13. # 保存图片的文件名前缀
  14. img_pre = os.path.join(base_dir, image_name)
  15. if 'Filter' in xObject.attrs:
  16. if xObject.attrs["Filter"].name == 'FlateDecode':
  17. img = Image.frombytes(mode, size, data)
  18. img.save(img_pre + ".png")
  19. return img_pre + ".png"
  20. elif xObject.attrs["Filter"].name == 'DCTDecode':
  21. img = open(img_pre + ".jpg", "wb")
  22. img.write(data)
  23. img.close()
  24. return img_pre + ".jpg"
  25. elif xObject.attrs["Filter"].name == 'JPXDecode':
  26. img = open(img_pre + ".jp2", "wb")
  27. img.write(data)
  28. img.close()
  29. return img_pre + ".jp2"
  30. elif xObject.attrs["Filter"].name == 'CCITTFaxDecode':
  31. img = open(img_pre + ".tiff", "wb")
  32. img.write(data)
  33. img.close()
  34. return img_pre + ".tiff"
  35. else:
  36. img = Image.frombytes(mode, size, data)
  37. img.save(img_pre + ".png")
  38. return img_pre + ".png"
  39. except Exception as e:
  40. # print(f"图片提取失败-->{e}")
  41. return None
  42. class Picture(object):
  43. def __init__(self, min_y, max_y, height, width, image_path):
  44. self.min_y = min_y
  45. self.max_y = max_y
  46. self.width = width
  47. self.height = height
  48. self.image_path = image_path
  49. self.content = ""