浏览代码

first commit

lijunliang 1 年之前
当前提交
b68c46ebc1
共有 100 个文件被更改,包括 10461 次插入和 0 次删除
  1. 137 0
      app.py
  2. 31 0
      docs/config.py
  3. 31 0
      extract.sh
  4. 88 0
      extractFileServer.py
  5. 47 0
      file_processing/__init__.py
  6. 二进制
      file_processing/__pycache__/__init__.cpython-37.pyc
  7. 二进制
      file_processing/__pycache__/file_compress.cpython-37.pyc
  8. 二进制
      file_processing/__pycache__/file_doc.cpython-37.pyc
  9. 二进制
      file_processing/__pycache__/file_docx.cpython-37.pyc
  10. 二进制
      file_processing/__pycache__/file_pdf.cpython-37.pyc
  11. 二进制
      file_processing/__pycache__/file_picture.cpython-37.pyc
  12. 二进制
      file_processing/__pycache__/file_swf.cpython-37.pyc
  13. 二进制
      file_processing/__pycache__/file_xlsx.cpython-37.pyc
  14. 24 0
      file_processing/extract.sh
  15. 134 0
      file_processing/file_compress.py
  16. 23 0
      file_processing/file_doc.py
  17. 108 0
      file_processing/file_docx.py
  18. 221 0
      file_processing/file_pdf.py
  19. 102 0
      file_processing/file_picture.py
  20. 65 0
      file_processing/file_swf.py
  21. 12 0
      file_processing/file_txt.py
  22. 44 0
      file_processing/file_xlsx.py
  23. 二进制
      file_processing/models/__pycache__/__init__.cpython-37.pyc
  24. 二进制
      file_processing/models/__pycache__/picture_extract.cpython-37.pyc
  25. 二进制
      file_processing/models/__pycache__/table_extract.cpython-37.pyc
  26. 54 0
      file_processing/models/picture_extract.py
  27. 43 0
      file_processing/models/table_extract.py
  28. 73 0
      file_processing/tr_ocr.py
  29. 23 0
      need_package/PyPDF2-master/.coveragerc
  30. 36 0
      need_package/PyPDF2-master/.github/workflows/unit-tests.yaml
  31. 11 0
      need_package/PyPDF2-master/.gitignore
  32. 39 0
      need_package/PyPDF2-master/.pre-commit-config.yaml
  33. 19 0
      need_package/PyPDF2-master/.travis.yml
  34. 631 0
      need_package/PyPDF2-master/CHANGELOG
  35. 29 0
      need_package/PyPDF2-master/LICENSE
  36. 2 0
      need_package/PyPDF2-master/MANIFEST.in
  37. 二进制
      need_package/PyPDF2-master/PDF_Samples/AutoCad_Diagram.pdf
  38. 103 0
      need_package/PyPDF2-master/PDF_Samples/AutoCad_Simple.pdf
  39. 二进制
      need_package/PyPDF2-master/PDF_Samples/GeoBase_NHNC1_Data_Model_UML_EN.pdf
  40. 26 0
      need_package/PyPDF2-master/PDF_Samples/README.txt
  41. 二进制
      need_package/PyPDF2-master/PDF_Samples/SF424_page2.pdf
  42. 二进制
      need_package/PyPDF2-master/PDF_Samples/Seige_of_Vicksburg_Sample_OCR.pdf
  43. 二进制
      need_package/PyPDF2-master/PDF_Samples/jpeg.pdf
  44. 5 0
      need_package/PyPDF2-master/PyPDF2/__init__.py
  45. 1 0
      need_package/PyPDF2-master/PyPDF2/_version.py
  46. 424 0
      need_package/PyPDF2-master/PyPDF2/filters.py
  47. 1227 0
      need_package/PyPDF2-master/PyPDF2/generic.py
  48. 553 0
      need_package/PyPDF2-master/PyPDF2/merger.py
  49. 152 0
      need_package/PyPDF2-master/PyPDF2/pagerange.py
  50. 3070 0
      need_package/PyPDF2-master/PyPDF2/pdf.py
  51. 311 0
      need_package/PyPDF2-master/PyPDF2/utils.py
  52. 358 0
      need_package/PyPDF2-master/PyPDF2/xmp.py
  53. 44 0
      need_package/PyPDF2-master/README.md
  54. 二进制
      need_package/PyPDF2-master/Resources/attachment.pdf
  55. 二进制
      need_package/PyPDF2-master/Resources/commented-xmp.pdf
  56. 二进制
      need_package/PyPDF2-master/Resources/commented.pdf
  57. 二进制
      need_package/PyPDF2-master/Resources/crazyones.pdf
  58. 1 0
      need_package/PyPDF2-master/Resources/crazyones.txt
  59. 二进制
      need_package/PyPDF2-master/Resources/git.pdf
  60. 二进制
      need_package/PyPDF2-master/Resources/jpeg.pdf
  61. 0 0
      need_package/PyPDF2-master/Resources/jpeg.txt
  62. 二进制
      need_package/PyPDF2-master/Resources/libreoffice-writer-password.pdf
  63. 二进制
      need_package/PyPDF2-master/Resources/pdflatex-outline.pdf
  64. 14 0
      need_package/PyPDF2-master/Sample_Code/README.txt
  65. 0 0
      need_package/PyPDF2-master/Sample_Code/__init__.py
  66. 51 0
      need_package/PyPDF2-master/Sample_Code/basic_features.py
  67. 20 0
      need_package/PyPDF2-master/Sample_Code/basic_merging.py
  68. 38 0
      need_package/PyPDF2-master/Sample_Code/makesimple.py
  69. 19 0
      need_package/PyPDF2-master/Sample_Code/makesimple.sh
  70. 54 0
      need_package/PyPDF2-master/Scripts/2-up.py
  71. 0 0
      need_package/PyPDF2-master/Scripts/__init__.py
  72. 57 0
      need_package/PyPDF2-master/Scripts/pdf-image-extractor.py
  73. 80 0
      need_package/PyPDF2-master/Scripts/pdfcat
  74. 0 0
      need_package/PyPDF2-master/Tests/__init__.py
  75. 53 0
      need_package/PyPDF2-master/Tests/test_basic_features.py
  76. 42 0
      need_package/PyPDF2-master/Tests/test_merger.py
  77. 129 0
      need_package/PyPDF2-master/Tests/test_reader.py
  78. 9 0
      need_package/PyPDF2-master/Tests/test_utils.py
  79. 108 0
      need_package/PyPDF2-master/Tests/test_workflows.py
  80. 22 0
      need_package/PyPDF2-master/Tests/test_xmp.py
  81. 93 0
      need_package/PyPDF2-master/Tests/tests.py
  82. 0 0
      need_package/PyPDF2-master/__init__.py
  83. 5 0
      need_package/PyPDF2-master/requirements/ci.in
  84. 46 0
      need_package/PyPDF2-master/requirements/ci.txt
  85. 57 0
      need_package/PyPDF2-master/setup.py
  86. 6 0
      need_package/PyPDF2-master/tox.ini
  87. 7 0
      need_package/readme.md
  88. 11 0
      need_package/servicerd/PKG-INFO
  89. 0 0
      need_package/servicerd/README.txt
  90. 0 0
      need_package/servicerd/build/lib/servicerd/__init__.py
  91. 96 0
      need_package/servicerd/build/lib/servicerd/async_queue.py
  92. 135 0
      need_package/servicerd/build/lib/servicerd/client.py
  93. 0 0
      need_package/servicerd/build/lib/servicerd/proto/__init__.py
  94. 110 0
      need_package/servicerd/build/lib/servicerd/proto/ggclassefity_pb2.py
  95. 4 0
      need_package/servicerd/build/lib/servicerd/proto/ggclassefity_pb2_grpc.py
  96. 150 0
      need_package/servicerd/build/lib/servicerd/proto/heartbeat_pb2.py
  97. 69 0
      need_package/servicerd/build/lib/servicerd/proto/heartbeat_pb2_grpc.py
  98. 235 0
      need_package/servicerd/build/lib/servicerd/proto/queue_pb2.py
  99. 104 0
      need_package/servicerd/build/lib/servicerd/proto/queue_pb2_grpc.py
  100. 335 0
      need_package/servicerd/build/lib/servicerd/proto/service_pb2.py

+ 137 - 0
app.py

@@ -0,0 +1,137 @@
+# coding:utf-8
+from util.file_operations import save_file
+from util.file_operations import generate_directory
+from util.file_operations import del_directory
+from util.attach_tools import download
+import file_processing
+import os
+import uuid
+from util.check_timeout import limit_decor
+from loguru import logger
+from proto import fileText_pb2
+from util.fs_client import FileServeClient
+from docs.config import error_number
+from file_processing import model_type
+from docs.config import oss_txt_config
+from docs.config import duplicatesRedisConfig
+from util.hash_file import cal_md5
+from util.redis_helper import RedisString
+
# OSS client used to upload/download extracted text content.
FS = FileServeClient(oss_txt_config)

# Redis string store backing md5-based file deduplication.
Redis = RedisString(duplicatesRedisConfig)
+
+
class FileApps(object):
    """
    Handles one extraction request end to end: save the file into a temp
    working directory, dispatch it to the right parser (recursing into
    archives), upload/deduplicate the extracted text, and clean up.
    """

    def __init__(self, request):
        self._file_name = request.get("file_name", "")
        self._file_url = request.get("file_url", "")
        self._file_bytes = request.get("file_bytes", "")
        self._file_type = request.get("file_type", "")
        self._return_type = request.get("return_type", 0)    # 0/2: upload text to OSS, return url
        self._extract_type = request.get("extract_type", 0)  # 0/2: download from url; 1: raw bytes
        self._base_dir = ""

    def start(self):
        """Entry point; returns a list of fileText_pb2.Result objects."""
        saved_file, self._base_dir = self.__save()
        if (not saved_file) or (not os.path.exists(saved_file)):
            return [self.create_response(file_path=self._file_name, content_result="", state=error_number["下载错误"])]
        response_object = self.__convert_start([saved_file], [])
        # Remove the temporary working directory.
        del_directory(self._base_dir)
        return response_object

    def __convert_start(self, files_path, result):
        # Recurse while archive extraction keeps producing new files.
        response_object, un_files = self.compress_method(files_path)
        result.extend(response_object)
        if un_files:
            return self.__convert_start(un_files, result)
        return result

    # Main dispatcher.
    @limit_decor(700, 3)
    def compress_method(self, files_path):
        """
        Parse each file; archives yield further files that are returned for
        the next round.
        :param files_path: list of local file paths to process
        :return: (responses, files_still_to_process)
        """
        un_files = []  # files produced by archive extraction, parsed next round
        response_object = []
        for file_path in files_path:
            try:
                suffix = file_path.split(".")[-1].lower()
                file_md5 = cal_md5(file_path)
                if suffix not in model_type:
                    response_object.append(
                        self.create_response(file_path=file_path, content_result="", state=error_number["类型不支持"]))
                    continue
                if self._return_type == 0 or self._return_type == 2:
                    # Dedup: an identical file parsed earlier already has a text url.
                    exists_url = Redis.string_get(file_md5)
                    if exists_url:
                        file_name = os.path.basename(file_path)
                        file_path = file_path.replace(self._base_dir, "")  # hide the temp dir in output
                        text_content = FS.download_text_content(exists_url) if self._return_type == 2 else ""
                        response_object.append(fileText_pb2.Result(fileName=file_name, textContent=text_content,
                                                                   textUrl=exists_url, filePath=file_path,
                                                                   errorState=error_number["成功"]))
                        continue
                content_result, state = file_processing.convert_start(file_path, suffix)
                if isinstance(content_result, (list, tuple)):
                    # Archive parsers return a list of extracted paths.
                    un_files.extend(content_result)
                else:
                    response_object.append(
                        self.create_response(file_path=file_path, content_result=content_result, state=state,
                                             file_md5=file_md5))
            except Exception as e:
                response_object.append(
                    self.create_response(file_path=file_path, content_result="", state=error_number["解析错误"],
                                         file_md5=""))
                logger.warning(f"附件解析失败{file_path}-{e}")
        return response_object, un_files

    def __save(self):
        """
        Persist the request's file into a fresh uuid-named directory.
        :return: (file_path, directory) on success, ("", "") on failure
        """
        file_dir = ""
        try:
            base_dir = os.path.dirname("./docs/")
            fs_name = str(uuid.uuid1())
            file_dir = os.path.join(base_dir, fs_name)
            generate_directory(file_dir)
            file_path = os.path.join(file_dir, self._file_name)
            print(f"save_type:{self._extract_type},file_name:{file_path},file_url:{self._file_url}")
            if self._extract_type == 0:
                status, _ = download(self._file_url, file_path, 1)
            elif self._extract_type == 1:
                status, _ = save_file(file=self._file_bytes, filename=file_path)
            elif self._extract_type == 2:
                status, _ = download(self._file_url, file_path, 2)
            else:
                status = "500"
            if status == "500":
                return "", ""
        except Exception as e:
            print("save false-->", e)
            del_directory(file_dir)
            return "", ""
        return file_path, file_dir

    def create_response(self, file_path: str, content_result: str, state: str, file_md5: str = ""):
        """
        Build a fileText_pb2.Result, uploading the text to OSS when requested.

        Bug fix: ``file_md5`` now defaults to "" — ``start`` and the
        unsupported-type branch call this method without a md5 value, which
        previously raised TypeError (missing required argument).
        """
        file_name = os.path.basename(file_path)
        file_path = file_path.replace(self._base_dir, "")  # hide the temp dir in output
        url = ""
        oss_url = str(uuid.uuid1())
        if self._return_type == 0 and content_result:
            status, request_id = FS.upload_text_file(oss_url, content_result)
            if status == 200:
                url = oss_url
                content_result = ""  # url-only mode: do not echo the text back
        elif self._return_type == 2 and content_result:
            status, request_id = FS.upload_text_file(oss_url, content_result)
            if status == 200:
                url = oss_url
        if url:
            insert_redis = Redis.string_set(file_md5, url)
            if not insert_redis:
                logger.warning(f"文件去重失败,插入失败--->{url}")
        response_object = fileText_pb2.Result(fileName=file_name, textContent=content_result, textUrl=url,
                                              filePath=file_path, errorState=state)
        return response_object

+ 31 - 0
docs/config.py

@@ -0,0 +1,31 @@
# coding:utf-8

# Registration info for the OCR picture service (consul service discovery).
picture_config = {
    "service_name": 'ocr_service',
    "rd_server": "192.168.3.12:10021",
    "balance_type": 2
}

# SECURITY NOTE(review): real-looking Alibaba Cloud OSS credentials are
# hardcoded and committed to source control. Rotate these keys and load them
# from environment variables or a secret manager instead.
oss_file_config = {
    "access_key_id": "LTAI4G5x9aoZx8dDamQ7vfZi",
    "access_key_secret": "Bk98FsbPYXcJe72n1bG3Ssf73acuNh",
    "endpoint": "oss-cn-beijing.aliyuncs.com",
    "bucket_name": "jy-datafile",
}

# Same credentials, different bucket — used for extracted-text uploads.
oss_txt_config = {
    "access_key_id": "LTAI4G5x9aoZx8dDamQ7vfZi",
    "access_key_secret": "Bk98FsbPYXcJe72n1bG3Ssf73acuNh",
    "endpoint": "oss-cn-beijing.aliyuncs.com",
    "bucket_name": "topjy",
}
# Error states returned to callers; the Chinese keys are used as lookup
# strings throughout the code base, so they must not be renamed.
error_number = {
    "成功": "200",
    "下载错误": "300",
    "解析错误": "400",
    "类型不支持": "500",
}

# HTTP OCR endpoint.
getOcrUrl = "http://192.168.3.13:8888/ocr"

# Redis instance backing md5-based file deduplication.
duplicatesRedisConfig = {"host": '192.168.3.239', "port": 6379}

+ 31 - 0
extract.sh

@@ -0,0 +1,31 @@
#!/bin/bash
# Extract text (if the SWF embeds fonts) or JPEG/PNG images from a SWF file.
#   $1: path to the .swf file
#   $2: output directory for extracted images
#
# Bug fix: the font list was assigned to "a" while the test below checked
# "$fonts", so the swfstrings branch could never run.
fonts="$(swfextract "$1" |grep 'Font'|awk -F')' '{print $NF}')"
if [ -n "$fonts" ]
then
   echo "$(swfstrings "$1")"
else
   jpegs=""
   jpegs="$(swfextract "$1" |grep 'JPEG'|awk -F')' '{print $NF}')"
   echo "$jpegs"
   # Split the comma-separated id list, restoring IFS immediately after.
   OLD_IFS="$IFS"
   IFS=","
   arr=($jpegs)
   IFS="$OLD_IFS"
   for s in ${arr[@]}
   do
     swfextract "$1" -j $s -o "$2/$s.jpg"
   done
   pngs=""
   pngs="$(swfextract "$1" |grep 'PNG'|awk -F')' '{print $NF}')"
   echo "$pngs"
   OLD_IFS="$IFS"
   IFS=","
   arr=($pngs)
   IFS="$OLD_IFS"
   for s in ${arr[@]}
   do
     swfextract "$1" -p $s -o "$2/$s.png"
   done
   echo "ok"
fi

+ 88 - 0
extractFileServer.py

@@ -0,0 +1,88 @@
+# coding:utf-8
+
+from proto import fileText_pb2
+from servicerd.service import ServiceManage
+from proto import fileText_pb2_grpc
+from concurrent import futures
+from loguru import logger
+from app import FileApps
+import grpc
+import argparse
+import time
+import json
+import file_processing
+
# Daily-rotated runtime log.
logger.add('./logs/runtime_{time}.log', rotation='00:00')

parser = argparse.ArgumentParser()
parser.add_argument('-host', '--serve_host', type=str, default="192.168.3.13", help='提供服务的主机地址')
# parser.add_argument('-host', '--serve_host', type=str, default="192.168.21.183", help='提供服务的主机地址')
# Bug fix: a stray '>' (terminal paste artifact) was embedded in this help text.
parser.add_argument('-port', '--serve_port', type=int, default=11103, help='提供服务的主机端口')
parser.add_argument('-chost', '--consul_serve_host', type=str, default='192.168.3.12', help='consul主机地址')
parser.add_argument('-cport', '--consul_serve_port', type=int, default=10021, help='consul主机端口')

args = parser.parse_args()

_ONE_DAY_IN_SECONDS = 60 * 60 * 24
_SERVICE_NAME = 'file_service'   # name registered with consul
_HOST = args.serve_host
_PORT = args.serve_port
RD_SERVER = "{}:{}".format(args.consul_serve_host, args.consul_serve_port)
WORKERS = 1  # single worker: extraction is CPU/IO heavy and rate-limited upstream
+
+
class FileExtractServices(fileText_pb2_grpc.FileExtractServicer):
    """gRPC servicer: receives file messages, runs extraction, returns results."""

    def FileExtract(self, request, context):
        """
        Handle one FileExtract RPC: parse every file in request.message and
        return a FileResponse echoing request.other.
        """
        try:
            response_result = []
            # "other" is an optional JSON passthrough carrying extra options.
            try:
                other = request.other
                other_json = json.loads(other)
            except Exception:
                other_json = {}
            file_processing.file_picture.OcrType = "tr" if other_json.get("ocr_type", "") == "tr" else "common"
            for file in request.message:
                request_attr = {"file_name": file.fileName,
                                "file_url": file.fileUrl,
                                "file_bytes": file.fileBytes,
                                "file_type": file.fileType,
                                "return_type": file.returnType,
                                "extract_type": file.extractType,
                                }
                file_factory = FileApps(request_attr)
                parse_result = file_factory.start()
                response_result.extend(parse_result)
            return fileText_pb2.FileResponse(result=response_result, other=request.other)
        except Exception:
            # Log with traceback (was a bare print) and still echo "other"
            # so callers can correlate the failed response.
            logger.exception("FileExtract failed")
            return fileText_pb2.FileResponse(result=[], other=request.other)
+
+
def start_server(destory_fn: any):
    """
    Start the gRPC server and block until interrupted.
    :param destory_fn: callback that deregisters the service on shutdown
    :return:
    """
    grpc_server = grpc.server(futures.ThreadPoolExecutor(max_workers=WORKERS))
    fileText_pb2_grpc.add_FileExtractServicer_to_server(FileExtractServices(), grpc_server)
    grpc_server.add_insecure_port('[::]:%d' % _PORT)
    grpc_server.start()
    # Sleep-loop until Ctrl-C, then deregister and stop the server.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        destory_fn()
        grpc_server.stop(0)
+
+
if __name__ == '__main__':
    # Register this instance with consul, then serve; sm.destory is passed
    # down so the service is deregistered on shutdown.
    sm = ServiceManage(rd_server=RD_SERVER,
                       name=_SERVICE_NAME,
                       local_ip=_HOST,
                       local_port=_PORT,
                       workers=WORKERS,
                       balance_type=2)
    start_server(sm.destory)

+ 47 - 0
file_processing/__init__.py

@@ -0,0 +1,47 @@
+# coding:utf-8
+from file_processing.file_docx import read_docx
+from file_processing.file_doc import read_doc
+from file_processing.file_xlsx import read_xlsx
+from file_processing.file_swf import read_swf
+from file_processing.file_compress import CompressFiles
+from file_processing.file_pdf import PdfRead
+from file_processing.file_picture import execute_ocr
+from loguru import logger
+from docs.config import error_number
+from file_processing.file_txt import read_txt
+from file_processing.file_picture import ocr, ocr_patch
+
# Shared parser instances used by the dispatch table below.
compress_file = CompressFiles()
pdf_read = PdfRead()

# Dispatch table: lowercase file extension -> parser callable.
model_type = {
    "pdf": pdf_read.read_pdf,
    "doc": read_doc,
    "docx": read_docx,
    "xlsx": read_xlsx,
    "xls": read_xlsx,
    "txt": read_txt,
    "swf": read_swf,
    "rar": compress_file.extract_file,
    "zip": compress_file.extract_file,
    "jpg": ocr,
    "png": ocr,
    "jpeg": ocr
}

# Bug fix: __all__ must contain *names* (strings), not the objects
# themselves — otherwise "from file_processing import *" misbehaves.
__all__ = ["compress_file", "model_type", "convert_start"]
+
+
def convert_start(file_path, file_type) -> any:
    """
    Run the parser registered for *file_type* on *file_path*.
    :param file_path: local path of the file to parse
    :param file_type: lowercase extension used as the dispatch key
    :return: (text, state); ("", parse-error code) on any failure
    """
    try:
        handler = model_type[file_type]
        parsed_text, parsed_state = handler(file_path)
        return parsed_text, parsed_state
    except Exception as e:
        logger.warning(f"文件解析失败--》{e}")
        return "", error_number["解析错误"]

二进制
file_processing/__pycache__/__init__.cpython-37.pyc


二进制
file_processing/__pycache__/file_compress.cpython-37.pyc


二进制
file_processing/__pycache__/file_doc.cpython-37.pyc


二进制
file_processing/__pycache__/file_docx.cpython-37.pyc


二进制
file_processing/__pycache__/file_pdf.cpython-37.pyc


二进制
file_processing/__pycache__/file_picture.cpython-37.pyc


二进制
file_processing/__pycache__/file_swf.cpython-37.pyc


二进制
file_processing/__pycache__/file_xlsx.cpython-37.pyc


+ 24 - 0
file_processing/extract.sh

@@ -0,0 +1,24 @@
#!/bin/bash
# Extract JPEG and PNG images from a SWF file.
#   $1: path to the .swf file
#   $2: output directory
jpegs=""
jpegs="$(swfextract "$1" |grep 'JPEG'|awk -F')' '{print $NF}')"
echo "$jpegs"
# Split the comma-separated id list.
OLD_IFS="$IFS"
IFS=","
arr=($jpegs)
IFS="$OLD_IFS"
for s in ${arr[@]}
do
  swfextract "$1" -j $s -o "$2/$s.jpg"
done
pngs=""
pngs="$(swfextract "$1" |grep 'PNG'|awk -F')' '{print $NF}')"
echo "$pngs"
OLD_IFS="$IFS"
IFS=","
arr=($pngs)
# Bug fix: IFS was left set to "," here, which changed word splitting for
# the loop below; restore it like the JPEG branch does.
IFS="$OLD_IFS"
for s in ${arr[@]}
do
  swfextract "$1" -p $s -o "$2/$s.png"
done
echo "ok"

+ 134 - 0
file_processing/file_compress.py

@@ -0,0 +1,134 @@
+"""
+coding=uft-8
+gz/tar/tgz/zip/rar
+常用五种类型压缩文件的解压
+"""
+
+from typing import List
+import zipfile
+from rarfile import RarFile
+from loguru import logger
+import os
+import re
+from docs.config import error_number
+
# Matches unzip's "inflating: <path>" output lines to recover extracted paths.
pattern = re.compile("^inflating:(.*)")

# Only files with these extensions are pulled out of archives.
ALLOW_EXT = ['pdf', 'doc', 'docx', 'png', 'jpg', 'jpeg', 'xls', 'xlsx', 'swf']
+
+
class CompressFiles(object):
    """Unpacks zip/rar archives and returns the extracted file paths."""

    @staticmethod
    def _decode_name(fn: str) -> str:
        # Archive entry names from Windows tools are often stored as cp437;
        # try GBK first (Chinese archives), then UTF-8, else leave unchanged.
        try:
            return fn.encode('cp437').decode('gbk')
        except Exception:
            try:
                return fn.encode('cp437').decode('utf-8')
            except Exception:
                return fn

    @staticmethod
    def other_un_zip_file(file_path: str, save_dir: str) -> List:
        """
        Extract with the system ``unzip`` binary (handles GBK entry names).
        :return: list of extracted file paths parsed from unzip's output
        """
        import subprocess  # local import keeps the module's import surface unchanged
        file_list = []
        if not os.path.exists(save_dir):
            os.makedirs(save_dir, 0o777)

        # Security fix: pass an argument list instead of interpolating the
        # (potentially attacker-controlled) file name into a shell string.
        proc = subprocess.run(["unzip", "-o", "-d", save_dir, "-O", "GBK", file_path],
                              stdout=subprocess.PIPE, stderr=subprocess.DEVNULL,
                              universal_newlines=True)
        split_out = proc.stdout.split("\n")

        # Recover extracted paths from "inflating: <path>" lines.
        for row in split_out:
            files = pattern.search(row.strip())
            if files:
                for file in files.groups():
                    filepath = file.strip()
                    if os.path.exists(filepath):
                        file_list.append(filepath)
        return file_list

    def un_zip_file(self, file: str, save_dir: str) -> List:
        """
        Extract a zip archive: try the unzip binary first, fall back to the
        zipfile module (keeping only ALLOW_EXT, non-hidden entries).
        """
        print("解压开始--》")
        ret = []
        if not os.path.exists(save_dir):
            os.makedirs(save_dir, 0o777)
        try:
            file_list = self.other_un_zip_file(file, save_dir)
            print(f"解压结束--->{file_list}")
            if file_list:
                return file_list
        except Exception as e:
            logger.warning(f"{e}")
        print("二次-->", )
        zf = zipfile.ZipFile(file, 'r', compression=zipfile.ZIP_DEFLATED)
        for index, info in enumerate(zf.infolist()):
            if info.file_size == 0:
                continue
            fn = info.filename
            # Skip files without an allowed extension.
            ext = os.path.splitext(fn)[-1][1:].lower().strip()
            if ext not in ALLOW_EXT:
                continue
            # Skip hidden files.
            if os.path.split(fn)[-1].startswith('.'):
                continue
            fn = self._decode_name(fn)
            save_path = os.path.join(save_dir, fn)
            # Robustness fix: entries nested in sub-directories previously
            # crashed open(); create the parent directory first.
            parent = os.path.dirname(save_path)
            if parent and not os.path.exists(parent):
                os.makedirs(parent, 0o777)
            with open(save_path, 'wb') as fw:
                fw.write(zf.read(info.filename))
            ret.append(save_path)
        return ret

    @staticmethod
    def un_rar_file(file: str, save_dir: str) -> List:
        """Extract a rar archive via rarfile, same filtering as un_zip_file."""
        ret = []
        if not os.path.exists(save_dir):
            os.makedirs(save_dir, 0o777)
        zf = RarFile(file)
        for index, info in enumerate(zf.infolist()):
            if info.file_size == 0:
                continue
            fn = info.filename
            ext = os.path.splitext(fn)[-1][1:].lower().strip()
            if ext not in ALLOW_EXT:
                continue
            if os.path.split(fn)[-1].startswith('.'):
                continue
            fn = CompressFiles._decode_name(fn)
            save_path = os.path.join(save_dir, fn)
            parent = os.path.dirname(save_path)
            if parent and not os.path.exists(parent):
                os.makedirs(parent, 0o777)
            with open(save_path, 'wb') as fw:
                fw.write(zf.read(info.filename))
            ret.append(save_path)
        return ret

    def extract_file(self, file_path):
        """
        Extract one archive into a sibling directory named after the file.
        :param file_path: path to a .zip or .rar archive
        :return: (extracted_paths, state_code)
        """
        return_list = []
        try:
            base_dir, file_name = os.path.split(file_path)
            file_name, file_suffix = os.path.splitext(file_name)
            save_dir = os.path.join(base_dir, file_name)
            file_suffix = file_suffix[1:]
            if file_suffix not in ['zip', 'rar']:
                return [], error_number["类型不支持"]
            elif file_suffix == 'zip':
                return_list = self.un_zip_file(file_path, save_dir)
            elif file_suffix == 'rar':
                return_list = self.un_rar_file(file_path, save_dir)
        except Exception as e:
            print(e)
            return [], error_number["解析错误"]
        return return_list, error_number["成功"]

+ 23 - 0
file_processing/file_doc.py

@@ -0,0 +1,23 @@
+"""
+.doc解析
+"""
+import subprocess, os
+from loguru import logger
+from file_processing.file_docx import read_docx
+from docs.config import error_number
+
+
def read_doc(file_path):
    """
    Convert a legacy .doc to .docx via LibreOffice, then parse with read_docx.
    :param file_path: path to the .doc file
    :return: (text, state) tuple; ('', parse-error code) on conversion failure
    """
    out_dir = os.path.dirname(file_path)
    try:
        # Security/robustness fix: argument list with shell=False — paths with
        # spaces or shell metacharacters no longer break (or inject into) the
        # command line.
        args = ['soffice', '--headless', '--convert-to', 'docx', file_path, '--outdir', out_dir]
        output = subprocess.check_output(args)
        logger.debug(str(output))
    except subprocess.CalledProcessError as e:
        logger.error('doc文件转换出错')
        logger.error(e)
        return '', error_number["解析错误"]
    # Bug fix: use splitext instead of split('.')[0] so file names containing
    # dots keep their full stem (matching the name soffice writes).
    file_name = os.path.splitext(os.path.basename(file_path))[0] + '.docx'
    docx_path = os.path.join(out_dir, file_name)
    logger.debug('doc文件转docx后文件路径>>>' + docx_path)
    return read_docx(docx_path)

+ 108 - 0
file_processing/file_docx.py

@@ -0,0 +1,108 @@
+"""
+.docx文件解析
+
+"""
+
+import os
+import docx
+from file_processing.file_picture import ocr_patch
+from docx.document import Document
+from docx.oxml.table import CT_Tbl
+from docx.oxml.text.paragraph import CT_P
+from docx.table import _Cell, Table
+from docx.text.paragraph import Paragraph
+from loguru import logger
+from docs.config import error_number
+
+
def iter_block_items(parent):
    """
    Yield the document body's items in order: Paragraph objects for <w:p>
    children, Table objects for <w:tbl> children, and the marker string
    '图像' after any child whose XML contains a <w:drawing> (inline image).
    :param parent: docx Document or table _Cell
    :raises ValueError: for unsupported parent types
    """
    if isinstance(parent, Document):
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    else:
        raise ValueError("something's not right")

    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent)
        try:
            if str(child.xml).find('<w:drawing>') > 0:
                yield '图像'
        except Exception as e:
            # NOTE(review): yields '' when xml inspection fails; read_docx
            # treats every non-Paragraph/Table item as an image placeholder,
            # so this shifts image numbering — confirm that is intended.
            yield ''
+
+
def read_table(table):
    """Return the table contents as a list of rows, each a list of cell texts."""
    rows_text = []
    for row in table.rows:
        rows_text.append([cell.text for cell in row.cells])
    return rows_text
+
+
def save_picture(word_path, doc):
    """
    Save the document's inline images next to the .docx file and return their
    paths in document order.
    :param word_path: path of the source .docx (determines the output dir)
    :param doc: opened docx.Document object
    :return: list of absolute image paths (ordered)
    """

    img_path = []
    for i, shape in enumerate(doc.inline_shapes):
        # Follow the inline shape's relationship id to the embedded image part.
        content_id = shape._inline.graphic.graphicData.pic.blipFill.blip.embed
        content_type = doc.part.related_parts[content_id].content_type
        if not content_type.startswith('image'):
            continue
        # Images are written as <index>.png beside the document.
        img_name = os.path.join(os.path.dirname(word_path), './%d.png' % (i))
        img_data = doc.part.related_parts[content_id]._blob
        with open(img_name, 'wb') as fp:
            fp.write(img_data)
            img_path.append(os.path.abspath(img_name))
    return img_path
+
+
def table_add_html(block):
    """
    Render a docx table as a minimal HTML string.
    :param block: docx Table object
    :return: '<table><tr><td>...</td></tr>...</table>'
    """
    # str.join builds the string in one pass instead of repeated "+="
    # concatenation; the unused enumerate indices are gone too.
    rows = read_table(block)
    body = ''.join(
        '<tr>' + ''.join('<td>' + cell + '</td>' for cell in row) + '</tr>'
        for row in rows
    )
    return '<table>' + body + '</table>'
+
+
def read_docx(word_path):
    """
    Extract text from a .docx: paragraphs as plain text, tables as HTML,
    inline images replaced by their OCR text.
    :param word_path: path to the .docx file
    :return: (text, state); ('', parse-error code) on any failure
    """
    try:
        doc = docx.Document(word_path)
        segments = []
        imagepos = 0
        for block in iter_block_items(doc):
            if isinstance(block, Paragraph):
                if block.text == '' or block.text == '\n': continue
                segments.append((str(block.text), 0))

            elif isinstance(block, Table):
                string = table_add_html(block)
                segments.append((string, 0))
            else:
                # Anything else is an image marker yielded by iter_block_items;
                # record a numbered placeholder to be replaced after OCR.
                segments.append(('图像%d' % imagepos, imagepos))
                imagepos += 1

        # Save the document's images next to the file; returns ordered paths.
        picture_path = save_picture(word_path, doc=doc)
        logger.debug('picture_path:::' + str(picture_path))
        # OCR each image and substitute the results for the 图像N placeholders
        # in document order.
        image_list = []
        for i, pic_path in enumerate(picture_path):
            image_list.append(('图像%d' % i, pic_path))
        ret_dict = ocr_patch(image_list)
        for i, val in enumerate(segments):
            if val[0] in ret_dict:
                segments[i] = (ret_dict[val[0]], val[1])
        segments = [s for s, i in segments]
        return '\n'.join(segments), error_number["成功"]
    except Exception as e:
        logger.error(e)
        return '', error_number["解析错误"]

+ 221 - 0
file_processing/file_pdf.py

@@ -0,0 +1,221 @@
+# coding:utf-8
+import pdfplumber
+from file_processing.models.picture_extract import extract_image
+from file_processing.models.table_extract import TableStruct
+from loguru import logger
+from docs.config import error_number
+from file_processing.file_picture import ocr
+from file_processing.file_picture import ocr_patch
+from file_processing.models.picture_extract import Picture
+import os
+from pdf2image import convert_from_path
+
+
def get_table_block(page):
    """Find every table on a pdfplumber page.

    :param page: pdfplumber page object
    :return: list of ``TableStruct`` sorted by left edge (``min_x``)
    """
    parsed = []
    for found in page.find_tables(table_settings={}):
        struct = TableStruct()
        struct.parse(found)
        parsed.append(struct)
    return sorted(parsed, key=lambda t: t.min_x)
+
+
def split_block(tables, words):
    """Map each table onto the span of word indices it covers.

    :param tables: list of TableStruct-like objects (``min_y``/``max_y``/``contents``)
    :param words: pdfplumber word dicts with ``top``/``bottom`` keys
    :return: list of ``(first_index, last_index, table_html)`` tuples
    """
    replace_blocks = []
    for table in tables:
        covered = []
        for index, word in enumerate(words):
            min_y = word['top']
            max_y = word['bottom']
            # The word's vertical centre must fall inside the table bbox.
            if table.min_y < min_y and table.max_y > (max_y + min_y) / 2:
                covered.append(index)
        if not covered:
            # Fixed: no word overlaps this table — the original indexed
            # blocks[0] here and crashed with IndexError.  Skip the table.
            continue
        covered.sort()
        replace_blocks.append((covered[0], covered[-1], table.contents))
    return replace_blocks
+
+
def section(words: list):
    """Group pdfplumber words into paragraphs by vertical proximity.

    A word whose baseline is less than half the previous word's height below
    the previous word is treated as part of the same paragraph.

    :param words: word dicts with ``top``/``bottom``/``text`` keys
    :return: paragraphs joined with newlines
    """
    paragraphs = []
    prev_top, prev_bottom = 0, 0  # y-extent of the previous word
    for word in words:
        top, bottom = word['top'], word['bottom']
        same_paragraph = bottom - prev_bottom < (prev_bottom - prev_top) / 2
        if same_paragraph:
            paragraphs[-1] += word.get("text", "")
        else:
            paragraphs.append(word.get("text", ""))
        prev_top, prev_bottom = top, bottom
    return "\n".join(paragraphs)
+
+
class PdfRead(object):
    """Extract text from a PDF.

    Regular pages yield inline text plus tables rendered as HTML; large
    images are OCR'd and woven into the text stream; pages with no
    extractable text at all are screenshotted page-by-page and OCR'd whole.
    """

    def __init__(self):
        self._images = {}     # reserved for per-page images (not read in current code paths)
        self._base_dir = ""   # directory of the PDF; extracted images are written here

    def read_pdf(self, file_path: str) -> tuple:
        """Parse the PDF at ``file_path``; returns ``(text, status_code)``."""
        pdf = pdfplumber.open(file_path)
        screenshots_pages = []  # page indices that need full-page OCR
        self._base_dir = os.path.dirname(file_path)
        results = []
        for page_number, page in enumerate(pdf.pages):
            # Collect all text for the current page, including table text
            # (coordinates are (x, y, x, y)).
            tables = get_table_block(page)
            words = page.extract_words()
            contents = self.__merge_table(tables, words)
            images = self.__extract_images(page)
            state = self.__check_images(images)
            if state and images and contents:
                contents = self.__merge_picture(contents, images)
            elif not contents:
                # Text-less page: leave a placeholder that is overwritten by
                # the full-page OCR result below.
                results.append(f"{page_number}->image")
                screenshots_pages.append(page_number)
                continue
            paragraph = section(contents)
            results.append(paragraph)
        pdf.close()
        if screenshots_pages:
            # OCR the rendered page screenshots and substitute them back in
            # at their page index (one entry per page in `results`).
            screenshots_result = self.__to_screenshots(file_path, screenshots_pages)
            ocr_result = ocr_patch(screenshots_result)
            for key, val in ocr_result.items():
                if key < len(results):
                    results[key] = val
        results = [result for result in results if result]
        return "\n".join(results), error_number["成功"]

    def __extract_images(self, page):
        """
        Extract the page's images to disk.

        Only images wider than half the page are kept (small images are
        assumed to be decoration).

        :param page: pdfplumber page object
        :return: list of ``Picture`` objects for the kept images
        """
        images_result = []
        images = page.images  # images on the page
        page_width = page.width  # page width
        page_number = page.page_number  # page number
        for image_number, image in enumerate(images):
            image_stream = image.get("stream", "")
            if not image_stream:
                continue
            image_name = f"{page_number}-{image_number}"
            image_path = extract_image(image_stream, self._base_dir, image_name)
            image_width = image.get("width", 0)
            image_high = image.get('height', 0)
            if image_path and image_width > page_width / 2:
                # NOTE(review): y0/y1 come straight from pdfplumber's image
                # dict — presumably bottom/top in PDF space; confirm
                # orientation against __merge_picture's comparisons.
                attr = {"min_y": image.get("y0", 0), "max_y": image.get("y1", 0),
                        "width": image_width, "height": image_high, "image_path": image_path}
                picture = Picture(**attr)
                images_result.append(picture)
        return images_result

    @staticmethod
    def __check_images(images_result):
        """
        Check whether any two extracted images overlap vertically.

        :param images_result: list of ``Picture`` objects
        :return: True if all images are vertically disjoint, else False
        """
        for base_ind in range(len(images_result)):
            base_img = images_result[base_ind]
            for cont_ind in range(base_ind + 1, len(images_result)):
                cont_img = images_result[cont_ind]
                # Disjoint when one image ends before the other begins.
                if base_img.min_y < base_img.max_y < cont_img.min_y or cont_img.max_y < base_img.min_y < base_img.max_y:
                    continue
                else:
                    return False
        return True

    @staticmethod
    def __merge_picture(contents, images):
        """Insert each image's OCR text into `contents` at its vertical position.

        NOTE(review): this inserts into `contents` while iterating a range
        computed from its pre-insert length, and the same image can match and
        be inserted more than once — confirm whether that is intended.
        NOTE(review): ocr() in file_picture returns a (text, status) tuple,
        so the "text" value stored here may be a tuple rather than a str —
        verify against the OCR module.
        """
        images = sorted(images, key=lambda x: x.min_y)
        for image in images:
            img_min_y, img_max_y = image.min_y, image.max_y
            for ind in range(len(contents) - 1):
                word, last_word = contents[ind], contents[ind + 1]
                y0, y1 = word['top'], word['bottom']
                l_y0, l_y1 = last_word['top'], last_word['bottom']
                # Image sits between two consecutive words -> splice it in.
                if y1 < img_min_y and img_max_y < l_y1:
                    contents.insert(ind + 1, {"text": ocr(image.image_path), 'top': image.max_y, "bottom": image.max_y})
            if not contents:
                contents.append({"text": ocr(image.image_path), 'top': image.max_y, "bottom": image.min_y})
                continue
            end_word = contents[-1]
            e_y0, e_y1 = end_word['top'], end_word['bottom']
            # Image below all existing text -> append at the end.
            if not contents or e_y1 < img_min_y:
                contents.append({"text": ocr(image.image_path), 'top': image.max_y, "bottom": image.min_y})
        return contents

    @staticmethod
    def __merge_table(tables, words):
        """
        Merge table HTML into the word stream.

        Word spans covered by a table (per split_block) are replaced by a
        single pseudo-word carrying the table's HTML.

        NOTE(review): the pseudo-word's 'top'/'bottom' are set to the *word
        indices* block[0]/block[1], not y-coordinates — confirm this is what
        section()'s paragraph logic expects.

        :param tables: list of TableStruct
        :param words: pdfplumber word dicts
        :return: merged list of word dicts
        """
        contents = []
        start = 0
        replace_blocks = split_block(tables, words)
        for block in replace_blocks:
            contents.extend(words[start:block[0]])
            contents.append({"text": block[2], 'top': block[0], "bottom": block[1], "type": "table"})
            start = block[1] + 1
        contents.extend(words[start:])
        return contents

    def __to_screenshots(self, pdf_path, pages):
        """
        Render the pages that need full-page OCR to PNG screenshots.

        :param pdf_path: path of the source PDF
        :param pages: page indices to keep
        :return: list of ``(page_index, image_path)`` tuples
        """
        image_real = []
        images = convert_from_path(pdf_path)
        for ind, image in enumerate(images):
            if ind not in pages:
                continue
            try:
                image_name = "test{}.png".format(ind)  # generated image file name
                image_path = os.path.join(self._base_dir, image_name)
                image.save(image_path, 'PNG')
                image_real.append((ind, image_path))
            except Exception as e:
                print(e)
                continue
        # Release memory held by the last rendered page.
        image = None
        return image_real
+
+
if __name__ == '__main__':
    # Ad-hoc manual test: parse a sample PDF and print (text, status).
    pdf_obj = PdfRead()
    result01 = pdf_obj.read_pdf('./docs/yc.pdf')
    print(result01)

+ 102 - 0
file_processing/file_picture.py

@@ -0,0 +1,102 @@
+# -*-coding:utf-8 -*-
+"""
+任务发布者
+"""
+import cv2 as cv
+import grpc
+from util.picture_zooming import PictureZooming
+from servicerd.client import RdClient
+from proto import ocr_pb2_grpc
+from proto import ocr_pb2
+from docs.config import picture_config
+from loguru import logger
+from file_processing.tr_ocr import tr_ocr_patch
+from docs.config import error_number
+
# Shared picture scaler used by read_image() below.
zoom = PictureZooming()
# Service-discovery / RPC client for the OCR workers; execute_ocr() fans
# requests out through its parallel_computing() helper.
cd = RdClient(service_name=picture_config["service_name"],
              rd_server=picture_config["rd_server"],
              balance_type=picture_config["balance_type"])
+
+
def run(param, **kwargs) -> (bool, any):
    """Send one OCR request to the worker at ``kwargs['ip']:kwargs['port']``.

    ``param`` is a ``(position, image_bytes)`` pair; returns
    ``(position, recognised_text)``.
    """
    address = f"{kwargs['ip']}:{kwargs['port']}"
    print(address)
    with grpc.insecure_channel(address) as channel:
        stub = ocr_pb2_grpc.OcrStub(channel)
        reply = stub.Ocr(ocr_pb2.OcrRequest(image=param[1]))
    return param[0], reply.message
+
+
def execute_ocr(image_list):
    """Batch-OCR images through the gRPC worker pool.

    :param image_list: list of ``(position, image_path)`` pairs
    :return: dict mapping position -> recognised text ({} on failure)
    """
    try:
        image_text = {}
        images = []

        # Load and scale each image; read_image returns a falsy sentinel
        # ("" for too-small images) which we skip.
        for position, image_path in image_list:
            img_file = read_image(image_path)
            if (not img_file) or isinstance(img_file, str):
                continue
            images.append((position, img_file))

        if images:
            # Fan the batch out across workers (20-way parallelism).
            ocr_results = cd.parallel_computing(images, 20, run)
            for values in ocr_results:
                if len(values) < 2:
                    continue
                if values[1]:
                    image_text[values[0]] = values[1]
        return image_text
    except Exception as e:
        # Fixed: the original evaluated a dead tuple expression here
        # (`logger.warning(...), error_number["解析错误"]`) — log only.
        logger.warning(f"ocr is fail->{e}")
    return {}
+
+
def read_image(filepath):
    """Load, filter, scale and JPEG-encode an image for OCR.

    :param filepath: path to the image file
    :return: JPEG bytes on success; ``""`` when the image is too small or
             cannot be read (callers treat any falsy/str value as "skip")
    """
    try:
        load_image = cv.imread(filepath)
        x, y, z = load_image.shape  # raises if imread returned None

        # Skip images too small to contain useful text.
        if y < 600 or x < 500:
            return ""

        # Scale to the OCR model's expected size.
        load_image = zoom.patch(load_image)

        # Encode as JPEG bytes for the wire.
        load_image = cv.imencode(".jpg", load_image)[1].tobytes()
        return load_image
    except Exception:
        # Fixed: the original returned {} here but "" on the small-image
        # path — use one consistent (still falsy) sentinel.
        return ""
+
+
# Selected OCR backend: "tr" = HTTP tr-service, "common" = gRPC worker pool.
OcrType = "common"

# Dispatch table mapping backend name -> batch OCR callable.
ocr_select = {"tr": tr_ocr_patch, "common": execute_ocr}
+
+
def ocr(images_path):
    """OCR a single image.

    :param images_path: path to the image file
    :return: ``(text, status_code)`` tuple
    """
    if not images_path:
        # Fixed: return a str like the success path (the original returned
        # a dict {} here, giving the tuple an inconsistent first element).
        return "", error_number["解析错误"]
    images = [(0, images_path)]
    result = ocr_select[OcrType](images)
    return result.get(0, ""), error_number["成功"]
+
+
def ocr_patch(images):
    """Batch-OCR several images with the currently selected backend.

    :param images: list of ``(position, image_path)`` pairs
    :return: dict mapping position -> recognised text
    """
    if not images:
        return {}
    return ocr_select[OcrType](images)

+ 65 - 0
file_processing/file_swf.py

@@ -0,0 +1,65 @@
+"""
+.swf文件的处理
+"""
+
+import subprocess, os
+from file_processing.file_picture import ocr_patch
+from loguru import logger
+from docs.config import error_number
+
+
def read_swf(file_path: str):
    """Extract the pictures of a .swf file and OCR them in order.

    :param file_path: path to the .swf file
    :return: ``(text, status_code)``
    """
    picture_items, status = extract_swf(file_path)
    targets = [(pos, path) for pos, path in picture_items if os.path.exists(path)]
    print(f"image_list-->{targets}")
    text = ""
    if targets:
        recognised = ocr_patch(targets)
        ordered = sorted(recognised.items(), key=lambda item: item[0], reverse=False)
        text = '\n'.join(entry[1] for entry in ordered)
    return text, status
+
+
def extract_swf(file_path):
    """
    Extract the pictures embedded in a .swf file via extract.sh.

    The shell script prints two comma-separated lines: jpg indices and
    png indices.

    :param file_path: path to the .swf file
    :return: ``(sorted [(index, path)] list, status_code)``
    """
    swf_picture_paths = []
    swf_to_picture_dir = os.path.dirname(file_path)

    try:
        # Run the extractor; its stdout lists the extracted picture indices.
        output = subprocess.check_output('./extract.sh %s %s' % (file_path, swf_to_picture_dir), shell=True)
        output_string = output.decode()
        picture_num = output_string.split('\n')
        if len(picture_num) > 5:
            # Fixed: the original returned the bare output string here,
            # which broke the (paths, status) unpacking in read_swf.
            logger.error('swf文件解析出错')
            logger.error(output_string)
            return [], error_number["解析错误"]
        if picture_num[0] != '':
            for i in picture_num[0].split(','):
                swf_picture_paths.append(
                    os.path.join(swf_to_picture_dir, '%s.jpg' % (i.strip())))
        # Guard len(): the script may emit a single line with no newline.
        if len(picture_num) > 1 and picture_num[1] != '':
            for i in picture_num[1].split(','):
                swf_picture_paths.append(
                    os.path.join(swf_to_picture_dir, '%s.png' % (i.strip())))

    except subprocess.CalledProcessError as e:
        logger.error('swf文件解析出错')
        logger.error(e)
        return [], error_number["解析错误"]

    # Sort pictures by their numeric file-name index.
    try:
        swf_picture_dict = dict(
            (int(p.split("/")[-1].split(".")[0]), p) for p in swf_picture_paths)
        print(f"swf_picture_dict-->{swf_picture_dict}")
        swf_picture_paths = sorted(swf_picture_dict.items(), key=lambda x: x[0])
    except Exception as e:
        # Best-effort: on failure the unsorted plain-path list is returned.
        logger.warning(f"{e}")
    return swf_picture_paths, error_number["成功"]

+ 12 - 0
file_processing/file_txt.py

@@ -0,0 +1,12 @@
+# coding:utf-8
+from docs.config import error_number
+
+
def read_txt(file_path):
    """Read a plain-text file.

    :param file_path: path of the file
    :return: ``(content, status_code)``
    """
    try:
        with open(file_path, "r") as handle:
            content = handle.read()
        return content, error_number["成功"]
    except Exception as e:
        print(e)
        return "", error_number["解析错误"]

+ 44 - 0
file_processing/file_xlsx.py

@@ -0,0 +1,44 @@
+"""
+coding=utf-8
+解.xlsx文件
+
+单元格ctype的五种类型
+ctype : 0 empty,1 string, 2 number, 3 date, 4 boolean, 5 error
+"""
+
+from loguru import logger
+from docs.config import error_number
+import pandas as pd
+
+
def read_xlsx(file_path: str):
    """Read every sheet of a workbook and render each as an HTML table.

    :param file_path: path to the .xls/.xlsx file
    :return: ``(joined table strings, status_code)``
    """
    try:
        workbook = pd.read_excel(file_path, keep_default_na=False, sheet_name=None)
        tables = []
        for _, frame in workbook.items():
            parts = ['<table>']
            for row in frame.values:
                parts.append('<tr>')
                for cell in row:
                    parts.append('<td>' + str(cell).strip('\n').strip('\t') + '</td>')
                parts.append('</tr>')
            parts.append('</table>')
            tables.append(''.join(parts))
        return "\n".join(tables), error_number["成功"]
    except Exception as e:
        logger.error('xlsx文件解析出错')
        logger.error(e)
        return '', error_number["解析错误"]
+
+
if __name__ == '__main__':
    # Ad-hoc manual test against a local workbook.
    file_path = "0.xls"
    string = read_xlsx(file_path)
    print(string)

二进制
file_processing/models/__pycache__/__init__.cpython-37.pyc


二进制
file_processing/models/__pycache__/picture_extract.cpython-37.pyc


二进制
file_processing/models/__pycache__/table_extract.cpython-37.pyc


+ 54 - 0
file_processing/models/picture_extract.py

@@ -0,0 +1,54 @@
+# coding:utf-8
+from PIL import Image
+import os
+
+
def extract_image(xObject, base_dir, image_name):
    """Dump an embedded PDF image stream to disk.

    The output extension follows the stream's /Filter: FlateDecode -> .png
    (decoded via PIL), DCTDecode -> .jpg, JPXDecode -> .jp2,
    CCITTFaxDecode -> .tiff; no filter -> .png.

    :param xObject: PDF image XObject (has ``attrs`` and ``get_data()``)
    :param base_dir: directory to write the image into
    :param image_name: file-name stem (extension is appended)
    :return: path of the written file, or ``None`` when the object is not an
             image / uses an unsupported filter / extraction fails
    """
    try:
        if xObject.attrs["Subtype"].name != 'Image':
            return None  # explicit (the original fell through implicitly)
        size = (xObject.attrs["Width"], xObject.attrs["Height"])
        data = xObject.get_data()
        mode = "RGB" if xObject.attrs["ColorSpace"].name == 'DeviceRGB' else "P"
        # Common file-name prefix for the saved image.
        img_pre = os.path.join(base_dir, image_name)

        if 'Filter' in xObject.attrs:
            filter_name = xObject.attrs["Filter"].name
            if filter_name == 'FlateDecode':
                Image.frombytes(mode, size, data).save(img_pre + ".png")
                return img_pre + ".png"
            raw_suffix = {'DCTDecode': ".jpg", 'JPXDecode': ".jp2",
                          'CCITTFaxDecode': ".tiff"}
            suffix = raw_suffix.get(filter_name)
            if suffix is None:
                return None  # unsupported filter
            # Fixed: use a context manager so the handle is closed even if
            # write() raises (the original used bare open/close).
            with open(img_pre + suffix, "wb") as img:
                img.write(data)
            return img_pre + suffix

        Image.frombytes(mode, size, data).save(img_pre + ".png")
        return img_pre + ".png"
    except Exception:
        # Extraction is best-effort; callers treat None as "no image".
        return None
+
+
class Picture(object):
    """Geometry and location of an image extracted from a PDF page."""

    def __init__(self, min_y, max_y, height, width, image_path):
        self.min_y = min_y            # lower y bound (from pdfplumber's y0 — confirm orientation)
        self.max_y = max_y            # upper y bound (from pdfplumber's y1)
        self.width = width            # image width in page units
        self.height = height          # image height in page units
        self.image_path = image_path  # where the image file was written
        self.content = ""             # OCR text, filled in later

    def __repr__(self):
        # Added for debuggability; attribute layout is unchanged.
        return (f"{type(self).__name__}(min_y={self.min_y!r}, max_y={self.max_y!r}, "
                f"width={self.width!r}, height={self.height!r}, "
                f"image_path={self.image_path!r})")
+

+ 43 - 0
file_processing/models/table_extract.py

@@ -0,0 +1,43 @@
+# coding:utf-8
+
class TableStruct(object):
    """Bounding box plus HTML rendering of one pdfplumber table."""

    def __init__(self):
        # Bounding-box corners; populated by parse().
        self.min_x = None
        self.max_x = None
        self.min_y = None
        self.max_y = None
        self.contents = ""  # HTML <table> string

    def parse(self, table):
        """Read geometry and cell text from a pdfplumber Table object.

        :param table: object exposing ``extract()`` and ``bbox``
        """
        cells = table.extract()
        if cells:
            self.contents = self.__table_format(cells)
        self.min_x, self.min_y, self.max_x, self.max_y = table.bbox

    @staticmethod
    def __table_format(data: list):
        """Render a 2-D cell list as an HTML table (None cells become '').

        :param data: list of rows, each a list of cell values
        :return: ``<table border=1>...</table>`` string, or "" for no data
        """
        if not data:
            return ""
        parts = ['<table border=1>']
        for row in data:
            parts.append('<tr>')
            for col in row:
                parts.append('<td>%s</td>' % ('' if col is None else col))
            parts.append('</tr>')
        parts.append('</table>')
        return ''.join(parts)

+ 73 - 0
file_processing/tr_ocr.py

@@ -0,0 +1,73 @@
+# coding:utf-8
+import requests
+from concurrent.futures import ThreadPoolExecutor
+from docs.config import getOcrUrl
+
+
def combine_text(rows):
    """Merge OCR row fragments into one text block.

    A newline is inserted between two consecutive rows whose vertical
    positions (box[1]) differ by more than half a row height (box[3]).

    Fixed: the break used to be emitted *before* the current row, although
    the gap measured is between the current row and the next one — the
    newline now goes between the two rows it separates.

    :param rows: list of ``(box, text)`` pairs, box ~ [x, y, w, h]
    :return: combined text
    """
    text = ""
    for ind, row in enumerate(rows):
        text += row[1]
        if ind < len(rows) - 1:
            next_top = rows[ind + 1][0][1]
            if abs(row[0][1] - next_top) > row[0][3] / 2:
                text += "\n"
    return text
+
+
def tr_ocr(*args):
    """OCR one picture through the tr HTTP service.

    ``args[0]`` is an ``(index, picture_path)`` pair.

    :return: ``{index: text}`` on success, ``{index: ""}`` when the service
             yields nothing, ``{-1: ""}`` on error
    """
    try:
        ind, picture_path = args[0]
        # Fixed: read the file inside a context manager instead of leaving
        # the handle from open(...) unclosed.
        with open(picture_path, 'rb') as fh:
            img_file = {
                'file': fh.read()
            }
        url = ""
        print(getOcrUrl)
        # Two attempts at service discovery.
        for once in range(2):
            ip = requests.get(getOcrUrl).text
            print(ip)
            if ip:
                url = f"http://{ip}/api/tr-run/"
                break
        print("url-->", url)
        if url:
            res = requests.post(url=url, data={"is_draw": 0}, files=img_file)
            res_json = res.json()
            data = res_json.get("data", {})
            raw_out = data.get("raw_out", [])
            if raw_out:
                text = combine_text(raw_out)
                return {ind: text}
        return {ind: ""}
    except Exception as e:
        print(e)
        return {-1: ""}
+
+
def tr_ocr_patch(picture_paths):
    """OCR many pictures concurrently via tr_ocr.

    Fixed: the ThreadPoolExecutor is now shut down via a context manager
    instead of being leaked.

    :param picture_paths: list of ``(index, picture_path)`` pairs
    :return: merged dict of ``{index: text}``
    """
    total_result = {}
    with ThreadPoolExecutor(max_workers=10) as thread_pool:
        for res in thread_pool.map(tr_ocr, picture_paths):
            total_result.update(res)
    return total_result
+
+
if __name__ == '__main__':
    # Ad-hoc benchmark: OCR two sample images and report wall-clock time.
    import time

    paths = [(0, "../docs/2.png"), (1, "../docs/1.png")]
    start = time.time()
    content = tr_ocr_patch(paths)
    print(content)
    print(time.time() - start)

+ 23 - 0
need_package/PyPDF2-master/.coveragerc

@@ -0,0 +1,23 @@
+[run]
+source = PyPDF2
+branch = True
+
+[report]
+# Regexes for lines to exclude from consideration
+exclude_lines =
+    # Have to re-enable the standard pragma
+    pragma: no cover
+    @overload
+
+    # Don't complain about missing debug-only code:
+    def __repr__
+    def __str__
+    if self\.debug
+
+    # Don't complain if tests don't hit defensive assertion code:
+    raise AssertionError
+    raise NotImplementedError
+
+    # Don't complain if non-runnable code isn't run:
+    if 0:
+    if __name__ == .__main__.:

+ 36 - 0
need_package/PyPDF2-master/.github/workflows/unit-tests.yaml

@@ -0,0 +1,36 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Unit Tests
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements/ci.txt
+        pip install .
+    - name: Test with flake8
+      run: |
+        flake8 . --ignore E,F,I,SIM,C,PT,N,ASS,A,P,R,W
+    - name: Test with pytest
+      run: |
+        pytest Tests/tests.py Tests --cov --cov-report term-missing -vv

+ 11 - 0
need_package/PyPDF2-master/.gitignore

@@ -0,0 +1,11 @@
+*.pyc
+*.swp
+.DS_Store
+.tox
+build
+.idea/*
+.coverage
+
+# Files generated by some of the scripts
+dont_commit_merged.pdf
+PyPDF2-output.pdf

+ 39 - 0
need_package/PyPDF2-master/.pre-commit-config.yaml

@@ -0,0 +1,39 @@
+# pre-commit run --all-files
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.1.0
+    hooks:
+    -   id: check-ast
+    -   id: check-byte-order-marker
+    -   id: check-case-conflict
+    -   id: check-docstring-first
+    -   id: check-yaml
+    -   id: debug-statements
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+    -   id: mixed-line-ending
+    -   id: check-added-large-files
+        args: ['--maxkb=1000']
+-   repo: https://gitlab.com/pycqa/flake8
+    rev: 3.9.2
+    hooks:
+    -   id: flake8
+        args: ["--ignore", "E,W,F"]
+# -   repo: https://github.com/pre-commit/mirrors-mypy
+#     rev: v0.942
+#     hooks:
+#     -   id: mypy
+# -   repo: https://github.com/psf/black
+#     rev: 22.3.0
+#     hooks:
+#     -   id: black
+# -   repo: https://github.com/asottile/pyupgrade
+#     rev: v2.31.1
+#     hooks:
+#     -   id: pyupgrade
+#         args: [--py36-plus]
+-   repo: https://github.com/asottile/blacken-docs
+    rev: v1.12.1
+    hooks:
+    -   id: blacken-docs
+        additional_dependencies: [black==22.1.0]

+ 19 - 0
need_package/PyPDF2-master/.travis.yml

@@ -0,0 +1,19 @@
+sudo: false
+
+language: python
+python: 
+  - "2.7"
+  - "3.4"
+  - "3.5"
+  - "3.6"
+  - "pypy"
+  - "pypy3"
+
+install:
+  - pip install tox-travis
+
+script:
+  - tox
+
+matrix:
+  fast_finish: true

+ 631 - 0
need_package/PyPDF2-master/CHANGELOG

@@ -0,0 +1,631 @@
+Version 1.26.0, 2016-05-18
+--------------------------
+
+ - NOTE: Active maintenance on PyPDF2 is resuming after a hiatus
+
+ - Fixed a bug where image resources were incorrectly
+   overwritten when merging pages
+
+ - Added dictionary for JavaScript actions to the root (louib)
+
+ - Added unit tests for the JS functionality (louib)
+
+ - Add more Python 3 compatibility when reading inline images (im2703
+   and (VyacheslavHashov)
+
+ - Return NullObject instead of raising error when failing to resolve
+   object (ctate)
+
+ - Don't output warning for non-zeroed xref table when strict=False
+   (BenRussert)
+
+ - Remove extraneous zeroes from output formatting (speedplane)
+
+ - Fix bug where reading an inline image would cut off prematurely
+   in certain cases (speedplane)
+
+
+Patch 1.25.1, 2015-07-20
+
+ - Fix bug when parsing inline images. Occurred when merging
+   certain pages with inline images
+
+ - Fixed type error when creating outlines by utilizing the
+   isString() test
+
+Version 1.25, 2015-07-07
+------------------------
+
+BUGFIXES:
+
+ - Added Python 3 algorithm for ASCII85Decode. Fixes issue when
+   reading reportlab-generated files with Py 3 (jerickbixly)
+
+ - Recognize more escape sequence which would otherwise throw an
+   exception (manuelzs, robertsoakes)
+
+ - Fixed overflow error in generic.py. Occurred
+   when reading a too-large int in Python 2 (by Raja Jamwal)
+
+ - Allow access to files which were encrypted with an empty
+   password. Previously threw a "File has not been decrypted"
+   exception (Elena Williams)
+
+ - Do not attempt to decode an empty data stream. Previously
+   would cause an error in decode algorithms (vladir)
+
+ - Fixed some type issues specific to Py 2 or Py 3
+
+ - Fix issue when stream data begins with whitespace (soloma83)
+
+ - Recognize abbreviated filter names (AlmightyOatmeal and
+   Matthew Weiss)
+
+ - Copy decryption key from PdfFileReader to PdfFileMerger.
+   Allows usage of PdfFileMerger with encrypted files (twolfson)
+
+ - Fixed bug which occurred when a NameObject is present at end
+   of a file stream. Threw a "Stream has ended unexpectedly"
+   exception (speedplane)
+
+FEATURES:
+
+ - Initial work on a test suite; to be expanded in future.
+   Tests and Resources directory added, README updated (robertsoakes)
+
+ - Added document cloning methods to PdfFileWriter:
+   appendPagesFromReader, cloneReaderDocumentRoot, and
+   cloneDocumentFromReader. See official documentation (robertsoakes)
+
+ - Added method for writing to form fields: updatePageFormFieldValues.
+   This will be enhanced in the future. See official documentation
+   (robertsoakes)
+
+ - New addAttachment method. See documentation. Support for adding
+   and extracting embedded files to be enhanced in the future
+   (moshekaplan)
+
+ - Added methods to get page number of given PageObject or
+   Destination: getPageNumber and getDestinationPageNumber.
+   See documentation (mozbugbox)
+
+OTHER ENHANCEMENTS:
+
+ - Enhanced type handling (Brent Amrhein)
+
+ - Enhanced exception handling in NameObject (sbywater)
+
+ - Enhanced extractText method output (peircej)
+
+ - Better exception handling
+
+ - Enhanced regex usage in NameObject class (speedplane)
+
+
+Version 1.24, 2014-12-31
+------------------------
+
+ - Bugfixes for reading files in Python 3 (by Anthony Tuininga and
+   pqqp)
+
+ - Appropriate errors are now raised instead of infinite loops (by
+   naure and Cyrus Vafadari)
+
+ - Bugfix for parsing number tokens with leading spaces (by Maxim
+   Kamenkov)
+
+ - Don't crash on bad /Outlines reference (by eshellman)
+
+ - Conform tabs/spaces and blank lines to PEP 8 standards
+
+ - Utilize the readUntilRegex method when reading Number Objects
+   (by Brendan Jurd)
+
+ - More bugfixes for Python 3 and clearer exception handling
+
+ - Fixed encoding issue in merger (with eshellman)
+
+ - Created separate folder for scripts
+
+
+Version 1.23, 2014-08-11
+------------------------
+
+ - Documentation now available at http://pythonhosted.org//PyPDF2
+
+ - Bugfix in pagerange.py for when __init__.__doc__ has no value (by
+   Vladir Cruz)
+
+ - Fix typos in OutlinesObject().add() (by shilluc)
+
+ - Re-added a missing return statement in a utils.py method
+
+ - Corrected viewing mode names (by Jason Scheirer)
+
+ - New PdfFileWriter method: addJS() (by vfigueiro)
+
+ - New bookmark features: color, boldness, italics, and page fit
+   (by Joshua Arnott)
+
+ - New PdfFileReader method: getFields(). Used to extract field
+   information from PDFs with interactive forms. See documentation
+   for details
+
+ - Converted README file to markdown format (by Stephen Bussard)
+
+ - Several improvements to overall performance and efficiency
+   (by mozbugbox)
+
+ - Fixed a bug where geospatial information was not scaling along with
+   its page
+
+ - Fixed a type issue and a Python 3 issue in the decryption algorithms
+   (with Francisco Vieira and koba-ninkigumi)
+
+ - Fixed a bug causing an infinite loop in the ASCII 85 decoding
+   algorithm (by madmaardigan)
+
+ - Annotations (links, comment windows, etc.) are now preserved when
+   pages are merged together
+
+ - Used the Destination class in addLink() and addBookmark() so that 
+   the page fit option could be properly customized
+
+
+Version 1.22, 2014-05-29
+------------------------
+
+ - Added .DS_Store to .gitignore (for Mac users) (by Steve Witham)
+
+ - Removed __init__() implementation in NameObject (by Steve Witham)
+
+ - Fixed bug (inf. loop) when merging pages in Python 3 (by commx)
+
+ - Corrected error when calculating height in scaleTo()
+
+ - Removed unnecessary code from DictionaryObject (by Georges Dubus)
+
+ - Fixed bug where an exception was thrown upon reading a NULL string
+   (by speedplane)
+
+ - Allow string literals (non-unicode strings in Python 2) to be passed 
+   to PdfFileReader
+
+ - Allow ConvertFunctionsToVirtualList to be indexed with slices and
+   longs (in Python 2) (by Matt Gilson)
+
+ - Major improvements and bugfixes to addLink() method (see documentation
+   in source code) (by Henry Keiter)
+
+ - General code clean-up and improvements (with Steve Witham and Henry Keiter)
+
+ - Fixed bug that caused crash when comments are present at end of 
+   dictionary
+
+
+Version 1.21, 2014-04-21
+------------------------
+
+ - Fix for when /Type isn't present in the Pages dictionary (by Rob1080)
+
+ - More tolerance for extra whitespace in Indirect Objects
+
+ - Improved Exception handling
+
+ - Fixed error in getHeight() method (by Simon Kaempflein)
+
+ - implement use of utils.string_type to resolve Py2-3 compatibility issues
+
+ - Prevent exception for multiple definitions in a dictionary (with carlosfunk)
+   (only when strict = False)
+
+ - Fixed errors when parsing a slice using pdfcat on command line (by
+   Steve Witham)
+
+ - Tolerance for EOF markers within 1024 bytes of the actual end of the
+   file (with David Wolever)
+
+ - Added overwriteWarnings parameter to PdfFileReader constructor, if False
+   PyPDF2 will NOT overwrite methods from Python's warnings.py module with
+   a custom implementation.
+
+ - Fix NumberObject and NameObject constructors for compatibility with PyPy
+   (Rüdiger Jungbeck, Xavier Dupré, shezadkhan137, Steven Witham)
+
+ - Utilize  utils.Str in pdf.py and pagerange.py to resolve type issues (by
+   egbutter)
+
+ - Improvements in implementing StringIO for Python 2 and BytesIO for
+   Python 3 (by Xavier Dupré)
+
+ - Added /x00 to Whitespaces, defined utils.WHITESPACES to clarify code (by
+   Maxim Kamenkov)
+
+ - Bugfix for merging 3 or more resources with the same name (by lucky-user)
+
+ - Improvements to Xref parsing algorithm (by speedplane)
+
+
+Version 1.20, 2014-01-27
+------------------------
+
+ - Official Python 3+ support (with contributions from TWAC and cgammans)
+   Support for Python versions 2.6 and 2.7 will be maintained
+
+ - Command line concatenation (see pdfcat in sample code) (by Steve Witham)
+
+ - New FAQ; link included in README
+
+ - Allow more (although unnecessary) escape sequences
+
+ - Prevent exception when reading a null object in decoding parameters
+
+ - Corrected error in reading destination types (added a slash since they
+   are name objects)
+
+ - Corrected TypeError in scaleTo() method
+
+ - addBookmark() method in PdfFileMerger now returns bookmark (so nested
+   bookmarks can be created)
+
+ - Additions to Sample Code and Sample PDFs
+
+ - changes to allow 2up script to work (see sample code) (by Dylan McNamee)
+
+ - changes to metadata encoding (by Chris Hiestand)
+
+ - New methods for links: addLink() (by Enrico Lambertini) and removeLinks()
+
+ - Bugfix to handle nested bookmarks correctly (by Jamie Lentin)
+
+ - New methods removeImages() and removeText() available for PdfFileWriter
+   (by Tien Haï)
+
+ - Exception handling for illegal characters in Name Objects
+
+
+Version 1.19, 2013-10-08
+------------------------
+
+BUGFIXES:
+ - Removed pop in sweepIndirectReferences to prevent infinite loop
+   (provided by ian-su-sirca)
+
+ - Fixed bug caused by whitespace when parsing PDFs generated by AutoCad
+
+ - Fixed a bug caused by reading a 'null' ASCII value in a dictionary
+   object (primarily in PDFs generated by AutoCad).
+
+FEATURES:
+ - Added new folders for PyPDF2 sample code and example PDFs; see README
+   for each folder
+
+ - Added a method for debugging purposes to show current location while
+   parsing
+
+ - Ability to create custom metadata (by jamma313)
+
+ - Ability to access and customize document layout and view mode
+   (by Joshua Arnott)
+
+OTHER:
+ - Added and corrected some documentation
+
+ - Added some more warnings and exception messages
+
+ - Removed old test/debugging code
+
+UPCOMING:
+ - More bugfixes (We have received many problematic PDFs via email, we
+   will work with them)
+ 
+ - Documentation - It's time for PyPDF2 to get its own documentation
+   since it has grown much since the original pyPdf
+
+ - A FAQ to answer common questions
+
+
+Version 1.18, 2013-08-19
+------------------------
+
+ - Fixed a bug where older versions of objects were incorrectly added to the 
+   cache, resulting in outdated or missing pages, images, and other objects
+   (from speedplane)
+
+ - Fixed a bug in parsing the xref table where new xref values were 
+   overwritten; also cleaned up code (from speedplane)
+
+ - New method mergeRotatedAroundPointPage which merges a page while rotating
+   it around a point (from speedplane)
+
+ - Updated Destination syntax to respect PDF 1.6 specifications (from
+   jamma313)
+
+ - Prevented infinite loop when a PdfFileReader object was instantiated
+   with an empty file (from Jerome Nexedi)
+
+Other Changes:
+
+ - Downloads now available via PyPI
+   https://pypi.python.org/pypi?:action=display&name=PyPDF2
+
+ - Installation through pip library is fixed
+
+
+Version 1.17, 2013-07-25
+------------------------
+
+ - Removed one (from pdf.py) of the two Destination classes. Both 
+   classes had the same name, but were slightly different in content, 
+   causing some errors. (from Janne Vanhala)
+
+ - Corrected and Expanded README file to demonstrate PdfFileMerger
+
+ - Added filter for LZW encoded streams (from Michal Horejsek)
+
+ - PyPDF2 issue tracker enabled on Github to allow community
+   discussion and collaboration
+
+
+Versions -1.16, -2013-06-30
+---------------------------
+
+ - Note: This ChangeLog has not been kept up-to-date for a while.
+   Hopefully we can keep better track of it from now on. Some of the
+   changes listed here come from previous versions 1.14 and 1.15; they
+   were only vaguely defined. With the new _version.py file we should 
+   have more structured and better documented versioning from now on.
+ 
+ - Defined PyPDF2.__version__
+
+ - Fixed encrypt() method (from Martijn The)
+
+ - Improved error handling on PDFs with truncated streams (from cecilkorik)
+
+ - Python 3 support (from kushal-kumaran)
+
+ - Fixed example code in README (from Jeremy Bethmont)
+
+ - Fixed a bug caused by DecimalError Exception (from Adam Morris)
+
+ - Many other bug fixes and features by: 
+	
+	jeansch
+	Anton Vlasenko
+	Joseph Walton
+	Jan Oliver Oelerich
+	Fabian Henze
+	And any others I missed. 
+	Thanks for contributing!
+
+
+Version 1.13, 2010-12-04
+------------------------
+
+ - Fixed a typo in code for reading a "\b" escape character in strings.
+
+ - Improved __repr__ in FloatObject.
+
+ - Fixed a bug in reading octal escape sequences in strings.
+
+ - Added getWidth and getHeight methods to the RectangleObject class.
+
+ - Fixed compatibility warnings with Python 2.4 and 2.5.
+
+ - Added addBlankPage and insertBlankPage methods on PdfFileWriter class.
+
+ - Fixed a bug with circular references in page's object trees (typically
+   annotations) that prevented correctly writing out a copy of those pages.
+
+ - New merge page functions allow application of a transformation matrix.
+
+ - To all patch contributors: I did a poor job of keeping this ChangeLog
+   up-to-date for this release, so I am missing attributions here for any
+   changes you submitted.  Sorry!  I'll do better in the future.
+
+
+Version 1.12, 2008-09-02
+------------------------
+
+ - Added support for XMP metadata.
+
+ - Fix reading files with xref streams with multiple /Index values.
+
+ - Fix extracting content streams that use graphics operators longer than 2
+   characters.  Affects merging PDF files.
+
+
+Version 1.11, 2008-05-09
+------------------------
+
+ - Patch from Hartmut Goebel to permit RectangleObjects to accept NumberObject
+   or FloatObject values.
+
+ - PDF compatibility fixes.
+
+ - Fix to read object xref stream in correct order.
+
+ - Fix for comments inside content streams.
+
+
+Version 1.10, 2007-10-04
+------------------------
+
+ - Text strings from PDF files are returned as Unicode string objects when
+ pyPdf determines that they can be decoded (as UTF-16 strings, or as
+ PDFDocEncoding strings).  Unicode objects are also written out when
+ necessary.  This means that string objects in pyPdf can be either
+ generic.ByteStringObject instances, or generic.TextStringObject instances.
+
+ - The extractText method now returns a unicode string object.
+
+ - All document information properties now return unicode string objects.  In
+ the event that a document provides docinfo properties that are not decoded by
+ pyPdf, the raw byte strings can be accessed with an "_raw" property (ie.
+ title_raw rather than title)
+
+ - generic.DictionaryObject instances have been enhanced to be easier to use.
+ Values coming out of dictionary objects will automatically be de-referenced
+ (.getObject will be called on them), unless accessed by the new "raw_get"
+ method.  DictionaryObjects can now only contain PdfObject instances (as keys
+ and values), making it easier to debug where non-PdfObject values (which
+ cannot be written out) are entering dictionaries.
+
+ - Support for reading named destinations and outlines in PDF files.  Original
+ patch by Ashish Kulkarni.
+
+ - Stream compatibility reading enhancements for malformed PDF files.
+
+ - Cross reference table reading enhancements for malformed PDF files.
+
+ - Encryption documentation.
+
+ - Replace some "assert" statements with error raising.
+
+ - Minor optimizations to FlateDecode algorithm increase speed when using PNG
+ predictors.
+
+Version 1.9, 2006-12-15
+-----------------------
+
+ - Fix several serious bugs introduced in version 1.8, caused by a failure to
+   run through our PDF test suite before releasing that version.
+
+ - Fix bug in NullObject reading and writing.
+
+Version 1.8, 2006-12-14
+-----------------------
+
+ - Add support for decryption with the standard PDF security handler.  This
+   allows for decrypting PDF files given the proper user or owner password.
+
+ - Add support for encryption with the standard PDF security handler.
+
+ - Add new pythondoc documentation.
+
+ - Fix bug in ASCII85 decode that occurs when whitespace exists inside the
+   two terminating characters of the stream.
+
+Version 1.7, 2006-12-10
+-----------------------
+
+ - Fix a bug when using a single page object in two PdfFileWriter objects.
+
+ - Adjust PyPDF to be tolerant of whitespace characters that don't belong
+   during a stream object.
+
+ - Add documentInfo property to PdfFileReader.
+
+ - Add numPages property to PdfFileReader.
+
+ - Add pages property to PdfFileReader.
+
+ - Add extractText function to PdfFileReader.
+
+
+Version 1.6, 2006-06-06
+-----------------------
+
+ - Add basic support for comments in PDF files.  This allows us to read some
+   ReportLab PDFs that could not be read before.
+
+ - Add "auto-repair" for finding xref table at slightly bad locations.
+
+ - New StreamObject backend, cleaner and more powerful.  Allows the use of
+   stream filters more easily, including compressed streams.
+
+ - Add a graphics state push/pop around page merges.  Improves quality of
+   page merges when one page's content stream leaves the graphics 
+   in an abnormal state.
+
+ - Add PageObject.compressContentStreams function, which filters all content
+   streams and compresses them.  This will reduce the size of PDF pages,
+   especially after they could have been decompressed in a mergePage
+   operation.
+
+ - Support inline images in PDF content streams.
+
+ - Add support for using .NET framework compression when zlib is not
+   available.  This does not make pyPdf compatible with IronPython, but it
+   is a first step.
+
+ - Add support for reading the document information dictionary, and extracting
+   title, author, subject, producer and creator tags.
+
+ - Add patch to support NullObject and multiple xref streams, from Bradley
+   Lawrence.
+
+
+Version 1.5, 2006-01-28
+-----------------------
+
+- Fix a bug where merging pages did not work in "no-rename" cases when the
+  second page has an array of content streams.
+
+- Remove some debugging output that should not have been present.
+
+
+Version 1.4, 2006-01-27
+-----------------------
+
+- Add capability to merge pages from multiple PDF files into a single page
+  using the PageObject.mergePage function.  See example code (README or web
+  site) for more information.
+
+- Add ability to modify a page's MediaBox, CropBox, BleedBox, TrimBox, and
+  ArtBox properties through PageObject.  See example code (README or web site)
+  for more information.
+
+- Refactor pdf.py into multiple files: generic.py (contains objects like
+  NameObject, DictionaryObject), filters.py (contains filter code),
+  utils.py (various).  This does not affect importing PdfFileReader
+  or PdfFileWriter.
+
+- Add new decoding functions for standard PDF filters ASCIIHexDecode and
+  ASCII85Decode.
+
+- Change url and download_url to refer to new pybrary.net web site.
+
+
+Version 1.3, 2006-01-23
+-----------------------
+
+- Fix new bug introduced in 1.2 where PDF files with \r line endings did not
+  work properly anymore.  A new test suite developed with various PDF files
+  should prevent regression bugs from now on.
+
+- Fix a bug where inheriting attributes from page nodes did not work.
+
+
+Version 1.2, 2006-01-23
+-----------------------
+
+- Improved support for files with CRLF-based line endings, fixing a common
+  reported problem stating "assertion error: assert line == "%%EOF"".
+
+- Software author/maintainer is now officially a proud married person, which
+  is sure to result in better software... somehow.
+
+
+Version 1.1, 2006-01-18
+-----------------------
+
+- Add capability to rotate pages.
+
+- Improved PDF reading support to properly manage inherited attributes from
+  /Type=/Pages nodes.  This means that page groups that are rotated or have
+  different media boxes or whatever will now work properly.
+
+- Added PDF 1.5 support.  Namely cross-reference streams and object streams.
+  This release can mangle Adobe's PDFReference16.pdf successfully.
+
+
+Version 1.0, 2006-01-17
+-----------------------
+
+- First distutils-capable true public release.  Supports a wide variety of PDF
+  files that I found sitting around on my system.
+
+- Does not support some PDF 1.5 features, such as object streams,
+  cross-reference streams.
+

+ 29 - 0
need_package/PyPDF2-master/LICENSE

@@ -0,0 +1,29 @@
+Copyright (c) 2006-2008, Mathieu Fenniak
+Some contributions copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
+Some contributions copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+* The name of the author may not be used to endorse or promote products
+derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.

+ 2 - 0
need_package/PyPDF2-master/MANIFEST.in

@@ -0,0 +1,2 @@
+include CHANGELOG
+include LICENSE

二进制
need_package/PyPDF2-master/PDF_Samples/AutoCad_Diagram.pdf


+ 103 - 0
need_package/PyPDF2-master/PDF_Samples/AutoCad_Simple.pdf

@@ -0,0 +1,103 @@
+%PDF-1.6
+%Þ­¾ï
+3 0 obj
+<<  /Length 152 /Filter /FlateDecode /DecodeParms
+<<  /Predictor 1
+>>
+>>
+stream
+xœUÍÁ
+Â0Ðû€ÿ0_`“tíÕT
+‚ˆöàYJ%UìAñïÝ´—ÊBxd—X¦9Õ˜0\a–ÖѤI°ž¹eÛ/ˆ”úÝC\¾RÄ	¾0E².gÔ5Ï|`Jù4ŸYÕ½omW—/Cág¡í÷ô옳gk¹©àÖ¹£”âµu´3"Ú`½ñoâì>¢ávpüÜ�-³
+endstream
+endobj
+4 0 obj
+<< /Type /FontDescriptor /FontName /ArialMT /FontFamily (Arial) /FontWeight 400 /FontBBox [-665 -325 2000 1006] /Ascent 728 /Descent -210 /CapHeight -34 /Leading 33 /Flags 42 /ItalicAngle 0 /StemV 80
+>>
+endobj
+5 0 obj
+<< /Type /Font /Subtype /TrueType /Name /F0 /BaseFont /ArialMT /FontDescriptor 4 0 R /Encoding /WinAnsiEncoding /FirstChar 32 /LastChar 255 /Widths 2 0 R
+>>
+endobj
+6 0 obj
+<<  /Length 150 /Filter /FlateDecode /DecodeParms
+<<  /Predictor 1
+>>
+>>
+stream
+xœM�=Â@ƒ÷üŠŒ:”»k¯ÖŽöZ»Xü:ÐUJÅ*vô×û]$CI`´«1ÂpÁÖæÔìaã$¸�0KuXÂ?”˜> –†I2§?£ðÐ’Õd•ªìÞ×¶«‡Ó‡nOµvTÏÖ°(iFá^<ÖÖÒÆyFßKÝ·0Zì‹Iu\4›U5¥¿¡ò¬‡íʲ(Û
+endstream
+endobj
+2 0 obj
+ [ 278 278 355 556 556 889 667 191 333 333 389 584 278 333 278 278 556 556 556 556 556 556 556 556 556 556 278 278 584 584 584 556 1015 667 667 722 722 667 611 778 722 278 500 667 556 833 722 778 667 778 722 667 611 722 667 944 667 667 611 278 278 278 469 556 333 556 556 500 556 556 278 556 556 222 222 500 222 833 556 556 556 556 333 500 278 556 500 722 500 500 500 334 260 334 584 500 556 500 222 556 333 1000 556 556 333 1000 667 333 1000 500 611 500 500 222 222 333 333 350 556 1000 333 1000 500 333 944 500 500 667 500 333 556 556 556 556 260 556 333 737 370 556 584 500 737 552 400 549 333 333 333 576 537 333 333 333 365 556 834 834 834 611 667 667 667 667 667 667 1000 722 667 667 667 667 278 278 278 278 722 722 778 778 778 778 778 584 778 722 722 722 722 667 667 611 556 556 556 556 556 556 889 500 556 556 556 556 278 278 278 278 556 556 556 556 556 556 556 549 611 556 556 556 556 500 556 500 
+]
+endobj
+1 0 obj
+<< /Type /OCG /Name (DIMENSION)
+>>
+endobj
+8 0 obj
+<< /Type /Page /Parent 7 0 R /MediaBox [0 0 595 842] /Contents [3 0 R
+6 0 R
+] /Rotate 270 
+ /VP [
+<<  /Measure
+<<  /Subtype /RL
+ /A [
+<<  /C 1 /U (\ )
+>>]
+ /D [
+<<  /C 1 /U (\ )
+>>]
+ /X [
+<<  /C 0.68767 /U (\ )
+>>]
+ /R (\ ) /Type /Measure
+>> /Type /Viewport /BBox [69 31 524 811]
+>>] /Resources
+<<  /ProcSet [ /PDF /Text]
+ /Font
+<<  /F1 5 0 R
+>>
+ /Properties
+<<  /oc1 1 0 R
+>>
+>>
+>>
+endobj
+7 0 obj
+<< /Type /Pages /Kids [8 0 R
+] /Count 1
+>>
+endobj
+9 0 obj
+<< /Type /Catalog /Pages 7 0 R /OCProperties
+<<  /OCGs [ 1 0 R]  /D
+<<  /Order [ 1 0 R] /OFF []
+>>
+>> /PageMode /UseOC /PageLayout /SinglePage
+>>
+endobj
+10 0 obj
+<<  /Creator (AutoCAD\ 2012\ -\ English\ 2012\ \(18.2s\ \(LMS\ Tech\)\)) /Title (Model) /Producer (pdfplot10.hdi\ 10.2.51.0) /CreationDate (D:20130827124142) /ModDate (D:20130827124142)
+>>
+endobj
+xref
+0 11
+0000000000 65535 f 
+0000001843 00000 n 
+0000000919 00000 n 
+0000000015 00000 n 
+0000000273 00000 n 
+0000000491 00000 n 
+0000000663 00000 n 
+0000002291 00000 n 
+0000001893 00000 n 
+0000002349 00000 n 
+0000002510 00000 n 
+trailer
+<<  /Size 11 /Root 9 0 R /Info 10 0 R
+>>
+startxref
+2715
+%%EOF

二进制
need_package/PyPDF2-master/PDF_Samples/GeoBase_NHNC1_Data_Model_UML_EN.pdf


+ 26 - 0
need_package/PyPDF2-master/PDF_Samples/README.txt

@@ -0,0 +1,26 @@
+PDF Sample Folder
+-----------------
+
+PDF files are generated by a large variety of sources
+for many different purposes. One of the goals of PyPDF2
+is to be able to read/write any PDF instance that Adobe
+can.
+
+This is a catalog of various PDF files. The
+files may not have worked with PyPDF2 but do now, they
+may be complicated or unconventional files, or they may
+just be good for testing. The purpose is to ensure that
+when changes to PyPDF2 are made, we keep them in mind.
+
+If you have confidential PDFs that don't work with
+PyPDF2, feel free to still e-mail them for debugging -
+we won't add PDFs without expressed permission.
+
+(This folder is available through GitHub only)
+
+
+Feel free to add any type of PDF file or sample code, 
+either by
+
+	1) sending it via email to PyPDF2@phaseit.net
+	2) including it in a pull request on GitHub

二进制
need_package/PyPDF2-master/PDF_Samples/SF424_page2.pdf


二进制
need_package/PyPDF2-master/PDF_Samples/Seige_of_Vicksburg_Sample_OCR.pdf


二进制
need_package/PyPDF2-master/PDF_Samples/jpeg.pdf


+ 5 - 0
need_package/PyPDF2-master/PyPDF2/__init__.py

@@ -0,0 +1,5 @@
+from .pdf import PdfFileReader, PdfFileWriter
+from .merger import PdfFileMerger
+from .pagerange import PageRange, parse_filename_page_ranges
+from ._version import __version__
+__all__ = ["pdf", "PdfFileMerger"]

+ 1 - 0
need_package/PyPDF2-master/PyPDF2/_version.py

@@ -0,0 +1 @@
+__version__ = '1.26.0'

+ 424 - 0
need_package/PyPDF2-master/PyPDF2/filters.py

@@ -0,0 +1,424 @@
+# vim: sw=4:expandtab:foldmethod=marker
+#
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+"""
+Implementation of stream filters for PDF.
+"""
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+import math
+
+from .utils import PdfReadError, ord_, paethPredictor
+from sys import version_info
+if version_info < ( 3, 0 ):
+    from cStringIO import StringIO
+else:
+    from io import StringIO
+    import struct
+
+try:
+    import zlib
+
+    def decompress(data):
+        return zlib.decompress(data)
+
+    def compress(data):
+        return zlib.compress(data)
+
+except ImportError:
+    # Unable to import zlib.  Attempt to use the System.IO.Compression
+    # library from the .NET framework. (IronPython only)
+    import System
+    from System import IO, Collections, Array
+
+    def _string_to_bytearr(buf):
+        retval = Array.CreateInstance(System.Byte, len(buf))
+        for i in range(len(buf)):
+            retval[i] = ord(buf[i])
+        return retval
+
+    def _bytearr_to_string(bytes):
+        retval = ""
+        for i in range(bytes.Length):
+            retval += chr(bytes[i])
+        return retval
+
+    def _read_bytes(stream):
+        ms = IO.MemoryStream()
+        buf = Array.CreateInstance(System.Byte, 2048)
+        while True:
+            bytes = stream.Read(buf, 0, buf.Length)
+            if bytes == 0:
+                break
+            else:
+                ms.Write(buf, 0, bytes)
+        retval = ms.ToArray()
+        ms.Close()
+        return retval
+
+    def decompress(data):
+        bytes = _string_to_bytearr(data)
+        ms = IO.MemoryStream()
+        ms.Write(bytes, 0, bytes.Length)
+        ms.Position = 0  # fseek 0
+        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
+        bytes = _read_bytes(gz)
+        retval = _bytearr_to_string(bytes)
+        gz.Close()
+        return retval
+
+    def compress(data):
+        bytes = _string_to_bytearr(data)
+        ms = IO.MemoryStream()
+        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
+        gz.Write(bytes, 0, bytes.Length)
+        gz.Close()
+        ms.Position = 0 # fseek 0
+        bytes = ms.ToArray()
+        retval = _bytearr_to_string(bytes)
+        ms.Close()
+        return retval
+
+
+class FlateDecode(object):
+    def decode(data, decodeParms):
+        data = decompress(data)
+        predictor = 1
+        if decodeParms:
+            try:
+                predictor = decodeParms.get("/Predictor", 1)
+            except AttributeError:
+                pass    # usually an array with a null object was read
+
+        # predictor 1 == no predictor
+        if predictor != 1:
+            columns = decodeParms["/Columns"]
+            # PNG prediction:
+            if predictor >= 10 and predictor <= 15:
+                output = StringIO()
+                # PNG prediction can vary from row to row
+                rowlength = columns + 1
+                assert len(data) % rowlength == 0
+                prev_rowdata = (0,) * rowlength
+                for row in range(len(data) // rowlength):
+                    rowdata = [ord_(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
+                    filterByte = rowdata[0]
+                    if filterByte == 0:
+                        pass
+                    elif filterByte == 1:
+                        for i in range(2, rowlength):
+                            rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
+                    elif filterByte == 2:
+                        for i in range(1, rowlength):
+                            rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
+                    elif filterByte == 3:
+                        for i in range(1, rowlength):
+                            left = rowdata[i-1] if i > 1 else 0
+                            floor = math.floor(left + prev_rowdata[i])/2
+                            rowdata[i] = (rowdata[i] + int(floor)) % 256
+                    elif filterByte == 4:
+                        for i in range(1, rowlength):
+                            left = rowdata[i - 1] if i > 1 else 0
+                            up = prev_rowdata[i]
+                            up_left = prev_rowdata[i - 1] if i > 1 else 0
+                            paeth = paethPredictor(left, up, up_left)
+                            rowdata[i] = (rowdata[i] + paeth) % 256
+                    else:
+                        # unsupported PNG filter
+                        raise PdfReadError("Unsupported PNG filter %r" % filterByte)
+                    prev_rowdata = rowdata
+                    output.write(''.join([chr(x) for x in rowdata[1:]]))
+                data = output.getvalue()
+            else:
+                # unsupported predictor
+                raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
+        return data
+    decode = staticmethod(decode)
+
+    def encode(data):
+        return compress(data)
+    encode = staticmethod(encode)
+
+
+class ASCIIHexDecode(object):
+    def decode(data, decodeParms=None):
+        retval = ""
+        char = ""
+        x = 0
+        while True:
+            c = data[x]
+            if c == ">":
+                break
+            elif c.isspace():
+                x += 1
+                continue
+            char += c
+            if len(char) == 2:
+                retval += chr(int(char, base=16))
+                char = ""
+            x += 1
+        assert char == ""
+        return retval
+    decode = staticmethod(decode)
+
+
+class LZWDecode(object):
+    """Taken from:
+    http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
+    """
+    class decoder(object):
+        def __init__(self, data):
+            self.STOP=257
+            self.CLEARDICT=256
+            self.data=data
+            self.bytepos=0
+            self.bitpos=0
+            self.dict=[""]*4096
+            for i in range(256):
+                self.dict[i]=chr(i)
+            self.resetDict()
+
+        def resetDict(self):
+            self.dictlen=258
+            self.bitspercode=9
+
+        def nextCode(self):
+            fillbits=self.bitspercode
+            value=0
+            while fillbits>0 :
+                if self.bytepos >= len(self.data):
+                    return -1
+                nextbits=ord_(self.data[self.bytepos])
+                bitsfromhere=8-self.bitpos
+                if bitsfromhere>fillbits:
+                    bitsfromhere=fillbits
+                value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
+                           (0xff >> (8-bitsfromhere))) <<
+                          (fillbits-bitsfromhere))
+                fillbits -= bitsfromhere
+                self.bitpos += bitsfromhere
+                if self.bitpos >=8:
+                    self.bitpos=0
+                    self.bytepos = self.bytepos+1
+            return value
+
+        def decode(self):
+            """ algorithm derived from:
+            http://www.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html
+            and the PDFReference
+            """
+            cW = self.CLEARDICT;
+            baos=""
+            while True:
+                pW = cW;
+                cW = self.nextCode();
+                if cW == -1:
+                    raise PdfReadError("Missed the stop code in LZWDecode!")
+                if cW == self.STOP:
+                    break;
+                elif cW == self.CLEARDICT:
+                    self.resetDict();
+                elif pW == self.CLEARDICT:
+                    baos+=self.dict[cW]
+                else:
+                    if cW < self.dictlen:
+                        baos += self.dict[cW]
+                        p=self.dict[pW]+self.dict[cW][0]
+                        self.dict[self.dictlen]=p
+                        self.dictlen+=1
+                    else:
+                        p=self.dict[pW]+self.dict[pW][0]
+                        baos+=p
+                        self.dict[self.dictlen] = p;
+                        self.dictlen+=1
+                    if (self.dictlen >= (1 << self.bitspercode) - 1 and
+                        self.bitspercode < 12):
+                        self.bitspercode+=1
+            return baos
+
+    @staticmethod
+    def decode(data,decodeParams=None):
+        return LZWDecode.decoder(data).decode()
+
+
+class ASCII85Decode(object):
+    def decode(data, decodeParms=None):
+        if version_info < ( 3, 0 ):
+            retval = ""
+            group = []
+            x = 0
+            hitEod = False
+            # remove all whitespace from data
+            data = [y for y in data if not (y in ' \n\r\t')]
+            while not hitEod:
+                c = data[x]
+                if len(retval) == 0 and c == "<" and data[x+1] == "~":
+                    x += 2
+                    continue
+                #elif c.isspace():
+                #    x += 1
+                #    continue
+                elif c == 'z':
+                    assert len(group) == 0
+                    retval += '\x00\x00\x00\x00'
+                    x += 1
+                    continue
+                elif c == "~" and data[x+1] == ">":
+                    if len(group) != 0:
+                        # cannot have a final group of just 1 char
+                        assert len(group) > 1
+                        cnt = len(group) - 1
+                        group += [ 85, 85, 85 ]
+                        hitEod = cnt
+                    else:
+                        break
+                else:
+                    c = ord(c) - 33
+                    assert c >= 0 and c < 85
+                    group += [ c ]
+                if len(group) >= 5:
+                    b = group[0] * (85**4) + \
+                        group[1] * (85**3) + \
+                        group[2] * (85**2) + \
+                        group[3] * 85 + \
+                        group[4]
+                    assert b < (2**32 - 1)
+                    c4 = chr((b >> 0) % 256)
+                    c3 = chr((b >> 8) % 256)
+                    c2 = chr((b >> 16) % 256)
+                    c1 = chr(b >> 24)
+                    retval += (c1 + c2 + c3 + c4)
+                    if hitEod:
+                        retval = retval[:-4+hitEod]
+                    group = []
+                x += 1
+            return retval
+        else:
+            if isinstance(data, str):
+                data = data.encode('ascii')
+            n = b = 0
+            out = bytearray()
+            for c in data:
+                if ord('!') <= c and c <= ord('u'):
+                    n += 1
+                    b = b*85+(c-33)
+                    if n == 5:
+                        out += struct.pack(b'>L',b)
+                        n = b = 0
+                elif c == ord('z'):
+                    assert n == 0
+                    out += b'\0\0\0\0'
+                elif c == ord('~'):
+                    if n:
+                        for _ in range(5-n):
+                            b = b*85+84
+                        out += struct.pack(b'>L',b)[:n-1]
+                    break
+            return bytes(out)
+    decode = staticmethod(decode)
+
+class DCTDecode(object):
+    def decode(data, decodeParms=None):
+        return data
+    decode = staticmethod(decode)
+
+class JPXDecode(object):
+    def decode(data, decodeParms=None):
+        return data
+    decode = staticmethod(decode)
+
+class CCITTFaxDecode(object):
+    def decode(data, decodeParms=None, height=0):
+        if decodeParms:
+            if decodeParms.get("/K", 1) == -1:
+                CCITTgroup = 4
+            else:
+                CCITTgroup = 3
+
+        width = decodeParms["/Columns"]
+        imgSize = len(data)
+        tiff_header_struct = '<' + '2s' + 'h' + 'l' + 'h' + 'hhll' * 8 + 'h'
+        tiffHeader = struct.pack(tiff_header_struct,
+                           b'II',  # Byte order indication: Little endian
+                           42,  # Version number (always 42)
+                           8,  # Offset to first IFD
+                           8,  # Number of tags in IFD
+                           256, 4, 1, width,  # ImageWidth, LONG, 1, width
+                           257, 4, 1, height,  # ImageLength, LONG, 1, length
+                           258, 3, 1, 1,  # BitsPerSample, SHORT, 1, 1
+                           259, 3, 1, CCITTgroup,  # Compression, SHORT, 1, 4 = CCITT Group 4 fax encoding
+                           262, 3, 1, 0,  # Thresholding, SHORT, 1, 0 = WhiteIsZero
+                           273, 4, 1, struct.calcsize(tiff_header_struct),  # StripOffsets, LONG, 1, length of header
+                           278, 4, 1, height,  # RowsPerStrip, LONG, 1, length
+                           279, 4, 1, imgSize,  # StripByteCounts, LONG, 1, size of image
+                           0  # last IFD
+                           )
+
+        return tiffHeader + data
+
+    decode = staticmethod(decode)
+
+def decodeStreamData(stream):
+    from .generic import NameObject
+    filters = stream.get("/Filter", ())
+
+    if len(filters) and not isinstance(filters[0], NameObject):
+        # we have a single filter instance
+        filters = (filters,)
+    data = stream._data
+    # If there is not data to decode we should not try to decode the data.
+    if data:
+        for filterType in filters:
+            if filterType == "/FlateDecode" or filterType == "/Fl":
+                data = FlateDecode.decode(data, stream.get("/DecodeParms"))
+            elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
+                data = ASCIIHexDecode.decode(data)
+            elif filterType == "/LZWDecode" or filterType == "/LZW":
+                data = LZWDecode.decode(data, stream.get("/DecodeParms"))
+            elif filterType == "/ASCII85Decode" or filterType == "/A85":
+                data = ASCII85Decode.decode(data)
+            elif filterType == "/DCTDecode":
+                data = DCTDecode.decode(data)
+            elif filterType == "/JPXDecode":
+                data = JPXDecode.decode(data)
+            elif filterType == "/CCITTFaxDecode":
+                height = stream.get("/Height", ())
+                data = CCITTFaxDecode.decode(data, stream.get("/DecodeParms"), height)
+            elif filterType == "/Crypt":
+                decodeParams = stream.get("/DecodeParams", {})
+                if "/Name" not in decodeParams and "/Type" not in decodeParams:
+                    pass
+                else:
+                    raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
+            else:
+                # unsupported filter
+                raise NotImplementedError("unsupported filter %s" % filterType)
+    return data

+ 1227 - 0
need_package/PyPDF2-master/PyPDF2/generic.py

@@ -0,0 +1,1227 @@
+# vim: sw=4:expandtab:foldmethod=marker
+#
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+"""
+Implementation of generic PDF objects (dictionary, number, string, and so on)
+"""
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+import re
+from .utils import readNonWhitespace, RC4_encrypt, skipOverComment
+from .utils import b_, u_, chr_, ord_
+from .utils import PdfStreamError
+import warnings
+from . import filters
+from . import utils
+import decimal
+import codecs
+
# First bytes that identify each PDF object type; the index of a byte in
# this string drives the dispatch in readObject() below.
ObjectPrefix = b_('/<[tf(n%')
# Valid leading sign characters for a numeric token.
NumberSigns = b_('+-')
# An indirect reference token: "<object id> <generation> R".
IndirectPattern = re.compile(b_(r"[+-]?(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
+
+
def readObject(stream, pdf):
    """Read one PDF object of any type from *stream*, dispatching on the
    first byte; *pdf* is the owning reader, passed through for indirect
    references and strictness handling."""
    first = stream.read(1)
    stream.seek(-1, 1)  # rewind so the specific reader sees the whole token
    kind = ObjectPrefix.find(first)
    if kind == 0:
        # '/' introduces a name object
        return NameObject.readFromStream(stream, pdf)
    if kind == 1:
        # '<' is either a hex string or, as '<<', a dictionary
        lookahead = stream.read(2)
        stream.seek(-2, 1)
        if lookahead == b_('<<'):
            return DictionaryObject.readFromStream(stream, pdf)
        return readHexStringFromStream(stream)
    if kind == 2:
        # '[' introduces an array
        return ArrayObject.readFromStream(stream, pdf)
    if kind in (3, 4):
        # 't'/'f' introduce the boolean keywords
        return BooleanObject.readFromStream(stream)
    if kind == 5:
        # '(' introduces a literal string
        return readStringFromStream(stream)
    if kind == 6:
        # 'n' introduces the null keyword
        return NullObject.readFromStream(stream)
    if kind == 7:
        # '%' starts a comment: consume to end of line, then parse whatever
        # follows by recursing.
        byte = first
        while byte not in (b_('\r'), b_('\n')):
            byte = stream.read(1)
            # Guard against an infinite loop when the comment runs into EOF.
            if len(byte) <= 0:
                raise PdfStreamError("File ended unexpectedly.")
        byte = readNonWhitespace(stream)
        stream.seek(-1, 1)
        return readObject(stream, pdf)
    # Anything else is a number or an indirect reference like "12 0 R".
    lookahead = stream.read(20)
    stream.seek(-len(lookahead), 1)
    if IndirectPattern.match(lookahead) is not None:
        return IndirectObject.readFromStream(stream, pdf)
    return NumberObject.readFromStream(stream)
+
+
class PdfObject(object):
    """Base class for every PDF object type defined in this module."""

    def getObject(self):
        """Resolves indirect references.  The base implementation is the
        identity: a direct object is already resolved."""
        return self
+
+
class NullObject(PdfObject):
    """The PDF ``null`` object."""

    def writeToStream(self, stream, encryption_key):
        """Serialize as the literal keyword ``null``."""
        stream.write(b_("null"))

    @staticmethod
    def readFromStream(stream):
        """Consume the four-byte keyword ``null``; raise on anything else."""
        token = stream.read(4)
        if token != b_("null"):
            raise utils.PdfReadError("Could not read Null object")
        return NullObject()
+
+
class BooleanObject(PdfObject):
    """A PDF boolean, serialized as the keywords ``true``/``false``."""

    def __init__(self, value):
        self.value = value

    def writeToStream(self, stream, encryption_key):
        stream.write(b_("true") if self.value else b_("false"))

    @staticmethod
    def readFromStream(stream):
        word = stream.read(4)
        if word == b_("true"):
            return BooleanObject(True)
        if word == b_("fals"):
            stream.read(1)  # consume the trailing 'e' of "false"
            return BooleanObject(False)
        raise utils.PdfReadError('Could not read Boolean object')
+
+
class ArrayObject(list, PdfObject):
    """A PDF array; behaves as a Python list of PdfObject instances."""

    def writeToStream(self, stream, encryption_key):
        stream.write(b_("["))
        for item in self:
            stream.write(b_(" "))
            item.writeToStream(stream, encryption_key)
        stream.write(b_(" ]"))

    @staticmethod
    def readFromStream(stream, pdf):
        if stream.read(1) != b_("["):
            raise utils.PdfReadError("Could not read array")
        result = ArrayObject()
        while True:
            # Skip whitespace before the next element.
            byte = stream.read(1)
            while byte.isspace():
                byte = stream.read(1)
            stream.seek(-1, 1)
            # A ']' terminates the array.
            if stream.read(1) == b_("]"):
                break
            stream.seek(-1, 1)
            # Otherwise parse and collect the next element.
            result.append(readObject(stream, pdf))
        return result
+
+
class IndirectObject(PdfObject):
    """A reference to another object ("<id> <generation> R"), resolved
    lazily through the owning PdfFileReader."""

    def __init__(self, idnum, generation, pdf):
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def getObject(self):
        # Resolve via the reader, then resolve once more in case the target
        # is itself an indirect reference.
        return self.pdf.getObject(self).getObject()

    def __repr__(self):
        return "IndirectObject(%r, %r)" % (self.idnum, self.generation)

    def __eq__(self, other):
        if other is None or not isinstance(other, IndirectObject):
            return False
        return (self.idnum == other.idnum
                and self.generation == other.generation
                and self.pdf is other.pdf)

    def __ne__(self, other):
        return not self.__eq__(other)

    def writeToStream(self, stream, encryption_key):
        stream.write(b_("%s %s R" % (self.idnum, self.generation)))

    @staticmethod
    def readFromStream(stream, pdf):
        # Accumulate the object id up to the first whitespace.
        idnum = b_("")
        while True:
            byte = stream.read(1)
            if not byte:
                raise PdfStreamError("Stream has ended unexpectedly")
            if byte.isspace():
                break
            idnum += byte
        # Accumulate the generation number, tolerating extra whitespace
        # between the two numbers.
        generation = b_("")
        while True:
            byte = stream.read(1)
            if not byte:
                raise PdfStreamError("Stream has ended unexpectedly")
            if byte.isspace():
                if not generation:
                    continue
                break
            generation += byte
        r = readNonWhitespace(stream)
        if r != b_("R"):
            raise utils.PdfReadError("Error reading indirect object reference at byte %s" % utils.hexStr(stream.tell()))
        return IndirectObject(int(idnum), int(generation), pdf)
+
+
class FloatObject(decimal.Decimal, PdfObject):
    """A PDF real number, stored as a Decimal for exact round-tripping."""

    def __new__(cls, value="0", context=None):
        try:
            return decimal.Decimal.__new__(cls, utils.str_(value), context)
        except Exception:
            # Fall back to plain str() for inputs Decimal cannot take directly.
            return decimal.Decimal.__new__(cls, str(value))

    def __repr__(self):
        if self == self.to_integral():
            # Integral values are rendered without a decimal point.
            return str(self.quantize(decimal.Decimal(1)))
        # Fixed 5-decimal formatting, then drop the extraneous trailing zeros.
        text = "%.5f" % self
        while text and text[-1] == '0':
            text = text[:-1]
        return text

    def as_numeric(self):
        return float(b_(repr(self)))

    def writeToStream(self, stream, encryption_key):
        stream.write(b_(repr(self)))
+
+
class NumberObject(int, PdfObject):
    """A PDF integer."""

    # Matches the first byte that cannot be part of a numeric token.
    NumberPattern = re.compile(b_('[^+-.0-9]'))
    ByteDot = b_(".")

    def __new__(cls, value):
        val = int(value)
        try:
            return int.__new__(cls, val)
        except OverflowError:
            # Cannot happen with Python 3 ints; kept for safety.
            return int.__new__(cls, 0)

    def as_numeric(self):
        return int(b_(repr(self)))

    def writeToStream(self, stream, encryption_key):
        stream.write(b_(repr(self)))

    @staticmethod
    def readFromStream(stream):
        token = utils.readUntilRegex(stream, NumberObject.NumberPattern)
        # A decimal point means the token is really a real number.
        if token.find(NumberObject.ByteDot) != -1:
            return FloatObject(token)
        return NumberObject(token)
+
+
def createStringObject(string):
    """Wrap *string* in the right PDF string type: a TextStringObject for
    text (str, or bytes that decode as UTF-16 / PDFDocEncoding), or a
    ByteStringObject for raw bytes that decode as neither."""
    if isinstance(string, utils.string_type):
        return TextStringObject(string)
    if not isinstance(string, utils.bytes_type):
        raise TypeError("createStringObject should have str or unicode arg")
    try:
        if string.startswith(codecs.BOM_UTF16_BE):
            result = TextStringObject(string.decode("utf-16"))
            result.autodetect_utf16 = True
        else:
            # Attempting the PDFDocEncoding decode is a performance hit, but
            # trying is the only way to know whether these bytes are text;
            # some strings are text, some are just byte arrays.
            result = TextStringObject(decode_pdfdocencoding(string))
            result.autodetect_pdfdocencoding = True
        return result
    except UnicodeDecodeError:
        return ByteStringObject(string)
+
+
def readHexStringFromStream(stream):
    """Read a hex string "<...>" from *stream*; per the PDF spec, an odd
    trailing digit is padded with '0'."""
    stream.read(1)  # consume the opening '<'
    txt = ""
    pair = b_("")
    while True:
        digit = readNonWhitespace(stream)
        if not digit:
            raise PdfStreamError("Stream has ended unexpectedly")
        if digit == b_(">"):
            break
        pair += digit
        if len(pair) == 2:
            # Two hex digits complete one character.
            txt += chr(int(pair, base=16))
            pair = b_("")
    # Flush a dangling single digit, zero-padded on the right.
    if len(pair) == 1:
        pair += b_("0")
    if len(pair) == 2:
        txt += chr(int(pair, base=16))
    return createStringObject(b_(txt))
+
+
def readStringFromStream(stream):
    """Read a literal PDF string delimited by parentheses.

    Handles balanced nested parentheses, single-character backslash
    escapes, one- to three-digit octal character codes, and
    backslash-escaped line breaks (PDF reference 7.3.4.2).

    :raises PdfStreamError: if the stream ends before the string closes.
    """
    tok = stream.read(1)  # consume the opening '('
    parens = 1
    txt = b_("")
    # Single-character escape sequences.  Built once, outside the read loop
    # (the original rebuilt this dict on every escape and listed the
    # b_("/") key twice).
    ESCAPE_DICT = {b_("n") : b_("\n"),
                   b_("r") : b_("\r"),
                   b_("t") : b_("\t"),
                   b_("b") : b_("\b"),
                   b_("f") : b_("\f"),
                   b_("c") : b_(r"\c"),
                   b_("(") : b_("("),
                   b_(")") : b_(")"),
                   b_("/") : b_("/"),
                   b_("\\") : b_("\\"),
                   b_(" ") : b_(" "),
                   b_("%") : b_("%"),
                   b_("<") : b_("<"),
                   b_(">") : b_(">"),
                   b_("[") : b_("["),
                   b_("]") : b_("]"),
                   b_("#") : b_("#"),
                   b_("_") : b_("_"),
                   b_("&") : b_("&"),
                   b_('$') : b_('$'),
                   }
    while True:
        tok = stream.read(1)
        if not tok:
            # stream has truncated prematurely
            raise PdfStreamError("Stream has ended unexpectedly")
        if tok == b_("("):
            parens += 1
        elif tok == b_(")"):
            parens -= 1
            if parens == 0:
                break
        elif tok == b_("\\"):
            tok = stream.read(1)
            try:
                tok = ESCAPE_DICT[tok]
            except KeyError:
                if tok.isdigit():
                    # "The number ddd may consist of one, two, or three
                    # octal digits; high-order overflow shall be ignored.
                    # Three octal digits shall be used, with leading zeros
                    # as needed, if the next character of the string is also
                    # a digit." (PDF reference 7.3.4.2, p 16)
                    for _ in range(2):
                        ntok = stream.read(1)
                        if ntok.isdigit():
                            tok += ntok
                        else:
                            break
                    tok = b_(chr(int(tok, base=8)))
                elif tok in b_("\n\r"):
                    # A backslash followed by a line break escapes the break.
                    # If it's a multi-char EOL, consume the second character:
                    tok = stream.read(1)
                    if not tok in b_("\n\r"):
                        stream.seek(-1, 1)
                    # Then don't add anything to the actual string, since this
                    # line break was escaped:
                    tok = b_('')
                else:
                    raise utils.PdfReadError(r"Unexpected escaped string: %s" % tok)
        txt += tok
    return createStringObject(txt)
+
+
class ByteStringObject(utils.bytes_type, PdfObject):
    """A string object whose text encoding could not be determined.

    Common in practice: the PDF spec stores clearly non-textual data (such
    as the encryption /O entry) in String objects too, with no alternate
    representation available.
    """

    # Mirrors TextStringObject.original_bytes; the raw bytes are self.
    original_bytes = property(lambda self: self)

    def writeToStream(self, stream, encryption_key):
        payload = self
        if encryption_key:
            payload = RC4_encrypt(encryption_key, payload)
        stream.write(b_("<"))
        stream.write(utils.hexencode(payload))
        stream.write(b_(">"))
+
+
class TextStringObject(utils.string_type, PdfObject):
    """A string object decoded into real text.

    Created when the raw bytes matched PDFDocEncoding, or carried a
    UTF-16BE BOM; the two flags below record which decoding produced it.
    """
    autodetect_pdfdocencoding = False
    autodetect_utf16 = False

    # The autodetection can mistake binary data for text; original_bytes
    # back-computes the encoded bytes this object was created from.
    original_bytes = property(lambda self: self.get_original_bytes())

    def get_original_bytes(self):
        # Re-encode with whichever decoding the autodetection applied when
        # the object was created; without that information we cannot know
        # the original bytes.
        if self.autodetect_utf16:
            return codecs.BOM_UTF16_BE + self.encode("utf-16be")
        elif self.autodetect_pdfdocencoding:
            return encode_pdfdocencoding(self)
        else:
            raise Exception("no information about original bytes")

    def writeToStream(self, stream, encryption_key):
        # Prefer PDFDocEncoding (nicer to look at in the PDF file); fall
        # back to UTF-16BE with a BOM when the text cannot be so encoded.
        # We take a performance hit here for trying.
        try:
            bytearr = encode_pdfdocencoding(self)
        except UnicodeEncodeError:
            bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
        if encryption_key:
            # Encrypted strings are written in hex form via ByteStringObject.
            bytearr = RC4_encrypt(encryption_key, bytearr)
            obj = ByteStringObject(bytearr)
            obj.writeToStream(stream, None)
        else:
            stream.write(b_("("))
            for c in bytearr:
                # Escape anything non-alphanumeric (except space) as octal.
                if not chr_(c).isalnum() and c != b_(' '):
                    stream.write(b_("\\%03o" % ord_(c)))
                else:
                    stream.write(b_(chr_(c)))
            stream.write(b_(")"))
+
+
class NameObject(str, PdfObject):
    """A PDF name object ("/Name"); behaves as its text value."""

    # Matches the first byte that terminates a name token.
    delimiterPattern = re.compile(b_(r"\s+|[\(\)<>\[\]{}/%]"))
    surfix = b_("/")

    def writeToStream(self, stream, encryption_key):
        stream.write(b_(self))

    @staticmethod
    def readFromStream(stream, pdf):
        name = stream.read(1)
        if name != NameObject.surfix:
            raise utils.PdfReadError("name read error")
        name += utils.readUntilRegex(stream, NameObject.delimiterPattern,
                                     ignore_eof=True)
        try:
            return NameObject(name.decode('utf-8'))
        except (UnicodeEncodeError, UnicodeDecodeError):
            # Name objects should represent irregular characters with a '#'
            # followed by the symbol's hex number; tolerate raw bytes when
            # not in strict mode.
            if pdf.strict:
                raise utils.PdfReadError("Illegal character in Name Object")
            warnings.warn("Illegal character in Name Object", utils.PdfReadWarning)
            return NameObject(name)
+
+
class DictionaryObject(dict, PdfObject):
    """A PDF dictionary.  Keys and values must be PdfObject instances, and
    item access resolves indirect references automatically.  Its
    readFromStream also detects and builds stream objects, since a PDF
    stream is a dictionary followed by the 'stream' keyword."""

    def raw_get(self, key):
        # Lookup WITHOUT resolving indirect references (cf. __getitem__).
        return dict.__getitem__(self, key)

    def __setitem__(self, key, value):
        if not isinstance(key, PdfObject):
            raise ValueError("key must be PdfObject")
        if not isinstance(value, PdfObject):
            raise ValueError("value must be PdfObject")
        return dict.__setitem__(self, key, value)

    def setdefault(self, key, value=None):
        if not isinstance(key, PdfObject):
            raise ValueError("key must be PdfObject")
        if not isinstance(value, PdfObject):
            raise ValueError("value must be PdfObject")
        return dict.setdefault(self, key, value)

    def __getitem__(self, key):
        # Resolves indirect references transparently on access.
        return dict.__getitem__(self, key).getObject()

    ##
    # Retrieves XMP (Extensible Metadata Platform) data relevant to
    # this object, if available.
    # <p>
    # Stability: Added in v1.12, will exist for all future v1.x releases.
    # @return Returns a {@link #xmp.XmpInformation XmlInformation} instance
    # that can be used to access XMP metadata from the document.  Can also
    # return None if no metadata was found on the document root.
    def getXmpMetadata(self):
        metadata = self.get("/Metadata", None)
        if metadata == None:
            return None
        metadata = metadata.getObject()
        from . import xmp
        if not isinstance(metadata, xmp.XmpInformation):
            # Wrap (and cache back into the dictionary) the raw stream.
            metadata = xmp.XmpInformation(metadata)
            self[NameObject("/Metadata")] = metadata
        return metadata

    ##
    # Read-only property that accesses the {@link
    # #DictionaryObject.getXmpMetadata getXmpMetadata} function.
    # <p>
    # Stability: Added in v1.12, will exist for all future v1.x releases.
    xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None)

    def writeToStream(self, stream, encryption_key):
        stream.write(b_("<<\n"))
        for key, value in list(self.items()):
            key.writeToStream(stream, encryption_key)
            stream.write(b_(" "))
            value.writeToStream(stream, encryption_key)
            stream.write(b_("\n"))
        stream.write(b_(">>"))

    def readFromStream(stream, pdf):
        """Parse a dictionary starting at the '<<' token; when a 'stream'
        keyword follows, parse the payload and return a stream object."""
        debug = False
        tmp = stream.read(2)
        if tmp != b_("<<"):
            raise utils.PdfReadError("Dictionary read error at byte %s: stream must begin with '<<'" % utils.hexStr(stream.tell()))
        data = {}
        while True:
            tok = readNonWhitespace(stream)
            if tok == b_('\x00'):
                # skip stray NUL bytes between entries
                continue
            elif tok == b_('%'):
                # comments may appear inside a dictionary body
                stream.seek(-1, 1)
                skipOverComment(stream)
                continue
            if not tok:
                # stream has truncated prematurely
                raise PdfStreamError("Stream has ended unexpectedly")

            if debug: print(("Tok:", tok))
            if tok == b_(">"):
                stream.read(1)
                break
            stream.seek(-1, 1)
            key = readObject(stream, pdf)
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
            value = readObject(stream, pdf)
            if not data.get(key):
                data[key] = value
            elif pdf.strict:
                # multiple definitions of key not permitted
                raise utils.PdfReadError("Multiple definitions in dictionary at byte %s for key %s" \
                                         % (utils.hexStr(stream.tell()), key))
            else:
                warnings.warn("Multiple definitions in dictionary at byte %s for key %s" \
                              % (utils.hexStr(stream.tell()), key), utils.PdfReadWarning)

        pos = stream.tell()
        s = readNonWhitespace(stream)
        if s == b_('s') and stream.read(5) == b_('tream'):
            eol = stream.read(1)
            # odd PDF file output has spaces after 'stream' keyword but before EOL.
            # patch provided by Danial Sandler
            while eol == b_(' '):
                eol = stream.read(1)
            assert eol in (b_("\n"), b_("\r"))
            if eol == b_("\r"):
                # read \n after
                if stream.read(1)  != b_('\n'):
                    stream.seek(-1, 1)
            # this is a stream object, not a dictionary
            assert "/Length" in data
            length = data["/Length"]
            if debug: print(data)
            if isinstance(length, IndirectObject):
                # /Length may itself be indirect: resolve it, then restore
                # the read position.
                t = stream.tell()
                length = pdf.getObject(length)
                stream.seek(t, 0)
            data["__streamdata__"] = stream.read(length)
            if debug: print("here")
            #if debug: print(binascii.hexlify(data["__streamdata__"]))
            e = readNonWhitespace(stream)
            ndstream = stream.read(8)
            if (e + ndstream) != b_("endstream"):
                # (sigh) - the odd PDF file has a length that is too long, so
                # we need to read backwards to find the "endstream" ending.
                # ReportLab (unknown version) generates files with this bug,
                # and Python users into PDF files tend to be our audience.
                # we need to do this to correct the streamdata and chop off
                # an extra character.
                pos = stream.tell()
                stream.seek(-10, 1)
                end = stream.read(9)
                if end == b_("endstream"):
                    # we found it by looking back one character further.
                    data["__streamdata__"] = data["__streamdata__"][:-1]
                else:
                    if debug: print(("E", e, ndstream, debugging.toHex(end)))
                    stream.seek(pos, 0)
                    raise utils.PdfReadError("Unable to find 'endstream' marker after stream at byte %s." % utils.hexStr(stream.tell()))
        else:
            # no stream payload: rewind to just after the dictionary
            stream.seek(pos, 0)
        if "__streamdata__" in data:
            return StreamObject.initializeFromDictionary(data)
        else:
            retval = DictionaryObject()
            retval.update(data)
            return retval
    readFromStream = staticmethod(readFromStream)
+
+
class TreeObject(DictionaryObject):
    """A PDF tree node (e.g. the outline tree): children form a linked
    list through /First, /Last, /Next and /Prev, with a /Count entry."""

    def __init__(self):
        DictionaryObject.__init__(self)

    def hasChildren(self):
        return '/First' in self

    def __iter__(self):
        return self.children()

    def children(self):
        """Yield each child in linked-list order.

        BUGFIX: this is a generator, so it must end with ``return`` rather
        than ``raise StopIteration`` -- under PEP 479 (enforced since
        Python 3.7) raising StopIteration inside a generator body is
        converted into a RuntimeError.
        """
        if not self.hasChildren():
            return

        child = self['/First']
        while True:
            yield child
            if child == self['/Last']:
                return
            child = child['/Next']

    def addChild(self, child, pdf):
        """Append *child* (object or reference) as the last child of this
        node, fixing up sibling/parent links and /Count."""
        childObj = child.getObject()
        child = pdf.getReference(childObj)
        assert isinstance(child, IndirectObject)

        if '/First' not in self:
            self[NameObject('/First')] = child
            self[NameObject('/Count')] = NumberObject(0)
            prev = None
        else:
            prev = self['/Last']

        self[NameObject('/Last')] = child
        self[NameObject('/Count')] = NumberObject(self[NameObject('/Count')] + 1)

        if prev:
            prevRef = pdf.getReference(prev)
            assert isinstance(prevRef, IndirectObject)
            childObj[NameObject('/Prev')] = prevRef
            prev[NameObject('/Next')] = child

        parentRef = pdf.getReference(self)
        assert isinstance(parentRef, IndirectObject)
        childObj[NameObject('/Parent')] = parentRef

    def removeChild(self, child):
        """Unlink *child* from this tree, repairing sibling pointers and
        /Count; raises ValueError if it is not a child of this node."""
        childObj = child.getObject()

        if NameObject('/Parent') not in childObj:
            raise ValueError("Removed child does not appear to be a tree item")
        elif childObj[NameObject('/Parent')] != self:
            raise ValueError("Removed child is not a member of this tree")

        found = False
        prevRef = None
        prev = None
        curRef = self[NameObject('/First')]
        cur = curRef.getObject()
        lastRef = self[NameObject('/Last')]
        last = lastRef.getObject()
        while cur != None:
            if cur == childObj:
                if prev == None:
                    if NameObject('/Next') in cur:
                        # Removing first tree node
                        nextRef = cur[NameObject('/Next')]
                        next = nextRef.getObject()
                        del next[NameObject('/Prev')]
                        self[NameObject('/First')] = nextRef
                        self[NameObject('/Count')] = self[NameObject('/Count')] - 1

                    else:
                        # Removing only tree node
                        assert self[NameObject('/Count')] == 1
                        del self[NameObject('/Count')]
                        del self[NameObject('/First')]
                        if NameObject('/Last') in self:
                            del self[NameObject('/Last')]
                else:
                    if NameObject('/Next') in cur:
                        # Removing middle tree node
                        nextRef = cur[NameObject('/Next')]
                        next = nextRef.getObject()
                        next[NameObject('/Prev')] = prevRef
                        prev[NameObject('/Next')] = nextRef
                        self[NameObject('/Count')] = self[NameObject('/Count')] - 1
                    else:
                        # Removing last tree node
                        assert cur == last
                        del prev[NameObject('/Next')]
                        self[NameObject('/Last')] = prevRef
                        self[NameObject('/Count')] = self[NameObject('/Count')] - 1
                found = True
                break

            prevRef = curRef
            prev = cur
            if NameObject('/Next') in cur:
                curRef = cur[NameObject('/Next')]
                cur = curRef.getObject()
            else:
                curRef = None
                cur = None

        if not found:
            raise ValueError("Removal couldn't find item in tree")

        del childObj[NameObject('/Parent')]
        if NameObject('/Next') in childObj:
            del childObj[NameObject('/Next')]
        if NameObject('/Prev') in childObj:
            del childObj[NameObject('/Prev')]

    def emptyTree(self):
        """Detach every child and clear the /First, /Last and /Count entries."""
        for child in self:
            childObj = child.getObject()
            del childObj[NameObject('/Parent')]
            if NameObject('/Next') in childObj:
                del childObj[NameObject('/Next')]
            if NameObject('/Prev') in childObj:
                del childObj[NameObject('/Prev')]

        if NameObject('/Count') in self:
            del self[NameObject('/Count')]
        if NameObject('/First') in self:
            del self[NameObject('/First')]
        if NameObject('/Last') in self:
            del self[NameObject('/Last')]
+
+
class StreamObject(DictionaryObject):
    """Base class for PDF streams: a dictionary plus a raw data payload
    (held in ``_data``)."""

    def __init__(self):
        self._data = None          # raw stream bytes
        self.decodedSelf = None    # cache slot used by EncodedStreamObject
    def writeToStream(self, stream, encryption_key):
        # /Length is derived from the data: set it only for serialization
        # and delete it again afterwards so it never goes stale.
        self[NameObject("/Length")] = NumberObject(len(self._data))
        DictionaryObject.writeToStream(self, stream, encryption_key)
        del self["/Length"]
        stream.write(b_("\nstream\n"))
        data = self._data
        if encryption_key:
            data = RC4_encrypt(encryption_key, data)
        stream.write(data)
        stream.write(b_("\nendstream"))

    def initializeFromDictionary(data):
        # Build an Encoded or Decoded stream depending on whether /Filter is
        # present; consumes the parser's "__streamdata__" and "/Length" keys.
        if "/Filter" in data:
            retval = EncodedStreamObject()
        else:
            retval = DecodedStreamObject()
        retval._data = data["__streamdata__"]
        del data["__streamdata__"]
        del data["/Length"]
        retval.update(data)
        return retval
    initializeFromDictionary = staticmethod(initializeFromDictionary)

    def flateEncode(self):
        # Return a new EncodedStreamObject with this stream's data
        # Flate-compressed, prepending /FlateDecode to any existing filters.
        if "/Filter" in self:
            f = self["/Filter"]
            if isinstance(f, ArrayObject):
                f.insert(0, NameObject("/FlateDecode"))
            else:
                newf = ArrayObject()
                newf.append(NameObject("/FlateDecode"))
                newf.append(f)
                f = newf
        else:
            f = NameObject("/FlateDecode")
        retval = EncodedStreamObject()
        retval[NameObject("/Filter")] = f
        retval._data = filters.FlateDecode.encode(self._data)
        return retval
+
+
class DecodedStreamObject(StreamObject):
    """A stream whose data is already decoded; accessors pass straight
    through to ``_data``."""

    def getData(self):
        return self._data

    def setData(self, data):
        self._data = data
+
+
class EncodedStreamObject(StreamObject):
    """A stream carrying still-encoded (filtered) data; getData decodes it
    lazily and caches the decoded twin in ``decodedSelf``."""

    def __init__(self):
        # BUGFIX/consistency: the base StreamObject.__init__ was never
        # invoked, so a directly constructed instance had no _data attribute
        # and getData raised AttributeError instead of behaving like an
        # empty stream.
        self._data = None
        self.decodedSelf = None

    def getData(self):
        """Return the decoded stream data, decoding and caching on first use."""
        if self.decodedSelf:
            # cached version of decoded object
            return self.decodedSelf.getData()
        else:
            # create decoded object
            decoded = DecodedStreamObject()

            decoded._data = filters.decodeStreamData(self)
            # Copy over every entry except the filter bookkeeping keys,
            # which no longer apply to decoded data.
            for key, value in list(self.items()):
                if not key in ("/Length", "/Filter", "/DecodeParms"):
                    decoded[key] = value
            self.decodedSelf = decoded
            return decoded._data

    def setData(self, data):
        """Encoded streams cannot be mutated through this API."""
        raise utils.PdfReadError("Creating EncodedStreamObject is not currently supported")
+
+
class RectangleObject(ArrayObject):
    """
    This class is used to represent *page boxes* in PyPDF2. These boxes include:

        * :attr:`artBox <PyPDF2.pdf.PageObject.artBox>`
        * :attr:`bleedBox <PyPDF2.pdf.PageObject.bleedBox>`
        * :attr:`cropBox <PyPDF2.pdf.PageObject.cropBox>`
        * :attr:`mediaBox <PyPDF2.pdf.PageObject.mediaBox>`
        * :attr:`trimBox <PyPDF2.pdf.PageObject.trimBox>`

    Stored as the four numbers [llx, lly, urx, ury]: the lower-left and
    upper-right corner coordinates.
    """
    def __init__(self, arr):
        # A PDF rectangle always holds exactly four numbers; coerce each
        # entry into a numeric PDF object where necessary.
        assert len(arr) == 4
        ArrayObject.__init__(self, [self.ensureIsNumber(coord) for coord in arr])

    def ensureIsNumber(self, value):
        # Leave PDF numeric objects alone; wrap anything else.
        if isinstance(value, (NumberObject, FloatObject)):
            return value
        return FloatObject(value)

    def __repr__(self):
        return "RectangleObject(%s)" % repr(list(self))

    # -- stored corner coordinates ------------------------------------
    def getLowerLeft_x(self):
        return self[0]

    def getLowerLeft_y(self):
        return self[1]

    def getUpperRight_x(self):
        return self[2]

    def getUpperRight_y(self):
        return self[3]

    # -- derived corners share coordinates with the stored ones -------
    def getUpperLeft_x(self):
        return self.getLowerLeft_x()

    def getUpperLeft_y(self):
        return self.getUpperRight_y()

    def getLowerRight_x(self):
        return self.getUpperRight_x()

    def getLowerRight_y(self):
        return self.getLowerLeft_y()

    # -- corners as (x, y) pairs --------------------------------------
    def getLowerLeft(self):
        return self.getLowerLeft_x(), self.getLowerLeft_y()

    def getLowerRight(self):
        return self.getLowerRight_x(), self.getLowerRight_y()

    def getUpperLeft(self):
        return self.getUpperLeft_x(), self.getUpperLeft_y()

    def getUpperRight(self):
        return self.getUpperRight_x(), self.getUpperRight_y()

    def setLowerLeft(self, value):
        self[0], self[1] = (self.ensureIsNumber(coord) for coord in value)

    def setLowerRight(self, value):
        self[2], self[1] = (self.ensureIsNumber(coord) for coord in value)

    def setUpperLeft(self, value):
        self[0], self[3] = (self.ensureIsNumber(coord) for coord in value)

    def setUpperRight(self, value):
        self[2], self[3] = (self.ensureIsNumber(coord) for coord in value)

    def getWidth(self):
        return self.getUpperRight_x() - self.getLowerLeft_x()

    def getHeight(self):
        return self.getUpperRight_y() - self.getLowerLeft_y()

    lowerLeft = property(getLowerLeft, setLowerLeft, None, None)
    """
    Property to read and modify the lower left coordinate of this box
    in (x,y) form.
    """
    lowerRight = property(getLowerRight, setLowerRight, None, None)
    """
    Property to read and modify the lower right coordinate of this box
    in (x,y) form.
    """
    upperLeft = property(getUpperLeft, setUpperLeft, None, None)
    """
    Property to read and modify the upper left coordinate of this box
    in (x,y) form.
    """
    upperRight = property(getUpperRight, setUpperRight, None, None)
    """
    Property to read and modify the upper right coordinate of this box
    in (x,y) form.
    """
+
+
class Field(TreeObject):
    """
    A class representing a field dictionary. This class is accessed through
    :meth:`getFields()<PyPDF2.PdfFileReader.getFields>`
    """

    # The standard field-dictionary keys copied from the source data
    # (PDF 1.7 reference, section 8.6.2).
    def __init__(self, data):
        DictionaryObject.__init__(self)
        for attr in ("/FT", "/Parent", "/Kids", "/T", "/TU", "/TM", "/Ff",
                     "/V", "/DV", "/AA"):
            # Only copy entries actually present in the source dictionary.
            try:
                self[NameObject(attr)] = data[attr]
            except KeyError:
                pass

    fieldType = property(lambda self: self.get("/FT"))
    """
    Read-only property accessing the type of this field.
    """

    parent = property(lambda self: self.get("/Parent"))
    """
    Read-only property accessing the parent of this field.
    """

    kids = property(lambda self: self.get("/Kids"))
    """
    Read-only property accessing the kids of this field.
    """

    name = property(lambda self: self.get("/T"))
    """
    Read-only property accessing the name of this field.
    """

    altName = property(lambda self: self.get("/TU"))
    """
    Read-only property accessing the alternate name of this field.
    """

    mappingName = property(lambda self: self.get("/TM"))
    """
    Read-only property accessing the mapping name of this field. This
    name is used by PyPDF2 as a key in the dictionary returned by
    :meth:`getFields()<PyPDF2.PdfFileReader.getFields>`
    """

    flags = property(lambda self: self.get("/Ff"))
    """
    Read-only property accessing the field flags, specifying various
    characteristics of the field (see Table 8.70 of the PDF 1.7 reference).
    """

    value = property(lambda self: self.get("/V"))
    """
    Read-only property accessing the value of this field. Format
    varies based on field type.
    """

    defaultValue = property(lambda self: self.get("/DV"))
    """
    Read-only property accessing the default value of this field.
    """

    additionalActions = property(lambda self: self.get("/AA"))
    """
    Read-only property accessing the additional actions dictionary.
    This dictionary defines the field's behavior in response to trigger events.
    See Section 8.5.2 of the PDF 1.7 reference.
    """
+
+
class Destination(TreeObject):
    """
    A class representing a destination within a PDF file.
    See section 8.2.1 of the PDF 1.6 reference.

    :param str title: Title of this destination.
    :param IndirectObject page: Reference to the page of this destination. Should
        be an instance of :class:`IndirectObject<PyPDF2.generic.IndirectObject>`.
    :param str typ: How the destination is displayed.
    :param args: Additional arguments may be necessary depending on the type.
    :raises PdfReadError: If destination type is invalid.

    Valid ``typ`` arguments (see PDF spec for details):
             /Fit       No additional arguments
             /XYZ       [left] [top] [zoomFactor]
             /FitH      [top]
             /FitV      [left]
             /FitR      [left] [bottom] [right] [top]
             /FitB      No additional arguments
             /FitBH     [top]
             /FitBV     [left]
    """
    def __init__(self, title, page, typ, *args):
        DictionaryObject.__init__(self)
        self[NameObject("/Title")] = title
        self[NameObject("/Page")] = page
        self[NameObject("/Type")] = typ

        # from table 8.2 of the PDF 1.7 reference.
        # Unpack the type-specific positional arguments into named keys.
        # NOTE: a wrong number of args raises a plain unpacking ValueError.
        if typ == "/XYZ":
            (self[NameObject("/Left")], self[NameObject("/Top")],
                self[NameObject("/Zoom")]) = args
        elif typ == "/FitR":
            (self[NameObject("/Left")], self[NameObject("/Bottom")],
                self[NameObject("/Right")], self[NameObject("/Top")]) = args
        elif typ in ["/FitH", "/FitBH"]:
            self[NameObject("/Top")], = args
        elif typ in ["/FitV", "/FitBV"]:
            self[NameObject("/Left")], = args
        elif typ in ["/Fit", "/FitB"]:
            # These fit modes take no extra arguments.
            pass
        else:
            raise utils.PdfReadError("Unknown Destination Type: %r" % typ)

    def getDestArray(self):
        # Destination array form: [page /Type arg1 arg2 ...], including
        # only the coordinate keys actually stored on this object.
        return ArrayObject([self.raw_get('/Page'), self['/Type']] + [self[x] for x in ['/Left', '/Bottom', '/Right', '/Top', '/Zoom'] if x in self])

    def writeToStream(self, stream, encryption_key):
        # Serialize as a /GoTo action dictionary: << /D [dest] /S /GoTo >>.
        stream.write(b_("<<\n"))
        key = NameObject('/D')
        key.writeToStream(stream, encryption_key)
        stream.write(b_(" "))
        value = self.getDestArray()
        value.writeToStream(stream, encryption_key)

        key = NameObject("/S")
        key.writeToStream(stream, encryption_key)
        stream.write(b_(" "))
        value = NameObject("/GoTo")
        value.writeToStream(stream, encryption_key)

        stream.write(b_("\n"))
        stream.write(b_(">>"))

    title = property(lambda self: self.get("/Title"))
    """
    Read-only property accessing the destination title.

    :rtype: str
    """

    page = property(lambda self: self.get("/Page"))
    """
    Read-only property accessing the destination page number.

    :rtype: int
    """

    typ = property(lambda self: self.get("/Type"))
    """
    Read-only property accessing the destination type.

    :rtype: str
    """

    zoom = property(lambda self: self.get("/Zoom", None))
    """
    Read-only property accessing the zoom factor.

    :rtype: int, or ``None`` if not available.
    """

    left = property(lambda self: self.get("/Left", None))
    """
    Read-only property accessing the left horizontal coordinate.

    :rtype: int, or ``None`` if not available.
    """

    right = property(lambda self: self.get("/Right", None))
    """
    Read-only property accessing the right horizontal coordinate.

    :rtype: int, or ``None`` if not available.
    """

    top = property(lambda self: self.get("/Top", None))
    """
    Read-only property accessing the top vertical coordinate.

    :rtype: int, or ``None`` if not available.
    """

    bottom = property(lambda self: self.get("/Bottom", None))
    """
    Read-only property accessing the bottom vertical coordinate.

    :rtype: int, or ``None`` if not available.
    """
+
+
class Bookmark(Destination):
    """A Destination that also serializes its outline-tree linkage keys."""

    def writeToStream(self, stream, encryption_key):
        stream.write(b_("<<\n"))
        # Emit whichever outline-tree entries are present on this bookmark.
        for name in ['/Title', '/Parent', '/First', '/Last', '/Next', '/Prev']:
            if name not in self:
                continue
            key = NameObject(name)
            key.writeToStream(stream, encryption_key)
            stream.write(b_(" "))
            self.raw_get(key).writeToStream(stream, encryption_key)
            stream.write(b_("\n"))
        # The destination itself goes under /Dest.
        dest_key = NameObject('/Dest')
        dest_key.writeToStream(stream, encryption_key)
        stream.write(b_(" "))
        self.getDestArray().writeToStream(stream, encryption_key)
        stream.write(b_("\n"))
        stream.write(b_(">>"))
+
+
def encode_pdfdocencoding(unicode_string):
    """Encode a text string into PDFDocEncoding bytes.

    :param unicode_string: the text to encode.
    :return: the encoded bytes.
    :raises UnicodeEncodeError: if a character has no PDFDocEncoding
        code point in the translation table.
    """
    # Collect one single-byte chunk per character and join once at the
    # end: the old ``retval += ...`` loop was quadratic in CPython 3,
    # since each += copied the whole accumulated bytes object.
    chunks = []
    for c in unicode_string:
        try:
            chunks.append(b_(chr(_pdfDocEncoding_rev[c])))
        except KeyError:
            raise UnicodeEncodeError("pdfdocencoding", c, -1, -1,
                    "does not exist in translation table")
    return b_('').join(chunks)
+
+
def decode_pdfdocencoding(byte_array):
    """Decode PDFDocEncoding bytes into a text string.

    :param byte_array: the raw bytes to decode.
    :return: the decoded text string.
    :raises UnicodeDecodeError: if a byte maps to no character (the
        translation table marks such slots with U+0000).
    """
    # Accumulate characters in a list and join once at the end: the old
    # ``retval += c`` loop was quadratic, copying the accumulated string
    # on every iteration.
    chars = []
    for b in byte_array:
        c = _pdfDocEncoding[ord_(b)]
        if c == u_('\u0000'):
            raise UnicodeDecodeError("pdfdocencoding", utils.barray(b), -1, -1,
                    "does not exist in translation table")
        chars.append(c)
    return u_('').join(chars)
+
# Forward mapping from PDFDocEncoding byte values (0-255) to Unicode
# characters (see the PDF reference, appendix D). Slots holding
# u_('\u0000') mark byte values with no assigned character.
_pdfDocEncoding = (
  u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'),
  u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'),
  u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'),
  u_('\u02d8'), u_('\u02c7'), u_('\u02c6'), u_('\u02d9'), u_('\u02dd'), u_('\u02db'), u_('\u02da'), u_('\u02dc'),
  u_('\u0020'), u_('\u0021'), u_('\u0022'), u_('\u0023'), u_('\u0024'), u_('\u0025'), u_('\u0026'), u_('\u0027'),
  u_('\u0028'), u_('\u0029'), u_('\u002a'), u_('\u002b'), u_('\u002c'), u_('\u002d'), u_('\u002e'), u_('\u002f'),
  u_('\u0030'), u_('\u0031'), u_('\u0032'), u_('\u0033'), u_('\u0034'), u_('\u0035'), u_('\u0036'), u_('\u0037'),
  u_('\u0038'), u_('\u0039'), u_('\u003a'), u_('\u003b'), u_('\u003c'), u_('\u003d'), u_('\u003e'), u_('\u003f'),
  u_('\u0040'), u_('\u0041'), u_('\u0042'), u_('\u0043'), u_('\u0044'), u_('\u0045'), u_('\u0046'), u_('\u0047'),
  u_('\u0048'), u_('\u0049'), u_('\u004a'), u_('\u004b'), u_('\u004c'), u_('\u004d'), u_('\u004e'), u_('\u004f'),
  u_('\u0050'), u_('\u0051'), u_('\u0052'), u_('\u0053'), u_('\u0054'), u_('\u0055'), u_('\u0056'), u_('\u0057'),
  u_('\u0058'), u_('\u0059'), u_('\u005a'), u_('\u005b'), u_('\u005c'), u_('\u005d'), u_('\u005e'), u_('\u005f'),
  u_('\u0060'), u_('\u0061'), u_('\u0062'), u_('\u0063'), u_('\u0064'), u_('\u0065'), u_('\u0066'), u_('\u0067'),
  u_('\u0068'), u_('\u0069'), u_('\u006a'), u_('\u006b'), u_('\u006c'), u_('\u006d'), u_('\u006e'), u_('\u006f'),
  u_('\u0070'), u_('\u0071'), u_('\u0072'), u_('\u0073'), u_('\u0074'), u_('\u0075'), u_('\u0076'), u_('\u0077'),
  u_('\u0078'), u_('\u0079'), u_('\u007a'), u_('\u007b'), u_('\u007c'), u_('\u007d'), u_('\u007e'), u_('\u0000'),
  u_('\u2022'), u_('\u2020'), u_('\u2021'), u_('\u2026'), u_('\u2014'), u_('\u2013'), u_('\u0192'), u_('\u2044'),
  u_('\u2039'), u_('\u203a'), u_('\u2212'), u_('\u2030'), u_('\u201e'), u_('\u201c'), u_('\u201d'), u_('\u2018'),
  u_('\u2019'), u_('\u201a'), u_('\u2122'), u_('\ufb01'), u_('\ufb02'), u_('\u0141'), u_('\u0152'), u_('\u0160'),
  u_('\u0178'), u_('\u017d'), u_('\u0131'), u_('\u0142'), u_('\u0153'), u_('\u0161'), u_('\u017e'), u_('\u0000'),
  u_('\u20ac'), u_('\u00a1'), u_('\u00a2'), u_('\u00a3'), u_('\u00a4'), u_('\u00a5'), u_('\u00a6'), u_('\u00a7'),
  u_('\u00a8'), u_('\u00a9'), u_('\u00aa'), u_('\u00ab'), u_('\u00ac'), u_('\u0000'), u_('\u00ae'), u_('\u00af'),
  u_('\u00b0'), u_('\u00b1'), u_('\u00b2'), u_('\u00b3'), u_('\u00b4'), u_('\u00b5'), u_('\u00b6'), u_('\u00b7'),
  u_('\u00b8'), u_('\u00b9'), u_('\u00ba'), u_('\u00bb'), u_('\u00bc'), u_('\u00bd'), u_('\u00be'), u_('\u00bf'),
  u_('\u00c0'), u_('\u00c1'), u_('\u00c2'), u_('\u00c3'), u_('\u00c4'), u_('\u00c5'), u_('\u00c6'), u_('\u00c7'),
  u_('\u00c8'), u_('\u00c9'), u_('\u00ca'), u_('\u00cb'), u_('\u00cc'), u_('\u00cd'), u_('\u00ce'), u_('\u00cf'),
  u_('\u00d0'), u_('\u00d1'), u_('\u00d2'), u_('\u00d3'), u_('\u00d4'), u_('\u00d5'), u_('\u00d6'), u_('\u00d7'),
  u_('\u00d8'), u_('\u00d9'), u_('\u00da'), u_('\u00db'), u_('\u00dc'), u_('\u00dd'), u_('\u00de'), u_('\u00df'),
  u_('\u00e0'), u_('\u00e1'), u_('\u00e2'), u_('\u00e3'), u_('\u00e4'), u_('\u00e5'), u_('\u00e6'), u_('\u00e7'),
  u_('\u00e8'), u_('\u00e9'), u_('\u00ea'), u_('\u00eb'), u_('\u00ec'), u_('\u00ed'), u_('\u00ee'), u_('\u00ef'),
  u_('\u00f0'), u_('\u00f1'), u_('\u00f2'), u_('\u00f3'), u_('\u00f4'), u_('\u00f5'), u_('\u00f6'), u_('\u00f7'),
  u_('\u00f8'), u_('\u00f9'), u_('\u00fa'), u_('\u00fb'), u_('\u00fc'), u_('\u00fd'), u_('\u00fe'), u_('\u00ff')
)

# Sanity check: the table must cover every possible byte value.
assert len(_pdfDocEncoding) == 256

# Reverse mapping (Unicode character -> byte value) used by
# encode_pdfdocencoding; unassigned (U+0000) slots are skipped.
_pdfDocEncoding_rev = {}
for i in range(256):
    char = _pdfDocEncoding[i]
    if char == u_("\u0000"):
        continue
    # Each character must map back to exactly one byte value.
    assert char not in _pdfDocEncoding_rev
    _pdfDocEncoding_rev[char] = i

+ 553 - 0
need_package/PyPDF2-master/PyPDF2/merger.py

@@ -0,0 +1,553 @@
+# vim: sw=4:expandtab:foldmethod=marker
+#
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+from .generic import *
+from .utils import isString, str_
+from .pdf import PdfFileReader, PdfFileWriter
+from .pagerange import PageRange
+from sys import version_info
+if version_info < ( 3, 0 ):
+    from cStringIO import StringIO
+    StreamIO = StringIO
+else:
+    from io import BytesIO
+    from io import FileIO as file
+    StreamIO = BytesIO
+
+
+class _MergedPage(object):
+    """
+    _MergedPage is used internally by PdfFileMerger to collect necessary
+    information on each page that is being merged.
+    """
+    def __init__(self, pagedata, src, id):
+        self.src = src
+        self.pagedata = pagedata
+        self.out_pagedata = None
+        self.id = id
+
+
+class PdfFileMerger(object):
+    """
+    Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
+    into a single PDF. It can concatenate, slice, insert, or any combination
+    of the above.
+
+    See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`)
+    and :meth:`write()<write>` for usage information.
+
+    :param bool strict: Determines whether user should be warned of all
+            problems and also causes some correctable problems to be fatal.
+            Defaults to ``True``.
+    """
+
+    def __init__(self, strict=True):
+        self.inputs = []
+        self.pages = []
+        self.output = PdfFileWriter()
+        self.bookmarks = []
+        self.named_dests = []
+        self.id_count = 0
+        self.strict = strict
+
+    def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
+        """
+        Merges the pages from the given file into the output file at the
+        specified page number.
+
+        :param int position: The *page number* to insert this file. File will
+            be inserted after the given number.
+
+        :param fileobj: A File Object or an object that supports the standard read
+            and seek methods similar to a File Object. Could also be a
+            string representing a path to a PDF file.
+
+        :param str bookmark: Optionally, you may specify a bookmark to be applied at
+            the beginning of the included file by supplying the text of the bookmark.
+
+        :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
+            to merge only the specified range of pages from the source
+            document into the output document.
+
+        :param bool import_bookmarks: You may prevent the source document's bookmarks
+            from being imported by specifying this as ``False``.
+        """
+
+        # This parameter is passed to self.inputs.append and means
+        # that the stream used was created in this method.
+        my_file = False
+
+        # If the fileobj parameter is a string, assume it is a path
+        # and create a file object at that location. If it is a file,
+        # copy the file's contents into a BytesIO (or StreamIO) stream object; if
+        # it is a PdfFileReader, copy that reader's stream into a
+        # BytesIO (or StreamIO) stream.
+        # If fileobj is none of the above types, it is not modified
+        decryption_key = None
+        if isString(fileobj):
+            fileobj = file(fileobj, 'rb')
+            my_file = True
+        elif hasattr(fileobj, "seek") and hasattr(fileobj, "read"):
+            fileobj.seek(0)
+            filecontent = fileobj.read()
+            fileobj = StreamIO(filecontent)
+            my_file = True
+        elif isinstance(fileobj, PdfFileReader):
+            orig_tell = fileobj.stream.tell()
+            fileobj.stream.seek(0)
+            filecontent = StreamIO(fileobj.stream.read())
+            fileobj.stream.seek(orig_tell) # reset the stream to its original location
+            fileobj = filecontent
+            if hasattr(fileobj, '_decryption_key'):
+                decryption_key = fileobj._decryption_key
+            my_file = True
+
+        # Create a new PdfFileReader instance using the stream
+        # (either file or BytesIO or StringIO) created above
+        pdfr = PdfFileReader(fileobj, strict=self.strict)
+        if decryption_key is not None:
+            pdfr._decryption_key = decryption_key
+
+        # Find the range of pages to merge.
+        if pages == None:
+            pages = (0, pdfr.getNumPages())
+        elif isinstance(pages, PageRange):
+            pages = pages.indices(pdfr.getNumPages())
+        elif not isinstance(pages, tuple):
+            raise TypeError('"pages" must be a tuple of (start, stop[, step])')
+
+        srcpages = []
+        if bookmark:
+            bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))
+
+        outline = []
+        if import_bookmarks:
+            outline = pdfr.getOutlines()
+            outline = self._trim_outline(pdfr, outline, pages)
+
+        if bookmark:
+            self.bookmarks += [bookmark, outline]
+        else:
+            self.bookmarks += outline
+
+        dests = pdfr.namedDestinations
+        dests = self._trim_dests(pdfr, dests, pages)
+        self.named_dests += dests
+
+        # Gather all the pages that are going to be merged
+        for i in range(*pages):
+            pg = pdfr.getPage(i)
+
+            id = self.id_count
+            self.id_count += 1
+
+            mp = _MergedPage(pg, pdfr, id)
+
+            srcpages.append(mp)
+
+        self._associate_dests_to_pages(srcpages)
+        self._associate_bookmarks_to_pages(srcpages)
+
+        # Slice to insert the pages at the specified position
+        self.pages[position:position] = srcpages
+
+        # Keep track of our input files so we can close them later
+        self.inputs.append((fileobj, pdfr, my_file))
+
+    def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
+        """
+        Identical to the :meth:`merge()<merge>` method, but assumes you want to concatenate
+        all pages onto the end of the file instead of specifying a position.
+
+        :param fileobj: A File Object or an object that supports the standard read
+            and seek methods similar to a File Object. Could also be a
+            string representing a path to a PDF file.
+
+        :param str bookmark: Optionally, you may specify a bookmark to be applied at
+            the beginning of the included file by supplying the text of the bookmark.
+
+        :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
+            to merge only the specified range of pages from the source
+            document into the output document.
+
+        :param bool import_bookmarks: You may prevent the source document's bookmarks
+            from being imported by specifying this as ``False``.
+        """
+
+        self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
+
+    def write(self, fileobj):
+        """
+        Writes all data that has been merged to the given output file.
+
+        :param fileobj: Output file. Can be a filename or any kind of
+            file-like object.
+        """
+        my_file = False
+        if isString(fileobj):
+            fileobj = file(fileobj, 'wb')
+            my_file = True
+
+        # Add pages to the PdfFileWriter
+        # The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13
+        for page in self.pages:
+            self.output.addPage(page.pagedata)
+            page.out_pagedata = self.output.getReference(self.output._pages.getObject()["/Kids"][-1].getObject())
+            #idnum = self.output._objects.index(self.output._pages.getObject()["/Kids"][-1].getObject()) + 1
+            #page.out_pagedata = IndirectObject(idnum, 0, self.output)
+
+        # Once all pages are added, create bookmarks to point at those pages
+        self._write_dests()
+        self._write_bookmarks()
+
+        # Write the output to the file
+        self.output.write(fileobj)
+
+        if my_file:
+            fileobj.close()
+
+    def close(self):
+        """
+        Shuts all file descriptors (input and output) and clears all memory
+        usage.
+        """
+        self.pages = []
+        for fo, _pdfr, mine in self.inputs:
+            if mine:
+                fo.close()
+
+        self.inputs = []
+        self.output = None
+
+    def addMetadata(self, infos):
+        """
+        Add custom metadata to the output.
+
+        :param dict infos: a Python dictionary where each key is a field
+            and each value is your new metadata.
+            Example: ``{u'/Title': u'My title'}``
+        """
+        self.output.addMetadata(infos)
+
+    def setPageLayout(self, layout):
+        """
+        Set the page layout
+
+        :param str layout: The page layout to be used
+
+        Valid layouts are:
+             /NoLayout        Layout explicitly not specified
+             /SinglePage      Show one page at a time
+             /OneColumn       Show one column at a time
+             /TwoColumnLeft   Show pages in two columns, odd-numbered pages on the left
+             /TwoColumnRight  Show pages in two columns, odd-numbered pages on the right
+             /TwoPageLeft     Show two pages at a time, odd-numbered pages on the left
+             /TwoPageRight    Show two pages at a time, odd-numbered pages on the right
+        """
+        self.output.setPageLayout(layout)
+
+    def setPageMode(self, mode):
+        """
+        Set the page mode.
+
+        :param str mode: The page mode to use.
+
+        Valid modes are:
+            /UseNone         Do not show outlines or thumbnails panels
+            /UseOutlines     Show outlines (aka bookmarks) panel
+            /UseThumbs       Show page thumbnails panel
+            /FullScreen      Fullscreen view
+            /UseOC           Show Optional Content Group (OCG) panel
+            /UseAttachments  Show attachments panel
+        """
+        self.output.setPageMode(mode)
+
+    def _trim_dests(self, pdf, dests, pages):
+        """
+        Removes any named destinations that are not a part of the specified
+        page set.
+        """
+        new_dests = []
+        prev_header_added = True
+        for k, o in list(dests.items()):
+            for j in range(*pages):
+                if pdf.getPage(j).getObject() == o['/Page'].getObject():
+                    o[NameObject('/Page')] = o['/Page'].getObject()
+                    assert str_(k) == str_(o['/Title'])
+                    new_dests.append(o)
+                    break
+        return new_dests
+
    def _trim_outline(self, pdf, outline, pages):
        """
        Removes any outline/bookmark entries that are not a part of the
        specified page set.
        """
        new_outline = []
        # Tracks whether the most recent non-list entry was kept, so a
        # dropped parent can be re-added if its children survive trimming.
        prev_header_added = True
        for i, o in enumerate(outline):
            if isinstance(o, list):
                # A nested list holds the children of the preceding entry;
                # trim it recursively.
                sub = self._trim_outline(pdf, o, pages)
                if sub:
                    if not prev_header_added:
                        # Keep the parent header even though its own page fell
                        # outside the range, so the kept children have context.
                        new_outline.append(outline[i-1])
                    new_outline.append(sub)
            else:
                prev_header_added = False
                # Keep the entry only if its target page is in the page set.
                for j in range(*pages):
                    if pdf.getPage(j).getObject() == o['/Page'].getObject():
                        # Resolve the indirect page reference in place.
                        o[NameObject('/Page')] = o['/Page'].getObject()
                        new_outline.append(o)
                        prev_header_added = True
                        break
        return new_outline
+
+    def _write_dests(self):
+        dests = self.named_dests
+
+        for v in dests:
+            pageno = None
+            pdf = None
+            if '/Page' in v:
+                for i, p in enumerate(self.pages):
+                    if p.id == v['/Page']:
+                        v[NameObject('/Page')] = p.out_pagedata
+                        pageno = i
+                        pdf = p.src
+                        break
+            if pageno != None:
+                self.output.addNamedDestinationObject(v)
+
+    def _write_bookmarks(self, bookmarks=None, parent=None):
+
+        if bookmarks == None:
+            bookmarks = self.bookmarks
+
+        last_added = None
+        for b in bookmarks:
+            if isinstance(b, list):
+                self._write_bookmarks(b, last_added)
+                continue
+
+            pageno = None
+            pdf = None
+            if '/Page' in b:
+                for i, p in enumerate(self.pages):
+                    if p.id == b['/Page']:
+                        #b[NameObject('/Page')] = p.out_pagedata
+                        args = [NumberObject(p.id), NameObject(b['/Type'])]
+                        #nothing more to add
+                        #if b['/Type'] == '/Fit' or b['/Type'] == '/FitB'
+                        if b['/Type'] == '/FitH' or b['/Type'] == '/FitBH':
+                            if '/Top' in b and not isinstance(b['/Top'], NullObject):
+                                args.append(FloatObject(b['/Top']))
+                            else:
+                                args.append(FloatObject(0))
+                            del b['/Top']
+                        elif b['/Type'] == '/FitV' or b['/Type'] == '/FitBV':
+                            if '/Left' in b and not isinstance(b['/Left'], NullObject):
+                                args.append(FloatObject(b['/Left']))
+                            else:
+                                args.append(FloatObject(0))
+                            del b['/Left']
+                        elif b['/Type'] == '/XYZ':
+                            if '/Left' in b and not isinstance(b['/Left'], NullObject):
+                                args.append(FloatObject(b['/Left']))
+                            else:
+                                args.append(FloatObject(0))
+                            if '/Top' in b and not isinstance(b['/Top'], NullObject):
+                                args.append(FloatObject(b['/Top']))
+                            else:
+                                args.append(FloatObject(0))
+                            if '/Zoom' in b and not isinstance(b['/Zoom'], NullObject):
+                                args.append(FloatObject(b['/Zoom']))
+                            else:
+                                args.append(FloatObject(0))
+                            del b['/Top'], b['/Zoom'], b['/Left']
+                        elif b['/Type'] == '/FitR':
+                            if '/Left' in b and not isinstance(b['/Left'], NullObject):
+                                args.append(FloatObject(b['/Left']))
+                            else:
+                                args.append(FloatObject(0))
+                            if '/Bottom' in b and not isinstance(b['/Bottom'], NullObject):
+                                args.append(FloatObject(b['/Bottom']))
+                            else:
+                                args.append(FloatObject(0))
+                            if '/Right' in b and not isinstance(b['/Right'], NullObject):
+                                args.append(FloatObject(b['/Right']))
+                            else:
+                                args.append(FloatObject(0))
+                            if '/Top' in b and not isinstance(b['/Top'], NullObject):
+                                args.append(FloatObject(b['/Top']))
+                            else:
+                                args.append(FloatObject(0))
+                            del b['/Left'], b['/Right'], b['/Bottom'], b['/Top']
+
+                        b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})
+
+                        pageno = i
+                        pdf = p.src
+                        break
+            if pageno != None:
+                del b['/Page'], b['/Type']
+                last_added = self.output.addBookmarkDict(b, parent)
+
+    def _associate_dests_to_pages(self, pages):
+        for nd in self.named_dests:
+            pageno = None
+            np = nd['/Page']
+
+            if isinstance(np, NumberObject):
+                continue
+
+            for p in pages:
+                if np.getObject() == p.pagedata.getObject():
+                    pageno = p.id
+
+            if pageno != None:
+                nd[NameObject('/Page')] = NumberObject(pageno)
+            else:
+                raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],))
+
+    def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
+        if bookmarks == None:
+            bookmarks = self.bookmarks
+
+        for b in bookmarks:
+            if isinstance(b, list):
+                self._associate_bookmarks_to_pages(pages, b)
+                continue
+
+            pageno = None
+            bp = b['/Page']
+
+            if isinstance(bp, NumberObject):
+                continue
+
+            for p in pages:
+                if bp.getObject() == p.pagedata.getObject():
+                    pageno = p.id
+
+            if pageno != None:
+                b[NameObject('/Page')] = NumberObject(pageno)
+            else:
+                raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],))
+
+    def findBookmark(self, bookmark, root=None):
+        if root == None:
+            root = self.bookmarks
+
+        for i, b in enumerate(root):
+            if isinstance(b, list):
+                res = self.findBookmark(bookmark, b)
+                if res:
+                    return [i] + res
+            elif b == bookmark or b['/Title'] == bookmark:
+                return [i]
+
+        return None
+
+    def addBookmark(self, title, pagenum, parent=None):
+        """
+        Add a bookmark to this PDF file.
+
+        :param str title: Title to use for this bookmark.
+        :param int pagenum: Page number this bookmark will point to.
+        :param parent: A reference to a parent bookmark to create nested
+            bookmarks.
+        """
+        if parent == None:
+            iloc = [len(self.bookmarks)-1]
+        elif isinstance(parent, list):
+            iloc = parent
+        else:
+            iloc = self.findBookmark(parent)
+
+        dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
+
+        if parent == None:
+            self.bookmarks.append(dest)
+        else:
+            bmparent = self.bookmarks
+            for i in iloc[:-1]:
+                bmparent = bmparent[i]
+            npos = iloc[-1]+1
+            if npos < len(bmparent) and isinstance(bmparent[npos], list):
+                bmparent[npos].append(dest)
+            else:
+                bmparent.insert(npos, [dest])
+        return dest
+
+    def addNamedDestination(self, title, pagenum):
+        """
+        Add a destination to the output.
+
+        :param str title: Title to use
+        :param int pagenum: Page number this destination points at.
+        """
+
+        dest = Destination(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
+        self.named_dests.append(dest)
+
+
class OutlinesObject(list):
    """A list of outline (bookmark) entries kept in sync with a TreeObject."""

    def __init__(self, pdf, tree, parent=None):
        super(OutlinesObject, self).__init__()
        self.tree = tree
        self.pdf = pdf
        self.parent = parent

    def remove(self, index):
        """Delete the entry at *index* both locally and from the tree."""
        victim = self[index]
        del self[index]
        self.tree.removeChild(victim)

    def add(self, title, pagenum):
        """Append a '/GoTo' bookmark titled *title* targeting page *pagenum*."""
        page_ref = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum]

        # The action jumps to the page, fit-horizontal at offset 826.
        action = DictionaryObject()
        action.update({
            NameObject('/D'): ArrayObject([page_ref, NameObject('/FitH'), NumberObject(826)]),
            NameObject('/S'): NameObject('/GoTo'),
        })
        action_ref = self.pdf._addObject(action)

        bookmark = TreeObject()
        bookmark.update({
            NameObject('/A'): action_ref,
            NameObject('/Title'): createStringObject(title),
        })
        self.pdf._addObject(bookmark)
        self.tree.addChild(bookmark)

    def removeAll(self):
        """Drop every child from the tree and empty this list."""
        for child in list(self.tree.children()):
            self.tree.removeChild(child)
            self.pop()

+ 152 - 0
need_package/PyPDF2-master/PyPDF2/pagerange.py

@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+"""
+Representation and utils for ranges of PDF file pages.
+
+Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
+All rights reserved. This software is available under a BSD license;
+see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
+"""
+
+import re
+from .utils import isString
+
_INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".
PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
# groups:         12     34     5 6     7 8


class ParseError(Exception):
    """Raised when a string cannot be parsed as a page range."""
    pass


PAGE_RANGE_HELP = """Remember, page indices start with zero.
        Page range expression examples:
            :     all pages.                   -1    last page.
            22    just the 23rd page.          :-1   all but the last page.
            0:3   the first three pages.       -2    second-to-last page.
            :3    the first three pages.       -2:   last two pages.
            5:    from the sixth page onward.  -3:-1 third & second to last.
        The third, "stride" or "step" number is also recognized.
            ::2       0 2 4 ... to the end.    3:0:-1    3 2 1 but not 0.
            1:10:2    1 3 5 7 9                2::-1     2 1 0.
            ::-1      all pages in reverse order.
"""


class PageRange(object):
    """
    A slice-like representation of a range of page indices (page numbers,
    starting at zero).  The syntax matches what goes between brackets [ ].
    The slice type cannot be subclassed, but this class converts to and
    from slices and allows similar use:
      o  PageRange(str) parses a string representing a page range.
      o  PageRange(slice) directly "imports" a slice.
      o  to_slice() gives the equivalent slice.
      o  str() and repr() allow printing.
      o  indices(n) is like slice.indices(n).
    """

    def __init__(self, arg):
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
            "int", "[int]:[int]" or "[int]:[int]:[int]",
            where the brackets indicate optional ints.
        {page_range_help}
        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.
        """
        if isinstance(arg, slice):
            self._slice = arg
        elif isinstance(arg, PageRange):
            self._slice = arg.to_slice()
        else:
            match = isString(arg) and re.match(PAGE_RANGE_RE, arg)
            if not match:
                raise ParseError(arg)
            if match.group(2):
                # Special case: a bare int denotes a range of one page.
                first = int(match.group(2))
                self._slice = slice(first, first + 1 if first != -1 else None)
            else:
                bounds = [int(g) if g else None for g in match.group(4, 6, 8)]
                self._slice = slice(*bounds)

    # Substitute the help text into __init__'s docstring (only when
    # docstrings are retained, i.e. not under python -OO).
    if __init__.__doc__:
        __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)

    @staticmethod
    def valid(input):
        """ True if input is a valid initializer for a PageRange. """
        if isinstance(input, (slice, PageRange)):
            return True
        return isString(input) and bool(re.match(PAGE_RANGE_RE, input))

    def to_slice(self):
        """ Return the slice equivalent of this page range. """
        return self._slice

    def __str__(self):
        """ A string like "1:2:3". """
        s = self._slice
        if s.step is None:
            if s.start is not None and s.stop == s.start + 1:
                return str(s.start)
            parts = (s.start, s.stop)
        else:
            parts = (s.start, s.stop, s.step)
        return ':'.join("" if p is None else str(p) for p in parts)

    def __repr__(self):
        """ A string like "PageRange('1:2:3')". """
        return "PageRange(%r)" % str(self)

    def indices(self, n):
        """
        n is the length of the list of pages to choose from.
        Returns arguments for range().  See help(slice.indices).
        """
        return self._slice.indices(n)
+
+
PAGE_RANGE_ALL = PageRange(":")  # The range of all pages.


def parse_filename_page_ranges(args):
    """
    Given a list of filenames and page ranges, return a list of
    (filename, page_range) pairs.
    First arg must be a filename; other args are filenames, page-range
    expressions, slice objects, or PageRange objects.
    A filename not followed by a page range indicates all pages of the file.
    """
    pairs = []
    current_file = None
    have_range = False
    # The trailing None sentinel flushes the final filename.
    for token in args + [None]:
        if PageRange.valid(token):
            if not current_file:
                raise ValueError("The first argument must be a filename, "
                                 "not a page range.")
            pairs.append((current_file, PageRange(token)))
            have_range = True
        else:
            # Token starts a new filename (or is the end sentinel); a file
            # that received no explicit range means all of its pages.
            if current_file and not have_range:
                pairs.append((current_file, PAGE_RANGE_ALL))
            current_file = token
            have_range = False
    return pairs

+ 3070 - 0
need_package/PyPDF2-master/PyPDF2/pdf.py

@@ -0,0 +1,3070 @@
+# -*- coding: utf-8 -*-
+#
+# vim: sw=4:expandtab:foldmethod=marker
+#
+# Copyright (c) 2006, Mathieu Fenniak
+# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+A pure-Python PDF library with an increasing number of capabilities.
+See README for links to FAQ, documentation, homepage, etc.
+"""
+
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+__maintainer__ = "Phaseit, Inc."
+__maintainer_email = "PyPDF2@phaseit.net"
+
+import math
+import struct
+import sys
+from sys import version_info
+if version_info < ( 3, 0 ):
+    from cStringIO import StringIO
+else:
+    from io import StringIO
+
+if version_info < ( 3, 0 ):
+    BytesIO = StringIO
+else:
+    from io import BytesIO
+
+from .generic import *
+from .utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
+from .utils import isString, b_, u_, ord_, str_, formatWarning
+
+if version_info < ( 2, 4 ):
+   from sets import ImmutableSet as frozenset
+
+if version_info < ( 2, 5 ):
+    from md5 import md5
+else:
+    from hashlib import md5
+import uuid
+
+
+class PdfFileWriter(object):
+    """
+    This class supports writing PDF files out, given pages produced by another
+    class (typically :class:`PdfFileReader<PdfFileReader>`).
+    """
    def __init__(self):
        """Create an empty writer: a zero-page /Pages tree, an /Info
        dictionary naming PyPDF2 as producer, and a /Catalog root."""
        self._header = b_("%PDF-1.3")
        self._objects = []  # array of indirect objects

        # The root of our page tree node.
        pages = DictionaryObject()
        pages.update({
                NameObject("/Type"): NameObject("/Pages"),
                NameObject("/Count"): NumberObject(0),
                NameObject("/Kids"): ArrayObject(),
                })
        self._pages = self._addObject(pages)

        # info object
        info = DictionaryObject()
        info.update({
                NameObject("/Producer"): createStringObject(codecs.BOM_UTF16_BE + u_("PyPDF2").encode('utf-16be'))
                })
        self._info = self._addObject(info)

        # root object
        root = DictionaryObject()
        root.update({
            NameObject("/Type"): NameObject("/Catalog"),
            NameObject("/Pages"): self._pages,
            })
        # The catalog is registered as an indirect object lazily (see
        # write()); until then _root stays None and _root_object holds
        # the raw dictionary.
        self._root = None
        self._root_object = root
+
+    def _addObject(self, obj):
+        self._objects.append(obj)
+        return IndirectObject(len(self._objects), 0, self)
+
+    def getObject(self, ido):
+        if ido.pdf != self:
+            raise ValueError("pdf must be self")
+        return self._objects[ido.idnum - 1]
+
+    def _addPage(self, page, action):
+        assert page["/Type"] == "/Page"
+        page[NameObject("/Parent")] = self._pages
+        page = self._addObject(page)
+        pages = self.getObject(self._pages)
+        action(pages["/Kids"], page)
+        pages[NameObject("/Count")] = NumberObject(pages["/Count"] + 1)
+
+    def addPage(self, page):
+        """
+        Adds a page to this PDF file.  The page is usually acquired from a
+        :class:`PdfFileReader<PdfFileReader>` instance.
+
+        :param PageObject page: The page to add to the document. Should be
+            an instance of :class:`PageObject<PyPDF2.pdf.PageObject>`
+        """
+        self._addPage(page, list.append)
+
+    def insertPage(self, page, index=0):
+        """
+        Insert a page in this PDF file. The page is usually acquired from a
+        :class:`PdfFileReader<PdfFileReader>` instance.
+
+        :param PageObject page: The page to add to the document.  This
+            argument should be an instance of :class:`PageObject<pdf.PageObject>`.
+        :param int index: Position at which the page will be inserted.
+        """
+        self._addPage(page, lambda l, p: l.insert(index, p))
+
+    def getPage(self, pageNumber):
+        """
+        Retrieves a page by number from this PDF file.
+
+        :param int pageNumber: The page number to retrieve
+            (pages begin at zero)
+        :return: the page at the index given by *pageNumber*
+        :rtype: :class:`PageObject<pdf.PageObject>`
+        """
+        pages = self.getObject(self._pages)
+        # XXX: crude hack
+        return pages["/Kids"][pageNumber].getObject()
+
+    def getNumPages(self):
+        """
+        :return: the number of pages.
+        :rtype: int
+        """
+        pages = self.getObject(self._pages)
+        return int(pages[NameObject("/Count")])
+
+    def addBlankPage(self, width=None, height=None):
+        """
+        Appends a blank page to this PDF file and returns it. If no page size
+        is specified, use the size of the last page.
+
+        :param float width: The width of the new page expressed in default user
+            space units.
+        :param float height: The height of the new page expressed in default
+            user space units.
+        :return: the newly appended page
+        :rtype: :class:`PageObject<PyPDF2.pdf.PageObject>`
+        :raises PageSizeNotDefinedError: if width and height are not defined
+            and previous page does not exist.
+        """
+        page = PageObject.createBlankPage(self, width, height)
+        self.addPage(page)
+        return page
+
+    def insertBlankPage(self, width=None, height=None, index=0):
+        """
+        Inserts a blank page to this PDF file and returns it. If no page size
+        is specified, use the size of the last page.
+
+        :param float width: The width of the new page expressed in default user
+            space units.
+        :param float height: The height of the new page expressed in default
+            user space units.
+        :param int index: Position to add the page.
+        :return: the newly appended page
+        :rtype: :class:`PageObject<PyPDF2.pdf.PageObject>`
+        :raises PageSizeNotDefinedError: if width and height are not defined
+            and previous page does not exist.
+        """
+        if width is None or height is None and \
+                (self.getNumPages() - 1) >= index:
+            oldpage = self.getPage(index)
+            width = oldpage.mediaBox.getWidth()
+            height = oldpage.mediaBox.getHeight()
+        page = PageObject.createBlankPage(self, width, height)
+        self.insertPage(page, index)
+        return page
+
    def addJS(self, javascript):
        """
        Add Javascript which will launch upon opening this PDF.

        :param str javascript: Your Javascript.

        >>> output.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
        # Example: This will launch the print window when the PDF is opened.
        """
        # Build a /JavaScript action dictionary.
        # NOTE(review): the script is wrapped as NameObject("(...)"), abusing
        # the name type to emit a parenthesized literal string; a string
        # object would be the type-correct choice -- confirm before changing
        # the serialized output.
        js = DictionaryObject()
        js.update({
                NameObject("/Type"): NameObject("/Action"),
                NameObject("/S"): NameObject("/JavaScript"),
                NameObject("/JS"): NameObject("(%s)" % javascript)
                })
        js_indirect_object = self._addObject(js)

        # We need a name for parameterized javascript in the pdf file, but it can be anything.
        js_string_name = str(uuid.uuid4())

        # Register the action in the document's /JavaScript name tree.
        js_name_tree = DictionaryObject()
        js_name_tree.update({
                NameObject("/JavaScript"): DictionaryObject({
                  NameObject("/Names"): ArrayObject([createStringObject(js_string_name), js_indirect_object])
                })
              })
        self._addObject(js_name_tree)

        # /OpenAction triggers the script on document open.  NOTE(review):
        # dict.update replaces any existing /OpenAction or /Names entry in
        # the catalog, so repeated calls keep only the last script.
        self._root_object.update({
                NameObject("/OpenAction"): js_indirect_object,
                NameObject("/Names"): js_name_tree
                })
+
    def addAttachment(self, fname, fdata):
        """
        Embed a file inside the PDF.

        :param str fname: The filename to display.
        :param str fdata: The data in the file.

        Reference:
        https://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
        Section 7.11.3
        """

        # We need 3 entries:
        # * The file's data
        # * The /Filespec entry
        # * The file's name, which goes in the Catalog


        # The entry for the file
        """ Sample:
        8 0 obj
        <<
         /Length 12
         /Type /EmbeddedFile
        >>
        stream
        Hello world!
        endstream
        endobj
        """
        # Stream object carrying the raw file bytes.
        file_entry = DecodedStreamObject()
        file_entry.setData(fdata)
        file_entry.update({
                NameObject("/Type"): NameObject("/EmbeddedFile")
                })

        # The Filespec entry
        """ Sample:
        7 0 obj
        <<
         /Type /Filespec
         /F (hello.txt)
         /EF << /F 8 0 R >>
        >>
        """
        # /EF maps the file key to the embedded stream above.
        efEntry = DictionaryObject()
        efEntry.update({ NameObject("/F"):file_entry })

        filespec = DictionaryObject()
        filespec.update({
                NameObject("/Type"): NameObject("/Filespec"),
                NameObject("/F"): createStringObject(fname),  # Perhaps also try TextStringObject
                NameObject("/EF"): efEntry
                })

        # Then create the entry for the root, as it needs a reference to the Filespec
        """ Sample:
        1 0 obj
        <<
         /Type /Catalog
         /Outlines 2 0 R
         /Pages 3 0 R
         /Names << /EmbeddedFiles << /Names [(hello.txt) 7 0 R] >> >>
        >>
        endobj

        """
        embeddedFilesNamesDictionary = DictionaryObject()
        embeddedFilesNamesDictionary.update({
                NameObject("/Names"): ArrayObject([createStringObject(fname), filespec])
                })

        embeddedFilesDictionary = DictionaryObject()
        embeddedFilesDictionary.update({
                NameObject("/EmbeddedFiles"): embeddedFilesNamesDictionary
                })
        # Update the root
        # NOTE(review): this replaces any existing /Names entry in the
        # catalog, so a second attachment (or addJS) overwrites the first --
        # confirm against callers before relying on multiple attachments.
        self._root_object.update({
                NameObject("/Names"): embeddedFilesDictionary
                })
+
+    def appendPagesFromReader(self, reader, after_page_append=None):
+        """
+        Copy pages from reader to writer. Includes an optional callback parameter
+        which is invoked after pages are appended to the writer.
+
+        :param reader: a PdfFileReader object from which to copy page
+            annotations to this writer object.  The writer's annots
+        will then be updated
+        :callback after_page_append (function): Callback function that is invoked after
+            each page is appended to the writer. Callback signature:
+
+            :param writer_pageref (PDF page reference): Reference to the page
+                appended to the writer.
+        """
+        # Get page count from writer and reader
+        reader_num_pages = reader.getNumPages()
+        writer_num_pages = self.getNumPages()
+
+        # Copy pages from reader to writer
+        for rpagenum in range(0, reader_num_pages):
+            reader_page = reader.getPage(rpagenum)
+            self.addPage(reader_page)
+            writer_page = self.getPage(writer_num_pages+rpagenum)
+            # Trigger callback, pass writer page as parameter
+            if callable(after_page_append): after_page_append(writer_page)
+
+    def updatePageFormFieldValues(self, page, fields):
+        '''
+        Update the form field values for a given page from a fields dictionary.
+        Copy field texts and values from fields to page.
+
+        :param page: Page reference from PDF writer where the annotations
+            and field data will be updated.
+        :param fields: a Python dictionary of field names (/T) and text
+            values (/V)
+        '''
+        # Iterate through pages, update field values
+        for j in range(0, len(page['/Annots'])):
+            writer_annot = page['/Annots'][j].getObject()
+            for field in fields:
+                if writer_annot.get('/T') == field:
+                    writer_annot.update({
+                        NameObject("/V"): TextStringObject(fields[field])
+                    })
+
+    def cloneReaderDocumentRoot(self, reader):
+        '''
+        Copy the reader document root to the writer.
+
+        :param reader:  PdfFileReader from the document root should be copied.
+        :callback after_page_append
+        '''
+        self._root_object = reader.trailer['/Root']
+
    def cloneDocumentFromReader(self, reader, after_page_append=None):
        '''
        Create a copy (clone) of a document from a PDF file reader: first
        adopts the reader's document root, then appends all of its pages.

        :param reader: PDF file reader instance from which the clone
            should be created.
        :callback after_page_append (function): Callback function that is invoked after
            each page is appended to the writer. Signature includes a reference to the
            appended page (delegates to appendPagesFromReader). Callback signature:

            :param writer_pageref (PDF page reference): Reference to the page just
                appended to the document.
        '''
        self.cloneReaderDocumentRoot(reader)
        self.appendPagesFromReader(reader, after_page_append)
+
    def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True):
        """
        Encrypt this PDF file with the PDF Standard encryption handler.

        :param str user_pwd: The "user password", which allows for opening
            and reading the PDF file with the restrictions provided.
        :param str owner_pwd: The "owner password", which allows for
            opening the PDF files without any restrictions.  By default,
            the owner password is the same as the user password.
        :param bool use_128bit: flag as to whether to use 128bit
            encryption.  When false, 40bit encryption will be used.  By default,
            this flag is on.
        """
        # Local import: only needed when encryption is requested.
        import time, random
        if owner_pwd == None:
            owner_pwd = user_pwd
        # /V (algorithm version) and /R (revision) per the Standard
        # security handler; keylen is in bytes.
        if use_128bit:
            V = 2
            rev = 3
            keylen = int(128 / 8)
        else:
            V = 1
            rev = 2
            keylen = int(40 / 8)
        # permit everything:
        P = -1
        # /O entry (owner password hash) via algorithm 3.3.
        O = ByteStringObject(_alg33(owner_pwd, user_pwd, rev, keylen))
        # Document /ID pair derived from current time and a random value.
        # NOTE(review): random (not secrets) is used here; the ID is not a
        # secret key, but confirm this is acceptable for your threat model.
        ID_1 = ByteStringObject(md5(b_(repr(time.time()))).digest())
        ID_2 = ByteStringObject(md5(b_(repr(random.random()))).digest())
        self._ID = ArrayObject((ID_1, ID_2))
        # /U entry (user password hash) and the file encryption key:
        # algorithm 3.4 for revision 2, algorithm 3.5 for revision 3.
        if rev == 2:
            U, key = _alg34(user_pwd, O, P, ID_1)
        else:
            assert rev == 3
            U, key = _alg35(user_pwd, rev, keylen, O, P, ID_1, False)
        # Assemble the /Encrypt dictionary referenced from the trailer.
        encrypt = DictionaryObject()
        encrypt[NameObject("/Filter")] = NameObject("/Standard")
        encrypt[NameObject("/V")] = NumberObject(V)
        if V == 2:
            encrypt[NameObject("/Length")] = NumberObject(keylen * 8)
        encrypt[NameObject("/R")] = NumberObject(rev)
        encrypt[NameObject("/O")] = ByteStringObject(O)
        encrypt[NameObject("/U")] = ByteStringObject(U)
        encrypt[NameObject("/P")] = NumberObject(P)
        self._encrypt = self._addObject(encrypt)
        # The file key is consumed per-object in write().
        self._encrypt_key = key
+
    def write(self, stream):
        """
        Writes the collection of pages added to this object out as a PDF file:
        header, object bodies, cross-reference table, and trailer, in order.

        :param stream: An object to write the file to.  The object must support
            the write method and the tell method, similar to a file object.
        """
        if hasattr(stream, 'mode') and 'b' not in stream.mode:
            warnings.warn("File <%s> to write to is not in binary mode. It may not be written to correctly." % stream.name)
        debug = False
        import struct

        # Materialize the catalog as an indirect object on first write.
        if not self._root:
            self._root = self._addObject(self._root_object)

        externalReferenceMap = {}

        # PDF objects sometimes have circular references to their /Page objects
        # inside their object tree (for example, annotations).  Those will be
        # indirect references to objects that we've recreated in this PDF.  To
        # address this problem, PageObject's store their original object
        # reference number, and we add it to the external reference map before
        # we sweep for indirect references.  This forces self-page-referencing
        # trees to reference the correct new object location, rather than
        # copying in a new copy of the page object.
        for objIndex in range(len(self._objects)):
            obj = self._objects[objIndex]
            if isinstance(obj, PageObject) and obj.indirectRef != None:
                data = obj.indirectRef
                if data.pdf not in externalReferenceMap:
                    externalReferenceMap[data.pdf] = {}
                if data.generation not in externalReferenceMap[data.pdf]:
                    externalReferenceMap[data.pdf][data.generation] = {}
                externalReferenceMap[data.pdf][data.generation][data.idnum] = IndirectObject(objIndex + 1, 0, self)

        self.stack = []
        if debug: print(("ERM:", externalReferenceMap, "root:", self._root))
        # Pull in every externally-referenced object so the output file is
        # self-contained.
        self._sweepIndirectReferences(externalReferenceMap, self._root)
        del self.stack

        # Begin writing: the version header plus a binary comment line
        # marking the file as containing 8-bit data.
        object_positions = []
        stream.write(self._header + b_("\n"))
        stream.write(b_("%\xE2\xE3\xCF\xD3\n"))
        for i in range(len(self._objects)):
            idnum = (i + 1)
            obj = self._objects[i]
            # Record each object's byte offset for the xref table below.
            object_positions.append(stream.tell())
            stream.write(b_(str(idnum) + " 0 obj\n"))
            key = None
            if hasattr(self, "_encrypt") and idnum != self._encrypt.idnum:
                # Per-object key: file key + 3 low bytes of the object number
                # + 2 low bytes of the generation number, hashed with MD5.
                pack1 = struct.pack("<i", i + 1)[:3]
                pack2 = struct.pack("<i", 0)[:2]
                key = self._encrypt_key + pack1 + pack2
                assert len(key) == (len(self._encrypt_key) + 5)
                md5_hash = md5(key).digest()
                key = md5_hash[:min(16, len(self._encrypt_key) + 5)]
            obj.writeToStream(stream, key)
            stream.write(b_("\nendobj\n"))

        # xref table
        xref_location = stream.tell()
        stream.write(b_("xref\n"))
        stream.write(b_("0 %s\n" % (len(self._objects) + 1)))
        # Entry 0 is the mandatory free-list head.
        stream.write(b_("%010d %05d f \n" % (0, 65535)))
        for offset in object_positions:
            stream.write(b_("%010d %05d n \n" % (offset, 0)))

        # trailer
        stream.write(b_("trailer\n"))
        trailer = DictionaryObject()
        trailer.update({
                NameObject("/Size"): NumberObject(len(self._objects) + 1),
                NameObject("/Root"): self._root,
                NameObject("/Info"): self._info,
                })
        if hasattr(self, "_ID"):
            trailer[NameObject("/ID")] = self._ID
        if hasattr(self, "_encrypt"):
            trailer[NameObject("/Encrypt")] = self._encrypt
        trailer.writeToStream(stream, None)

        # eof
        stream.write(b_("\nstartxref\n%s\n%%%%EOF\n" % (xref_location)))
+
+    def addMetadata(self, infos):
+        """
+        Add custom metadata to the output.
+
+        :param dict infos: a Python dictionary where each key is a field
+            and each value is your new metadata.
+        """
+        args = {}
+        for key, value in list(infos.items()):
+            args[NameObject(key)] = createStringObject(value)
+        self.getObject(self._info).update(args)
+
+    def _sweepIndirectReferences(self, externMap, data):
+        debug = False
+        if debug: print((data, "TYPE", data.__class__.__name__))
+        if isinstance(data, DictionaryObject):
+            for key, value in list(data.items()):
+                origvalue = value
+                value = self._sweepIndirectReferences(externMap, value)
+                if isinstance(value, StreamObject):
+                    # a dictionary value is a stream.  streams must be indirect
+                    # objects, so we need to change this value.
+                    value = self._addObject(value)
+                data[key] = value
+            return data
+        elif isinstance(data, ArrayObject):
+            for i in range(len(data)):
+                value = self._sweepIndirectReferences(externMap, data[i])
+                if isinstance(value, StreamObject):
+                    # an array value is a stream.  streams must be indirect
+                    # objects, so we need to change this value
+                    value = self._addObject(value)
+                data[i] = value
+            return data
+        elif isinstance(data, IndirectObject):
+            # internal indirect references are fine
+            if data.pdf == self:
+                if data.idnum in self.stack:
+                    return data
+                else:
+                    self.stack.append(data.idnum)
+                    realdata = self.getObject(data)
+                    self._sweepIndirectReferences(externMap, realdata)
+                    return data
+            else:
+                if data.pdf.stream.closed:
+                    raise ValueError("I/O operation on closed file: {}".format(data.pdf.stream.name))
+                newobj = externMap.get(data.pdf, {}).get(data.generation, {}).get(data.idnum, None)
+                if newobj == None:
+                    try:
+                        newobj = data.pdf.getObject(data)
+                        self._objects.append(None) # placeholder
+                        idnum = len(self._objects)
+                        newobj_ido = IndirectObject(idnum, 0, self)
+                        if data.pdf not in externMap:
+                            externMap[data.pdf] = {}
+                        if data.generation not in externMap[data.pdf]:
+                            externMap[data.pdf][data.generation] = {}
+                        externMap[data.pdf][data.generation][data.idnum] = newobj_ido
+                        newobj = self._sweepIndirectReferences(externMap, newobj)
+                        self._objects[idnum-1] = newobj
+                        return newobj_ido
+                    except ValueError:
+                        # Unable to resolve the Object, returning NullObject instead.
+                        warnings.warn("Unable to resolve [{}: {}], returning NullObject instead".format(
+                            data.__class__.__name__, data
+                        ))
+                        return NullObject()
+                return newobj
+        else:
+            return data
+
+    def getReference(self, obj):
+        idnum = self._objects.index(obj) + 1
+        ref = IndirectObject(idnum, 0, self)
+        assert ref.getObject() == obj
+        return ref
+
    def getOutlineRoot(self):
        """
        Return the document's outline (bookmark) root dictionary, creating an
        empty one in the catalog if the document does not have one yet.

        Note: the direct :class:`TreeObject` is returned, not the indirect
        reference to it.
        """
        if '/Outlines' in self._root_object:
            # An outline tree already exists; recover its indirect reference
            # (object numbers are 1-based positions in self._objects).
            outline = self._root_object['/Outlines']
            idnum = self._objects.index(outline) + 1
            outlineRef = IndirectObject(idnum, 0, self)
            assert outlineRef.getObject() == outline
        else:
            # No outline yet: register an empty tree and hook it into the
            # document catalog under /Outlines.
            outline = TreeObject()
            outline.update({ })
            outlineRef = self._addObject(outline)
            self._root_object[NameObject('/Outlines')] = outlineRef

        return outline
+
    def getNamedDestRoot(self):
        """
        Return the '/Names' array of the document's named-destinations name
        tree (catalog -> /Names -> /Dests -> /Names), creating any missing
        pieces of that structure along the way.

        :return: the :class:`ArrayObject` holding alternating
            name/destination entries.
        """
        if '/Names' in self._root_object and isinstance(self._root_object['/Names'], DictionaryObject):
            # The catalog already has a name dictionary; recover its ref.
            names = self._root_object['/Names']
            idnum = self._objects.index(names) + 1
            namesRef = IndirectObject(idnum, 0, self)
            assert namesRef.getObject() == names
            if '/Dests' in names and isinstance(names['/Dests'], DictionaryObject):
                # A destinations name tree exists too.
                dests = names['/Dests']
                idnum = self._objects.index(dests) + 1
                destsRef = IndirectObject(idnum, 0, self)
                assert destsRef.getObject() == dests
                if '/Names' in dests:
                    nd = dests['/Names']
                else:
                    # /Dests exists but has no /Names array yet.
                    nd = ArrayObject()
                    dests[NameObject('/Names')] = nd
            else:
                # Create the /Dests node and its /Names array.
                dests = DictionaryObject()
                destsRef = self._addObject(dests)
                names[NameObject('/Dests')] = destsRef
                nd = ArrayObject()
                dests[NameObject('/Names')] = nd

        else:
            # No name dictionary at all: build the whole chain.
            names = DictionaryObject()
            namesRef = self._addObject(names)
            self._root_object[NameObject('/Names')] = namesRef
            dests = DictionaryObject()
            destsRef = self._addObject(dests)
            names[NameObject('/Dests')] = destsRef
            nd = ArrayObject()
            dests[NameObject('/Names')] = nd

        return nd
+
+    def addBookmarkDestination(self, dest, parent=None):
+        destRef = self._addObject(dest)
+
+        outlineRef = self.getOutlineRoot()
+
+        if parent == None:
+            parent = outlineRef
+
+        parent = parent.getObject()
+        #print parent.__class__.__name__
+        parent.addChild(destRef, self)
+
+        return destRef
+
+    def addBookmarkDict(self, bookmark, parent=None):
+        bookmarkObj = TreeObject()
+        for k, v in list(bookmark.items()):
+            bookmarkObj[NameObject(str(k))] = v
+        bookmarkObj.update(bookmark)
+
+        if '/A' in bookmark:
+            action = DictionaryObject()
+            for k, v in list(bookmark['/A'].items()):
+                action[NameObject(str(k))] = v
+            actionRef = self._addObject(action)
+            bookmarkObj[NameObject('/A')] = actionRef
+
+        bookmarkRef = self._addObject(bookmarkObj)
+
+        outlineRef = self.getOutlineRoot()
+
+        if parent == None:
+            parent = outlineRef
+
+        parent = parent.getObject()
+        parent.addChild(bookmarkRef, self)
+
+        return bookmarkRef
+
+    def addBookmark(self, title, pagenum, parent=None, color=None, bold=False, italic=False, fit='/Fit', *args):
+        """
+        Add a bookmark to this PDF file.
+
+        :param str title: Title to use for this bookmark.
+        :param int pagenum: Page number this bookmark will point to.
+        :param parent: A reference to a parent bookmark to create nested
+            bookmarks.
+        :param tuple color: Color of the bookmark as a red, green, blue tuple
+            from 0.0 to 1.0
+        :param bool bold: Bookmark is bold
+        :param bool italic: Bookmark is italic
+        :param str fit: The fit of the destination page. See
+            :meth:`addLink()<addLink>` for details.
+        """
+        pageRef = self.getObject(self._pages)['/Kids'][pagenum]
+        action = DictionaryObject()
+        zoomArgs = []
+        for a in args:
+            if a is not None:
+                zoomArgs.append(NumberObject(a))
+            else:
+                zoomArgs.append(NullObject())
+        dest = Destination(NameObject("/"+title + " bookmark"), pageRef, NameObject(fit), *zoomArgs)
+        destArray = dest.getDestArray()
+        action.update({
+            NameObject('/D') : destArray,
+            NameObject('/S') : NameObject('/GoTo')
+        })
+        actionRef = self._addObject(action)
+
+        outlineRef = self.getOutlineRoot()
+
+        if parent == None:
+            parent = outlineRef
+
+        bookmark = TreeObject()
+
+        bookmark.update({
+            NameObject('/A'): actionRef,
+            NameObject('/Title'): createStringObject(title),
+        })
+
+        if color is not None:
+            bookmark.update({NameObject('/C'): ArrayObject([FloatObject(c) for c in color])})
+
+        format = 0
+        if italic:
+            format += 1
+        if bold:
+            format += 2
+        if format:
+            bookmark.update({NameObject('/F'): NumberObject(format)})
+
+        bookmarkRef = self._addObject(bookmark)
+
+        parent = parent.getObject()
+        parent.addChild(bookmarkRef, self)
+
+        return bookmarkRef
+
+    def addNamedDestinationObject(self, dest):
+        destRef = self._addObject(dest)
+
+        nd = self.getNamedDestRoot()
+        nd.extend([dest['/Title'], destRef])
+
+        return destRef
+
+    def addNamedDestination(self, title, pagenum):
+        pageRef = self.getObject(self._pages)['/Kids'][pagenum]
+        dest = DictionaryObject()
+        dest.update({
+            NameObject('/D') : ArrayObject([pageRef, NameObject('/FitH'), NumberObject(826)]),
+            NameObject('/S') : NameObject('/GoTo')
+        })
+
+        destRef = self._addObject(dest)
+        nd = self.getNamedDestRoot()
+
+        nd.extend([title, destRef])
+
+        return destRef
+
+    def removeLinks(self):
+        """
+        Removes links and annotations from this output.
+        """
+        pages = self.getObject(self._pages)['/Kids']
+        for page in pages:
+            pageRef = self.getObject(page)
+            if "/Annots" in pageRef:
+                del pageRef['/Annots']
+
    def removeImages(self, ignoreByteStringObject=False):
        """
        Removes images from this output.

        :param bool ignoreByteStringObject: optional parameter
            to ignore ByteString Objects.
        """
        pages = self.getObject(self._pages)['/Kids']
        for j in range(len(pages)):
            page = pages[j]
            pageRef = self.getObject(page)
            content = pageRef['/Contents'].getObject()
            if not isinstance(content, ContentStream):
                content = ContentStream(content, pageRef)

            # Rebuild the operation list, dropping drawing operators.
            _operations = []
            seq_graphics = False
            for operands, operator in content.operations:
                # For the text-showing operators, optionally blank out any
                # operand that is not already a TextStringObject.
                if operator == b_('Tj'):
                    text = operands[0]
                    if ignoreByteStringObject:
                        if not isinstance(text, TextStringObject):
                            operands[0] = TextStringObject()
                elif operator == b_("'"):
                    text = operands[0]
                    if ignoreByteStringObject:
                        if not isinstance(text, TextStringObject):
                            operands[0] = TextStringObject()
                elif operator == b_('"'):
                    # The '"' operator carries its text as the third operand.
                    text = operands[2]
                    if ignoreByteStringObject:
                        if not isinstance(text, TextStringObject):
                            operands[2] = TextStringObject()
                elif operator == b_("TJ"):
                    for i in range(len(operands[0])):
                        if ignoreByteStringObject:
                            if not isinstance(operands[0][i], TextStringObject):
                                operands[0][i] = TextStringObject()

                # Track q/Q graphics-state nesting; while inside a saved
                # graphics state, drop path/painting/XObject operators.
                if operator == b_('q'):
                    seq_graphics = True
                if operator == b_('Q'):
                    seq_graphics = False
                if seq_graphics:
                    if operator in [b_('cm'), b_('w'), b_('J'), b_('j'), b_('M'), b_('d'), b_('ri'), b_('i'),
                            b_('gs'), b_('W'), b_('b'), b_('s'), b_('S'), b_('f'), b_('F'), b_('n'), b_('m'), b_('l'),
                            b_('c'), b_('v'), b_('y'), b_('h'), b_('B'), b_('Do'), b_('sh')]:
                        continue
                # Rectangle operators are always dropped.
                if operator == b_('re'):
                    continue
                _operations.append((operands, operator))

            content.operations = _operations
            pageRef.__setitem__(NameObject('/Contents'), content)
+
    def removeText(self, ignoreByteStringObject=False):
        """
        Removes text from this output.

        :param bool ignoreByteStringObject: optional parameter
            to ignore ByteString Objects.
        """
        pages = self.getObject(self._pages)['/Kids']
        for j in range(len(pages)):
            page = pages[j]
            pageRef = self.getObject(page)
            content = pageRef['/Contents'].getObject()
            if not isinstance(content, ContentStream):
                content = ContentStream(content, pageRef)
            # Blank out the operands of every text-showing operator.  With
            # ignoreByteStringObject=True, ByteStringObject operands are
            # cleared as well; otherwise only TextStringObject operands are.
            for operands,operator in content.operations:
                if operator == b_('Tj'):
                    text = operands[0]
                    if not ignoreByteStringObject:
                        if isinstance(text, TextStringObject):
                            operands[0] = TextStringObject()
                    else:
                        if isinstance(text, TextStringObject) or \
                                isinstance(text, ByteStringObject):
                            operands[0] = TextStringObject()
                elif operator == b_("'"):
                    text = operands[0]
                    if not ignoreByteStringObject:
                        if isinstance(text, TextStringObject):
                            operands[0] = TextStringObject()
                    else:
                        if isinstance(text, TextStringObject) or \
                                isinstance(text, ByteStringObject):
                            operands[0] = TextStringObject()
                elif operator == b_('"'):
                    # The '"' operator carries its text as the third operand.
                    text = operands[2]
                    if not ignoreByteStringObject:
                        if isinstance(text, TextStringObject):
                            operands[2] = TextStringObject()
                    else:
                        if isinstance(text, TextStringObject) or \
                                isinstance(text, ByteStringObject):
                            operands[2] = TextStringObject()
                elif operator == b_("TJ"):
                    # TJ takes an array of strings and kerning numbers.
                    for i in range(len(operands[0])):
                        if not ignoreByteStringObject:
                            if isinstance(operands[0][i], TextStringObject):
                                operands[0][i] = TextStringObject()
                        else:
                            if isinstance(operands[0][i], TextStringObject) or \
                                    isinstance(operands[0][i], ByteStringObject):
                                operands[0][i] = TextStringObject()

            pageRef.__setitem__(NameObject('/Contents'), content)
+
+    def addURI(self, pagenum, uri, rect, border=None):
+        """
+        Add an URI from a rectangular area to the specified page.
+        This uses the basic structure of AddLink
+
+        :param int pagenum: index of the page on which to place the URI action.
+        :param int uri: string -- uri of resource to link to.
+        :param rect: :class:`RectangleObject<PyPDF2.generic.RectangleObject>` or array of four
+            integers specifying the clickable rectangular area
+            ``[xLL, yLL, xUR, yUR]``, or string in the form ``"[ xLL yLL xUR yUR ]"``.
+        :param border: if provided, an array describing border-drawing
+            properties. See the PDF spec for details. No border will be
+            drawn if this argument is omitted.
+
+        REMOVED FIT/ZOOM ARG
+        -John Mulligan
+        """
+
+        pageLink = self.getObject(self._pages)['/Kids'][pagenum]
+        pageRef = self.getObject(pageLink)
+
+        if border is not None:
+            borderArr = [NameObject(n) for n in border[:3]]
+            if len(border) == 4:
+                dashPattern = ArrayObject([NameObject(n) for n in border[3]])
+                borderArr.append(dashPattern)
+        else:
+            borderArr = [NumberObject(2)] * 3
+
+        if isString(rect):
+            rect = NameObject(rect)
+        elif isinstance(rect, RectangleObject):
+            pass
+        else:
+            rect = RectangleObject(rect)
+
+        lnk2 = DictionaryObject()
+        lnk2.update({
+        NameObject('/S'): NameObject('/URI'),
+        NameObject('/URI'): TextStringObject(uri)
+        });
+        lnk = DictionaryObject()
+        lnk.update({
+        NameObject('/Type'): NameObject('/Annot'),
+        NameObject('/Subtype'): NameObject('/Link'),
+        NameObject('/P'): pageLink,
+        NameObject('/Rect'): rect,
+        NameObject('/H'): NameObject('/I'),
+        NameObject('/Border'): ArrayObject(borderArr),
+        NameObject('/A'): lnk2
+        })
+        lnkRef = self._addObject(lnk)
+
+        if "/Annots" in pageRef:
+            pageRef['/Annots'].append(lnkRef)
+        else:
+            pageRef[NameObject('/Annots')] = ArrayObject([lnkRef])
+
+    def addLink(self, pagenum, pagedest, rect, border=None, fit='/Fit', *args):
+        """
+        Add an internal link from a rectangular area to the specified page.
+
+        :param int pagenum: index of the page on which to place the link.
+        :param int pagedest: index of the page to which the link should go.
+        :param rect: :class:`RectangleObject<PyPDF2.generic.RectangleObject>` or array of four
+            integers specifying the clickable rectangular area
+            ``[xLL, yLL, xUR, yUR]``, or string in the form ``"[ xLL yLL xUR yUR ]"``.
+        :param border: if provided, an array describing border-drawing
+            properties. See the PDF spec for details. No border will be
+            drawn if this argument is omitted.
+        :param str fit: Page fit or 'zoom' option (see below). Additional arguments may need
+            to be supplied. Passing ``None`` will be read as a null value for that coordinate.
+
+        Valid zoom arguments (see Table 8.2 of the PDF 1.7 reference for details):
+             /Fit       No additional arguments
+             /XYZ       [left] [top] [zoomFactor]
+             /FitH      [top]
+             /FitV      [left]
+             /FitR      [left] [bottom] [right] [top]
+             /FitB      No additional arguments
+             /FitBH     [top]
+             /FitBV     [left]
+        """
+
+        pageLink = self.getObject(self._pages)['/Kids'][pagenum]
+        pageDest = self.getObject(self._pages)['/Kids'][pagedest] #TODO: switch for external link
+        pageRef = self.getObject(pageLink)
+
+        if border is not None:
+            borderArr = [NameObject(n) for n in border[:3]]
+            if len(border) == 4:
+                dashPattern = ArrayObject([NameObject(n) for n in border[3]])
+                borderArr.append(dashPattern)
+        else:
+            borderArr = [NumberObject(0)] * 3
+
+        if isString(rect):
+            rect = NameObject(rect)
+        elif isinstance(rect, RectangleObject):
+            pass
+        else:
+            rect = RectangleObject(rect)
+
+        zoomArgs = []
+        for a in args:
+            if a is not None:
+                zoomArgs.append(NumberObject(a))
+            else:
+                zoomArgs.append(NullObject())
+        dest = Destination(NameObject("/LinkName"), pageDest, NameObject(fit), *zoomArgs) #TODO: create a better name for the link
+        destArray = dest.getDestArray()
+
+        lnk = DictionaryObject()
+        lnk.update({
+            NameObject('/Type'): NameObject('/Annot'),
+            NameObject('/Subtype'): NameObject('/Link'),
+            NameObject('/P'): pageLink,
+            NameObject('/Rect'): rect,
+            NameObject('/Border'): ArrayObject(borderArr),
+            NameObject('/Dest'): destArray
+        })
+        lnkRef = self._addObject(lnk)
+
+        if "/Annots" in pageRef:
+            pageRef['/Annots'].append(lnkRef)
+        else:
+            pageRef[NameObject('/Annots')] = ArrayObject([lnkRef])
+
    # Layout names accepted by setPageLayout (PDF 1.7 reference, /PageLayout).
    _valid_layouts = ['/NoLayout', '/SinglePage', '/OneColumn', '/TwoColumnLeft', '/TwoColumnRight', '/TwoPageLeft', '/TwoPageRight']
+
+    def getPageLayout(self):
+        """
+        Get the page layout.
+        See :meth:`setPageLayout()<PdfFileWriter.setPageLayout>` for a description of valid layouts.
+
+        :return: Page layout currently being used.
+        :rtype: str, None if not specified
+        """
+        try:
+            return self._root_object['/PageLayout']
+        except KeyError:
+            return None
+
+    def setPageLayout(self, layout):
+        """
+        Set the page layout
+
+        :param str layout: The page layout to be used
+
+        Valid layouts are:
+             /NoLayout        Layout explicitly not specified
+             /SinglePage      Show one page at a time
+             /OneColumn       Show one column at a time
+             /TwoColumnLeft   Show pages in two columns, odd-numbered pages on the left
+             /TwoColumnRight  Show pages in two columns, odd-numbered pages on the right
+             /TwoPageLeft     Show two pages at a time, odd-numbered pages on the left
+             /TwoPageRight    Show two pages at a time, odd-numbered pages on the right
+        """
+        if not isinstance(layout, NameObject):
+            if layout not in self._valid_layouts:
+                warnings.warn("Layout should be one of: {}".format(', '.join(self._valid_layouts)))
+            layout = NameObject(layout)
+        self._root_object.update({NameObject('/PageLayout'): layout})
+
    # Read/write convenience property wrapping getPageLayout/setPageLayout.
    pageLayout = property(getPageLayout, setPageLayout)
    """Read and write property accessing the :meth:`getPageLayout()<PdfFileWriter.getPageLayout>`
    and :meth:`setPageLayout()<PdfFileWriter.setPageLayout>` methods."""
+
    # Mode names accepted by setPageMode (PDF 1.7 reference, /PageMode).
    _valid_modes = ['/UseNone', '/UseOutlines', '/UseThumbs', '/FullScreen', '/UseOC', '/UseAttachments']
+
+    def getPageMode(self):
+        """
+        Get the page mode.
+        See :meth:`setPageMode()<PdfFileWriter.setPageMode>` for a description
+        of valid modes.
+
+        :return: Page mode currently being used.
+        :rtype: str, None if not specified
+        """
+        try:
+            return self._root_object['/PageMode']
+        except KeyError:
+            return None
+
+    def setPageMode(self, mode):
+        """
+        Set the page mode.
+
+        :param str mode: The page mode to use.
+
+        Valid modes are:
+            /UseNone         Do not show outlines or thumbnails panels
+            /UseOutlines     Show outlines (aka bookmarks) panel
+            /UseThumbs       Show page thumbnails panel
+            /FullScreen      Fullscreen view
+            /UseOC           Show Optional Content Group (OCG) panel
+            /UseAttachments  Show attachments panel
+        """
+        if not isinstance(mode, NameObject):
+            if mode not in self._valid_modes:
+                warnings.warn("Mode should be one of: {}".format(', '.join(self._valid_modes)))
+            mode = NameObject(mode)
+        self._root_object.update({NameObject('/PageMode'): mode})
+
    # Read/write convenience property wrapping getPageMode/setPageMode.
    pageMode = property(getPageMode, setPageMode)
    """Read and write property accessing the :meth:`getPageMode()<PdfFileWriter.getPageMode>`
    and :meth:`setPageMode()<PdfFileWriter.setPageMode>` methods."""
+
+
+class PdfFileReader(object):
+    """
+    Initializes a PdfFileReader object.  This operation can take some time, as
+    the PDF stream's cross-reference tables are read into memory.
+
+    :param stream: A File object or an object that supports the standard read
+        and seek methods similar to a File object. Could also be a
+        string representing a path to a PDF file.
+    :param bool strict: Determines whether user should be warned of all
+        problems and also causes some correctable problems to be fatal.
+        Defaults to ``True``.
+    :param warndest: Destination for logging warnings (defaults to
+        ``sys.stderr``).
+    :param bool overwriteWarnings: Determines whether to override Python's
+        ``warnings.py`` module with a custom implementation (defaults to
+        ``True``).
+    """
    def __init__(self, stream, strict=True, warndest = None, overwriteWarnings = True):
        """Initialize the reader; see the class docstring for parameters."""
        if overwriteWarnings:
            # have to dynamically override the default showwarning since there are no
            # public methods that specify the 'file' parameter.
            # NOTE(review): this replaces warnings.showwarning process-wide,
            # which affects all other code running in the interpreter.
            def _showwarning(message, category, filename, lineno, file=warndest, line=None):
                if file is None:
                    file = sys.stderr
                try:
                    # It is possible for sys.stderr to be defined as None, most commonly in the case that the script
                    # is being run via pythonw.exe on Windows. In this case, just swallow the warning.
                    # See also https://docs.python.org/3/library/sys.html#sys.__stderr__
                    if file is not None:
                        file.write(formatWarning(message, category, filename, lineno, line))
                except IOError:
                    pass
            warnings.showwarning = _showwarning
        self.strict = strict
        # flattenedPages is built lazily by _flatten() on first page access.
        self.flattenedPages = None
        # Cache of objects already resolved from the cross-reference table.
        self.resolvedObjects = {}
        self.xrefIndex = 0
        self._pageId2Num = None # map page IndirectRef number to Page Number
        if hasattr(stream, 'mode') and 'b' not in stream.mode:
            warnings.warn("PdfFileReader stream/file object is not in binary mode. It may not be read correctly.", utils.PdfReadWarning)
        if isString(stream):
            # A path was given: slurp the whole file into memory so the
            # reader does not depend on the file staying open.
            fileobj = open(stream, 'rb')
            stream = BytesIO(b_(fileobj.read()))
            fileobj.close()
        self.read(stream)
        self.stream = stream

        self._override_encryption = False
+
+    def getDocumentInfo(self):
+        """
+        Retrieves the PDF file's document information dictionary, if it exists.
+        Note that some PDF files use metadata streams instead of docinfo
+        dictionaries, and these metadata streams will not be accessed by this
+        function.
+
+        :return: the document information of this PDF file
+        :rtype: :class:`DocumentInformation<pdf.DocumentInformation>` or ``None`` if none exists.
+        """
+        if "/Info" not in self.trailer:
+            return None
+        obj = self.trailer['/Info']
+        retval = DocumentInformation()
+        retval.update(obj)
+        return retval
+
    # Read-only convenience accessor for getDocumentInfo().
    documentInfo = property(lambda self: self.getDocumentInfo(), None, None)
    """Read-only property that accesses the :meth:`getDocumentInfo()<PdfFileReader.getDocumentInfo>` function."""
+
+    def getXmpMetadata(self):
+        """
+        Retrieves XMP (Extensible Metadata Platform) data from the PDF document
+        root.
+
+        :return: a :class:`XmpInformation<xmp.XmpInformation>`
+            instance that can be used to access XMP metadata from the document.
+        :rtype: :class:`XmpInformation<xmp.XmpInformation>` or
+            ``None`` if no metadata was found on the document root.
+        """
+        try:
+            self._override_encryption = True
+            return self.trailer["/Root"].getXmpMetadata()
+        finally:
+            self._override_encryption = False
+
    # Read-only convenience accessor for getXmpMetadata().
    xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None)
    """
    Read-only property that accesses the
    :meth:`getXmpMetadata()<PdfFileReader.getXmpMetadata>` function.
    """
+
    def getNumPages(self):
        """
        Calculates the number of pages in this PDF file.

        :return: number of pages
        :rtype: int
        :raises PdfReadError: if file is encrypted and restrictions prevent
            this action.
        """

        # Flattened pages will not work on an Encrypted PDF;
        # the PDF file's page count is used in this case. Otherwise,
        # the original method (flattened page count) is used.
        if self.isEncrypted:
            try:
                # Try decrypting with the empty user password so the page
                # tree's /Count can be read from the trailer.
                self._override_encryption = True
                self.decrypt('')
                return self.trailer["/Root"]["/Pages"]["/Count"]
            except Exception:
                raise utils.PdfReadError("File has not been decrypted")
            finally:
                # Always restore the encryption flag, even on failure.
                self._override_encryption = False
        else:
            if self.flattenedPages == None:
                self._flatten()
            return len(self.flattenedPages)
+
    # Read-only convenience accessor for getNumPages().
    numPages = property(lambda self: self.getNumPages(), None, None)
    """
    Read-only property that accesses the
    :meth:`getNumPages()<PdfFileReader.getNumPages>` function.
    """
+
+    def getPage(self, pageNumber):
+        """
+        Retrieves a page by number from this PDF file.
+
+        :param int pageNumber: The page number to retrieve
+            (pages begin at zero)
+        :return: a :class:`PageObject<pdf.PageObject>` instance.
+        :rtype: :class:`PageObject<pdf.PageObject>`
+        """
+        ## ensure that we're not trying to access an encrypted PDF
+        #assert not self.trailer.has_key("/Encrypt")
+        if self.flattenedPages == None:
+            self._flatten()
+        return self.flattenedPages[pageNumber]
+
    # Read-only convenience accessor for getNamedDestinations().
    namedDestinations = property(lambda self:
                                  self.getNamedDestinations(), None, None)
    """
    Read-only property that accesses the
    :meth:`getNamedDestinations()<PdfFileReader.getNamedDestinations>` function.
    """
+
+    # A select group of relevant field attributes. For the complete list,
+    # see section 8.6.2 of the PDF 1.7 reference.
+
+    def getFields(self, tree = None, retval = None, fileobj = None):
+        """
+        Extracts field data if this PDF contains interactive form fields.
+        The *tree* and *retval* parameters are for recursive use.
+
+        :param fileobj: A file object (usually a text file) to write
+            a report to on all interactive form fields found.
+        :return: A dictionary where each key is a field name, and each
+            value is a :class:`Field<PyPDF2.generic.Field>` object. By
+            default, the mapping name is used for keys.
+        :rtype: dict, or ``None`` if form data could not be located.
+        """
+        fieldAttributes = {"/FT" : "Field Type", "/Parent" : "Parent",
+                       "/T" : "Field Name", "/TU" : "Alternate Field Name",
+                       "/TM" : "Mapping Name", "/Ff" : "Field Flags",
+                       "/V" : "Value", "/DV" : "Default Value"}
+        if retval == None:
+            retval = {}
+            catalog = self.trailer["/Root"]
+            # get the AcroForm tree
+            if "/AcroForm" in catalog:
+                tree = catalog["/AcroForm"]
+            else:
+                return None
+        if tree == None:
+            return retval
+
+        self._checkKids(tree, retval, fileobj)
+        for attr in fieldAttributes:
+            if attr in tree:
+                # Tree is a field
+                self._buildField(tree, retval, fileobj, fieldAttributes)
+                break
+
+        if "/Fields" in tree:
+            fields = tree["/Fields"]
+            for f in fields:
+                field = f.getObject()
+                self._buildField(field, retval, fileobj, fieldAttributes)
+
+        return retval
+
+    def _buildField(self, field, retval, fileobj, fieldAttributes):
+        self._checkKids(field, retval, fileobj)
+        try:
+            key = field["/TM"]
+        except KeyError:
+            try:
+                key = field["/T"]
+            except KeyError:
+                # Ignore no-name field for now
+                return
+        if fileobj:
+            self._writeField(fileobj, field, fieldAttributes)
+            fileobj.write("\n")
+        retval[key] = Field(field)
+
+    def _checkKids(self, tree, retval, fileobj):
+        # Collect nested form fields: each /Kids entry is processed by
+        # getFields() with the same accumulator and report file.
+        if "/Kids" in tree:
+            # recurse down the tree
+            for kid in tree["/Kids"]:
+                self.getFields(kid.getObject(), retval, fileobj)
+
+    def _writeField(self, fileobj, field, fieldAttributes):
+        # Write a report for one field to ``fileobj``: one "<label>: <value>"
+        # line per attribute present on the field, in a fixed order.
+        # Attributes missing from the field are silently skipped (KeyError).
+        order = ["/TM", "/T", "/FT", "/Parent", "/TU", "/Ff", "/V", "/DV"]
+        for attr in order:
+            attrName = fieldAttributes[attr]
+            try:
+                if attr == "/FT":
+                    # Make the field type value more clear
+                    types = {"/Btn":"Button", "/Tx":"Text", "/Ch": "Choice",
+                             "/Sig":"Signature"}
+                    if field[attr] in types:
+                        fileobj.write(attrName + ": " + types[field[attr]] + "\n")
+                elif attr == "/Parent":
+                    # Let's just write the name of the parent
+                    try:
+                        name = field["/Parent"]["/TM"]
+                    except KeyError:
+                        name = field["/Parent"]["/T"]
+                    fileobj.write(attrName + ": " + name + "\n")
+                else:
+                    fileobj.write(attrName + ": " + str(field[attr]) + "\n")
+            except KeyError:
+                # Field attribute is N/A or unknown, so don't write anything
+                pass
+
+    def getFormTextFields(self):
+        ''' Retrieves form fields from the document with textual data (inputs, dropdowns)
+
+        :return: a dictionary mapping each text field's name (/T) to its
+            current value (/V, or ``None`` when unset); only fields whose
+            type (/FT) is /Tx are included.
+
+        NOTE(review): if the document has no AcroForm, :meth:`getFields`
+        returns ``None`` and iterating it here raises TypeError -- confirm
+        callers guard against that.
+        '''
+        # Retrieve document form fields
+        formfields = self.getFields()
+        return dict(
+            (formfields[field]['/T'], formfields[field].get('/V')) for field in formfields \
+                if formfields[field].get('/FT') == '/Tx'
+        )
+
+    def getNamedDestinations(self, tree=None, retval=None):
+        """
+        Retrieves the named destinations present in the document.
+
+        :param tree: name-tree node to scan (internal recursion parameter;
+            leave as ``None`` to start from the document catalog).
+        :param retval: accumulator dict (internal recursion parameter).
+        :return: a dictionary which maps names to
+            :class:`Destinations<PyPDF2.generic.Destination>`.
+        :rtype: dict
+        """
+        if retval == None:
+            retval = {}
+            catalog = self.trailer["/Root"]
+
+            # get the name tree
+            if "/Dests" in catalog:
+                tree = catalog["/Dests"]
+            elif "/Names" in catalog:
+                names = catalog['/Names']
+                if "/Dests" in names:
+                    tree = names['/Dests']
+
+        if tree == None:
+            # Document declares no named destinations.
+            return retval
+
+        if "/Kids" in tree:
+            # recurse down the tree
+            for kid in tree["/Kids"]:
+                self.getNamedDestinations(kid.getObject(), retval)
+
+        if "/Names" in tree:
+            # Leaf node: /Names is a flat array of alternating
+            # [name, destination] pairs.
+            names = tree["/Names"]
+            for i in range(0, len(names), 2):
+                key = names[i].getObject()
+                val = names[i+1].getObject()
+                if isinstance(val, DictionaryObject) and '/D' in val:
+                    # Dictionary destinations wrap the real target in /D.
+                    val = val['/D']
+                dest = self._buildDestination(key, val)
+                if dest != None:
+                    retval[key] = dest
+
+        return retval
+
+    # Convenience alias: ``reader.outlines`` == ``reader.getOutlines()``.
+    outlines = property(lambda self: self.getOutlines(), None, None)
+    """
+    Read-only property that accesses the
+        :meth:`getOutlines()<PdfFileReader.getOutlines>` method.
+    """
+
+    def getOutlines(self, node=None, outlines=None):
+        """
+        Retrieves the document outline present in the document.
+
+        :param node: outline node to start from (internal recursion
+            parameter; leave as ``None`` to start from the catalog).
+        :param outlines: accumulator list (internal recursion parameter).
+        :return: a nested list of :class:`Destinations<PyPDF2.generic.Destination>`.
+        """
+        if outlines == None:
+            outlines = []
+            catalog = self.trailer["/Root"]
+
+            # get the outline dictionary and named destinations
+            if "/Outlines" in catalog:
+                try:
+                    lines = catalog["/Outlines"]
+                except utils.PdfReadError:
+                    # this occurs if the /Outlines object reference is incorrect
+                    # for an example of such a file, see https://unglueit-files.s3.amazonaws.com/ebf/7552c42e9280b4476e59e77acc0bc812.pdf
+                    # so continue to load the file without the Bookmarks
+                    return outlines
+
+                if "/First" in lines:
+                    node = lines["/First"]
+            # Cache named destinations so _buildOutline can resolve them.
+            self._namedDests = self.getNamedDestinations()
+
+        if node == None:
+          return outlines
+
+        # see if there are any more outlines
+        while True:
+            outline = self._buildOutline(node)
+            if outline:
+                outlines.append(outline)
+
+            # check for sub-outlines
+            if "/First" in node:
+                subOutlines = []
+                self.getOutlines(node["/First"], subOutlines)
+                if subOutlines:
+                    outlines.append(subOutlines)
+
+            # /Next links siblings at this level; stop after the last one.
+            if "/Next" not in node:
+                break
+            node = node["/Next"]
+
+        return outlines
+
+    def _getPageNumberByIndirect(self, indirectRef):
+        """
+        Map an indirect reference (or a raw object id number) to a 0-based
+        page index, lazily building and caching ``self._pageId2Num``
+        (idnum -> index) on first use.
+
+        :return: the page index, or -1 if the reference is not a page.
+        """
+        if self._pageId2Num is None:
+            id2num = {}
+            for i, x in enumerate(self.pages):
+                id2num[x.indirectRef.idnum] = i
+            self._pageId2Num = id2num
+
+        # Accept either a bare idnum or an object with an ``idnum`` attribute.
+        if isinstance(indirectRef, int):
+            idnum = indirectRef
+        else:
+            idnum = indirectRef.idnum
+
+        ret = self._pageId2Num.get(idnum, -1)
+        return ret
+
+    def getPageNumber(self, page):
+        """
+        Retrieve page number of a given PageObject
+
+        :param PageObject page: The page to get page number. Should be
+            an instance of :class:`PageObject<PyPDF2.pdf.PageObject>`
+        :return: the page number or -1 if page not found
+        :rtype: int
+        """
+        # Delegate to the cached idnum -> page-index mapping.
+        indirectRef = page.indirectRef
+        ret = self._getPageNumberByIndirect(indirectRef)
+        return ret
+
+    def getDestinationPageNumber(self, destination):
+        """
+        Retrieve page number of a given Destination object
+
+        :param Destination destination: The destination to get page number.
+             Should be an instance of
+             :class:`Destination<PyPDF2.pdf.Destination>`
+        :return: the page number or -1 if page not found
+        :rtype: int
+        """
+        # A Destination's ``page`` is the indirect reference to look up.
+        indirectRef = destination.page
+        ret = self._getPageNumberByIndirect(indirectRef)
+        return ret
+
+    def _buildDestination(self, title, array):
+        # A destination array is [page, fit-type, *fit-args]; forward the
+        # remaining arguments to the Destination constructor unchanged.
+        page, typ = array[0:2]
+        array = array[2:]
+        return Destination(title, page, typ, *array)
+
+    def _buildOutline(self, node):
+        # Convert one outline (bookmark) node into a Destination, resolving
+        # /A GoTo actions and named destinations; returns None when the node
+        # carries no usable destination.
+        dest, title, outline = None, None, None
+
+        if "/A" in node and "/Title" in node:
+            # Action, section 8.5 (only type GoTo supported)
+            title  = node["/Title"]
+            action = node["/A"]
+            if action["/S"] == "/GoTo":
+                dest = action["/D"]
+        elif "/Dest" in node and "/Title" in node:
+            # Destination, section 8.2.1
+            title = node["/Title"]
+            dest  = node["/Dest"]
+
+        # if destination found, then create outline
+        if dest:
+            if isinstance(dest, ArrayObject):
+                outline = self._buildDestination(title, dest)
+            elif isString(dest) and dest in self._namedDests:
+                # Named destination: reuse it, relabelled with this node's title.
+                outline = self._namedDests[dest]
+                outline[NameObject("/Title")] = title
+            else:
+                raise utils.PdfReadError("Unexpected destination %r" % dest)
+        return outline
+
+    # Virtual sequence built from getNumPages() and getPage().
+    pages = property(lambda self: ConvertFunctionsToVirtualList(self.getNumPages, self.getPage),
+        None, None)
+    """
+    Read-only property that emulates a list based upon the
+    :meth:`getNumPages()<PdfFileReader.getNumPages>` and
+    :meth:`getPage()<PdfFileReader.getPage>` methods.
+    """
+
+    def getPageLayout(self):
+        """
+        Get the page layout.
+        See :meth:`setPageLayout()<PdfFileWriter.setPageLayout>`
+        for a description of valid layouts.
+
+        :return: Page layout currently being used.
+        :rtype: ``str``, ``None`` if not specified
+        """
+        try:
+            return self.trailer['/Root']['/PageLayout']
+        except KeyError:
+            # /PageLayout is an optional catalog entry.
+            return None
+
+    pageLayout = property(getPageLayout)
+    """Read-only property accessing the
+    :meth:`getPageLayout()<PdfFileReader.getPageLayout>` method."""
+
+    def getPageMode(self):
+        """
+        Get the page mode.
+        See :meth:`setPageMode()<PdfFileWriter.setPageMode>`
+        for a description of valid modes.
+
+        :return: Page mode currently being used.
+        :rtype: ``str``, ``None`` if not specified
+        """
+        try:
+            return self.trailer['/Root']['/PageMode']
+        except KeyError:
+            # /PageMode is an optional catalog entry.
+            return None
+
+    pageMode = property(getPageMode)
+    """Read-only property accessing the
+    :meth:`getPageMode()<PdfFileReader.getPageMode>` method."""
+
+    def _flatten(self, pages=None, inherit=None, indirectRef=None):
+        # Walk the /Pages tree depth-first, pushing inheritable attributes
+        # down onto each leaf /Page, and collect the leaves (in document
+        # order) into self.flattenedPages.
+        # NOTE(review): the same ``inherit`` dict is shared between sibling
+        # subtrees, so attributes set in one branch may leak into the next
+        # branch -- confirm this matches intended inheritance semantics.
+        inheritablePageAttributes = (
+            NameObject("/Resources"), NameObject("/MediaBox"),
+            NameObject("/CropBox"), NameObject("/Rotate")
+            )
+        if inherit == None:
+            inherit = dict()
+        if pages == None:
+            # Top-level call: start from the page-tree root in the catalog.
+            self.flattenedPages = []
+            catalog = self.trailer["/Root"].getObject()
+            pages = catalog["/Pages"].getObject()
+
+        t = "/Pages"
+        if "/Type" in pages:
+            t = pages["/Type"]
+
+        if t == "/Pages":
+            for attr in inheritablePageAttributes:
+                if attr in pages:
+                    inherit[attr] = pages[attr]
+            for page in pages["/Kids"]:
+                addt = {}
+                if isinstance(page, IndirectObject):
+                    addt["indirectRef"] = page
+                self._flatten(page.getObject(), inherit, **addt)
+        elif t == "/Page":
+            for attr, value in list(inherit.items()):
+                # if the page has its own value, it does not inherit the
+                # parent's value:
+                if attr not in pages:
+                    pages[attr] = value
+            pageObj = PageObject(self, indirectRef)
+            pageObj.update(pages)
+            self.flattenedPages.append(pageObj)
+
+    def _getObjectFromStream(self, indirectReference):
+        # Fetch an object stored inside a compressed object stream (/ObjStm);
+        # self.xref_objStm maps idnum -> (stream number, index in stream).
+        # indirect reference to object in object stream
+        # read the entire object stream into memory
+        debug = False
+        stmnum, idx = self.xref_objStm[indirectReference.idnum]
+        if debug: print(("Here1: %s %s"%(stmnum, idx)))
+        objStm = IndirectObject(stmnum, 0, self).getObject()
+        if debug: print(("Here2: objStm=%s.. stmnum=%s data=%s"%(objStm, stmnum, objStm.getData())))
+        # This is an xref to a stream, so its type better be a stream
+        assert objStm['/Type'] == '/ObjStm'
+        # /N is the number of indirect objects in the stream
+        assert idx < objStm['/N']
+        streamData = BytesIO(b_(objStm.getData()))
+        # The stream data starts with /N pairs of "objnum offset" integers;
+        # scan them for the requested object number.
+        for i in range(objStm['/N']):
+            readNonWhitespace(streamData)
+            streamData.seek(-1, 1)
+            objnum = NumberObject.readFromStream(streamData)
+            readNonWhitespace(streamData)
+            streamData.seek(-1, 1)
+            offset = NumberObject.readFromStream(streamData)
+            readNonWhitespace(streamData)
+            streamData.seek(-1, 1)
+            if objnum != indirectReference.idnum:
+                # We're only interested in one object
+                continue
+            if self.strict and idx != i:
+                raise utils.PdfReadError("Object is in wrong index.")
+            # Offsets are relative to /First, where the object data begins.
+            streamData.seek(objStm['/First']+offset, 0)
+            if debug:
+                pos = streamData.tell()
+                streamData.seek(0, 0)
+                lines = streamData.readlines()
+                for i in range(0, len(lines)):
+                    print((lines[i]))
+                streamData.seek(pos, 0)
+            try:
+                obj = readObject(streamData, self)
+            except utils.PdfStreamError as e:
+                # Stream object cannot be read. Normally, a critical error, but
+                # Adobe Reader doesn't complain, so continue (in strict mode?)
+                e = sys.exc_info()[1]
+                warnings.warn("Invalid stream (index %d) within object %d %d: %s" % \
+                              (i, indirectReference.idnum, indirectReference.generation, e), utils.PdfReadWarning)
+
+                if self.strict:
+                    raise utils.PdfReadError("Can't read object stream: %s" % e)
+                # Replace with null. Hopefully it's nothing important.
+                obj = NullObject()
+            return obj
+
+        # Requested object number never appeared in the stream's header table.
+        if self.strict: raise utils.PdfReadError("This is a fatal error in strict mode.")
+        return NullObject()
+
+    def getObject(self, indirectReference):
+        """
+        Resolve an indirect reference to its object: consult the cache
+        first, then compressed object streams, then the cross-reference
+        table; decrypt the result when the document is encrypted.
+        """
+        debug = False
+        if debug: print(("looking at:", indirectReference.idnum, indirectReference.generation))
+        retval = self.cacheGetIndirectObject(indirectReference.generation,
+                                                indirectReference.idnum)
+        if retval != None:
+            return retval
+        if indirectReference.generation == 0 and \
+                        indirectReference.idnum in self.xref_objStm:
+            retval = self._getObjectFromStream(indirectReference)
+        elif indirectReference.generation in self.xref and \
+                indirectReference.idnum in self.xref[indirectReference.generation]:
+            start = self.xref[indirectReference.generation][indirectReference.idnum]
+            if debug: print(("  Uncompressed Object", indirectReference.idnum, indirectReference.generation, ":", start))
+            self.stream.seek(start, 0)
+            idnum, generation = self.readObjectHeader(self.stream)
+            if idnum != indirectReference.idnum and self.xrefIndex:
+                # Xref table probably had bad indexes due to not being zero-indexed
+                if self.strict:
+                    raise utils.PdfReadError("Expected object ID (%d %d) does not match actual (%d %d); xref table not zero-indexed." \
+                                             % (indirectReference.idnum, indirectReference.generation, idnum, generation))
+                else: pass # xref table is corrected in non-strict mode
+            elif idnum != indirectReference.idnum and self.strict:
+                # some other problem
+                raise utils.PdfReadError("Expected object ID (%d %d) does not match actual (%d %d)." \
+                                         % (indirectReference.idnum, indirectReference.generation, idnum, generation))
+            if self.strict:
+                assert generation == indirectReference.generation
+            retval = readObject(self.stream, self)
+
+            # override encryption is used for the /Encrypt dictionary
+            if not self._override_encryption and self.isEncrypted:
+                # if we don't have the encryption key:
+                if not hasattr(self, '_decryption_key'):
+                    raise utils.PdfReadError("file has not been decrypted")
+                # otherwise, decrypt here...
+                # Per-object RC4 key: document key + low 3 bytes of the object
+                # number + low 2 bytes of the generation, MD5-hashed and
+                # truncated (standard security handler key derivation).
+                import struct
+                pack1 = struct.pack("<i", indirectReference.idnum)[:3]
+                pack2 = struct.pack("<i", indirectReference.generation)[:2]
+                key = self._decryption_key + pack1 + pack2
+                assert len(key) == (len(self._decryption_key) + 5)
+                md5_hash = md5(key).digest()
+                key = md5_hash[:min(16, len(self._decryption_key) + 5)]
+                retval = self._decryptObject(retval, key)
+        else:
+            warnings.warn("Object %d %d not defined." % (indirectReference.idnum,
+                        indirectReference.generation), utils.PdfReadWarning)
+            #if self.strict:
+            raise utils.PdfReadError("Could not find object.")
+        self.cacheIndirectObject(indirectReference.generation,
+                    indirectReference.idnum, retval)
+        return retval
+
+    def _decryptObject(self, obj, key):
+        # Recursively RC4-decrypt strings and stream data inside ``obj``
+        # with the per-object ``key``; dict/array containers are updated
+        # in place, string objects are replaced.
+        if isinstance(obj, ByteStringObject) or isinstance(obj, TextStringObject):
+            obj = createStringObject(utils.RC4_encrypt(key, obj.original_bytes))
+        elif isinstance(obj, StreamObject):
+            obj._data = utils.RC4_encrypt(key, obj._data)
+        elif isinstance(obj, DictionaryObject):
+            for dictkey, value in list(obj.items()):
+                obj[dictkey] = self._decryptObject(value, key)
+        elif isinstance(obj, ArrayObject):
+            for i in range(len(obj)):
+                obj[i] = self._decryptObject(obj[i], key)
+        return obj
+
+    def readObjectHeader(self, stream):
+        # Parse an "<idnum> <generation> obj" header at the current stream
+        # position and return (idnum, generation) as ints.
+        # Should never be necessary to read out whitespace, since the
+        # cross-reference table should put us in the right spot to read the
+        # object header.  In reality... some files have stupid cross reference
+        # tables that are off by whitespace bytes.
+        extra = False
+        utils.skipOverComment(stream)
+        extra |= utils.skipOverWhitespace(stream); stream.seek(-1, 1)
+        idnum = readUntilWhitespace(stream)
+        extra |= utils.skipOverWhitespace(stream); stream.seek(-1, 1)
+        generation = readUntilWhitespace(stream)
+        obj = stream.read(3)
+        readNonWhitespace(stream)
+        stream.seek(-1, 1)
+        if (extra and self.strict):
+            #not a fatal error
+            warnings.warn("Superfluous whitespace found in object header %s %s" % \
+                          (idnum, generation), utils.PdfReadWarning)
+        return int(idnum), int(generation)
+
+    def cacheGetIndirectObject(self, generation, idnum):
+        # Return the previously-resolved object for (generation, idnum),
+        # or None on a cache miss.
+        debug = False
+        out = self.resolvedObjects.get((generation, idnum))
+        if debug and out: print(("cache hit: %d %d"%(idnum, generation)))
+        elif debug: print(("cache miss: %d %d"%(idnum, generation)))
+        return out
+
+    def cacheIndirectObject(self, generation, idnum, obj):
+        # Store a resolved object; overwriting an existing entry raises in
+        # strict mode and warns otherwise.
+        # return None # Sometimes we want to turn off cache for debugging.
+        if (generation, idnum) in self.resolvedObjects:
+            msg = "Overwriting cache for %s %s"%(generation, idnum)
+            if self.strict: raise utils.PdfReadError(msg)
+            else:           warnings.warn(msg)
+        self.resolvedObjects[(generation, idnum)] = obj
+        return obj
+
+    def read(self, stream):
+        """
+        Parse the file trailer and every cross-reference table/stream from
+        ``stream``, populating ``self.xref``, ``self.xref_objStm`` and
+        ``self.trailer``.  Raises utils.PdfReadError on structural damage.
+        """
+        debug = False
+        if debug: print(">>read", stream)
+        # start at the end:
+        stream.seek(-1, 2)
+        if not stream.tell():
+            raise utils.PdfReadError('Cannot read an empty file')
+        last1K = stream.tell() - 1024 + 1 # offset of last 1024 bytes of stream
+        line = b_('')
+        # Scan backwards (within the last 1K only) for the %%EOF marker.
+        while line[:5] != b_("%%EOF"):
+            if stream.tell() < last1K:
+                raise utils.PdfReadError("EOF marker not found")
+            line = self.readNextEndLine(stream)
+            if debug: print("  line:",line)
+
+        # find startxref entry - the location of the xref table
+        line = self.readNextEndLine(stream)
+        try:
+            startxref = int(line)
+        except ValueError:
+            # 'startxref' may be on the same line as the location
+            if not line.startswith(b_("startxref")):
+                raise utils.PdfReadError("startxref not found")
+            startxref = int(line[9:].strip())
+            warnings.warn("startxref on same line as offset")
+        else:
+            line = self.readNextEndLine(stream)
+            if line[:9] != b_("startxref"):
+                raise utils.PdfReadError("startxref not found")
+
+        # read all cross reference tables and their trailers
+        self.xref = {}
+        self.xref_objStm = {}
+        self.trailer = DictionaryObject()
+        # Follow the /Prev chain backwards through every xref section.
+        while True:
+            # load the xref table
+            stream.seek(startxref, 0)
+            x = stream.read(1)
+            if x == b_("x"):
+                # standard cross-reference table
+                ref = stream.read(4)
+                if ref[:3] != b_("ref"):
+                    raise utils.PdfReadError("xref table read error")
+                readNonWhitespace(stream)
+                stream.seek(-1, 1)
+                firsttime = True; # check if the first time looking at the xref table
+                while True:
+                    num = readObject(stream, self)
+                    if firsttime and num != 0:
+                         self.xrefIndex = num
+                         if self.strict:
+                            warnings.warn("Xref table not zero-indexed. ID numbers for objects will be corrected.", utils.PdfReadWarning)
+                            #if table not zero indexed, could be due to error from when PDF was created
+                            #which will lead to mismatched indices later on, only warned and corrected if self.strict=True
+                    firsttime = False
+                    readNonWhitespace(stream)
+                    stream.seek(-1, 1)
+                    size = readObject(stream, self)
+                    readNonWhitespace(stream)
+                    stream.seek(-1, 1)
+                    cnt = 0
+                    while cnt < size:
+                        line = stream.read(20)
+
+                        # It's very clear in section 3.4.3 of the PDF spec
+                        # that all cross-reference table lines are a fixed
+                        # 20 bytes (as of PDF 1.7). However, some files have
+                        # 21-byte entries (or more) due to the use of \r\n
+                        # (CRLF) EOL's. Detect that case, and adjust the line
+                        # until it does not begin with a \r (CR) or \n (LF).
+                        while line[0] in b_("\x0D\x0A"):
+                            stream.seek(-20 + 1, 1)
+                            line = stream.read(20)
+
+                        # On the other hand, some malformed PDF files
+                        # use a single character EOL without a preceeding
+                        # space.  Detect that case, and seek the stream
+                        # back one character.  (0-9 means we've bled into
+                        # the next xref entry, t means we've bled into the
+                        # text "trailer"):
+                        if line[-1] in b_("0123456789t"):
+                            stream.seek(-1, 1)
+
+                        offset, generation = line[:16].split(b_(" "))
+                        offset, generation = int(offset), int(generation)
+                        if generation not in self.xref:
+                            self.xref[generation] = {}
+                        if num in self.xref[generation]:
+                            # It really seems like we should allow the last
+                            # xref table in the file to override previous
+                            # ones. Since we read the file backwards, assume
+                            # any existing key is already set correctly.
+                            pass
+                        else:
+                            self.xref[generation][num] = offset
+                        cnt += 1
+                        num += 1
+                    readNonWhitespace(stream)
+                    stream.seek(-1, 1)
+                    trailertag = stream.read(7)
+                    if trailertag != b_("trailer"):
+                        # more xrefs!
+                        stream.seek(-7, 1)
+                    else:
+                        break
+                readNonWhitespace(stream)
+                stream.seek(-1, 1)
+                newTrailer = readObject(stream, self)
+                # Later-in-file trailers win: only fill keys not yet present.
+                for key, value in list(newTrailer.items()):
+                    if key not in self.trailer:
+                        self.trailer[key] = value
+                if "/Prev" in newTrailer:
+                    startxref = newTrailer["/Prev"]
+                else:
+                    break
+            elif x.isdigit():
+                # PDF 1.5+ Cross-Reference Stream
+                stream.seek(-1, 1)
+                idnum, generation = self.readObjectHeader(stream)
+                xrefstream = readObject(stream, self)
+                assert xrefstream["/Type"] == "/XRef"
+                self.cacheIndirectObject(generation, idnum, xrefstream)
+                streamData = BytesIO(b_(xrefstream.getData()))
+                # Index pairs specify the subsections in the dictionary. If
+                # none create one subsection that spans everything.
+                idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
+                if debug: print(("read idx_pairs=%s"%list(self._pairs(idx_pairs))))
+                entrySizes = xrefstream.get("/W")
+                assert len(entrySizes) >= 3
+                if self.strict and len(entrySizes) > 3:
+                    raise utils.PdfReadError("Too many entry sizes: %s" % entrySizes)
+
+                def getEntry(i):
+                    # Reads the correct number of bytes for each entry. See the
+                    # discussion of the W parameter in PDF spec table 17.
+                    if entrySizes[i] > 0:
+                        d = streamData.read(entrySizes[i])
+                        return convertToInt(d, entrySizes[i])
+
+                    # PDF Spec Table 17: A value of zero for an element in the
+                    # W array indicates...the default value shall be used
+                    if i == 0:  return 1 # First value defaults to 1
+                    else:       return 0
+
+                def used_before(num, generation):
+                    # We move backwards through the xrefs, don't replace any.
+                    return num in self.xref.get(generation, []) or \
+                            num in self.xref_objStm
+
+                # Iterate through each subsection
+                last_end = 0
+                for start, size in self._pairs(idx_pairs):
+                    # The subsections must increase
+                    assert start >= last_end
+                    last_end = start + size
+                    for num in range(start, start+size):
+                        # The first entry is the type
+                        xref_type = getEntry(0)
+                        # The rest of the elements depend on the xref_type
+                        if xref_type == 0:
+                            # linked list of free objects
+                            next_free_object = getEntry(1)
+                            next_generation = getEntry(2)
+                        elif xref_type == 1:
+                            # objects that are in use but are not compressed
+                            byte_offset = getEntry(1)
+                            generation = getEntry(2)
+                            if generation not in self.xref:
+                                self.xref[generation] = {}
+                            if not used_before(num, generation):
+                                self.xref[generation][num] = byte_offset
+                                if debug: print(("XREF Uncompressed: %s %s"%(
+                                                num, generation)))
+                        elif xref_type == 2:
+                            # compressed objects
+                            objstr_num = getEntry(1)
+                            obstr_idx = getEntry(2)
+                            generation = 0 # PDF spec table 18, generation is 0
+                            if not used_before(num, generation):
+                                if debug: print(("XREF Compressed: %s %s %s"%(
+                                        num, objstr_num, obstr_idx)))
+                                self.xref_objStm[num] = (objstr_num, obstr_idx)
+                        elif self.strict:
+                            raise utils.PdfReadError("Unknown xref type: %s" %
+                                                     xref_type)
+
+                trailerKeys = "/Root", "/Encrypt", "/Info", "/ID"
+                for key in trailerKeys:
+                    if key in xrefstream and key not in self.trailer:
+                        self.trailer[NameObject(key)] = xrefstream.raw_get(key)
+                if "/Prev" in xrefstream:
+                    startxref = xrefstream["/Prev"]
+                else:
+                    break
+            else:
+                # bad xref character at startxref.  Let's see if we can find
+                # the xref table nearby, as we've observed this error with an
+                # off-by-one before.
+                stream.seek(-11, 1)
+                tmp = stream.read(20)
+                xref_loc = tmp.find(b_("xref"))
+                if xref_loc != -1:
+                    startxref -= (10 - xref_loc)
+                    continue
+                # No explicit xref table, try finding a cross-reference stream.
+                stream.seek(startxref, 0)
+                found = False
+                for look in range(5):
+                    if stream.read(1).isdigit():
+                        # This is not a standard PDF, consider adding a warning
+                        startxref += look
+                        found = True
+                        break
+                if found:
+                    continue
+                # no xref table found at specified location
+                raise utils.PdfReadError("Could not find xref table at specified location")
+        #if not zero-indexed, verify that the table is correct; change it if necessary
+        if self.xrefIndex and not self.strict:
+            loc = stream.tell()
+            for gen in self.xref:
+                if gen == 65535: continue
+                for id in self.xref[gen]:
+                    stream.seek(self.xref[gen][id], 0)
+                    try:
+                        pid, pgen = self.readObjectHeader(stream)
+                    except ValueError:
+                        break
+                    if pid == id - self.xrefIndex:
+                        self._zeroXref(gen)
+                        break
+                    #if not, then either it's just plain wrong, or the non-zero-index is actually correct
+            stream.seek(loc, 0) #return to where it was
+
+    def _zeroXref(self, generation):
+        # Shift object ids down by self.xrefIndex so the table becomes
+        # zero-indexed (repairs PDFs whose first xref entry is not object 0).
+        self.xref[generation] = dict( (k-self.xrefIndex, v) for (k, v) in list(self.xref[generation].items()) )
+
+    def _pairs(self, array):
+        # Yield (array[i], array[i+1]) for consecutive pairs; used to walk
+        # the [start, count, ...] pairs of a cross-reference stream /Index.
+        i = 0
+        while True:
+            yield array[i], array[i+1]
+            i += 2
+            if (i+1) >= len(array):
+                break
+
+    def readNextEndLine(self, stream):
+        # Read one line *backwards* from the current position: accumulate
+        # bytes right-to-left until an EOL marker, return the line's bytes
+        # and leave the cursor positioned before the preceding EOL.
+        debug = False
+        if debug: print(">>readNextEndLine")
+        line = b_("")
+        while True:
+            # Prevent infinite loops in malformed PDFs
+            if stream.tell() == 0:
+                raise utils.PdfReadError("Could not read malformed PDF file")
+            x = stream.read(1)
+            if debug: print(("  x:", x, "%x"%ord(x)))
+            if stream.tell() < 2:
+                raise utils.PdfReadError("EOL marker not found")
+            stream.seek(-2, 1)
+            if x == b_('\n') or x == b_('\r'): ## \n = LF; \r = CR
+                crlf = False
+                # Consume the full run of EOL bytes preceding the line.
+                while x == b_('\n') or x == b_('\r'):
+                    if debug:
+                        if ord(x) == 0x0D: print("  x is CR 0D")
+                        elif ord(x) == 0x0A: print("  x is LF 0A")
+                    x = stream.read(1)
+                    if x == b_('\n') or x == b_('\r'): # account for CR+LF
+                        stream.seek(-1, 1)
+                        crlf = True
+                    if stream.tell() < 2:
+                        raise utils.PdfReadError("EOL marker not found")
+                    stream.seek(-2, 1)
+                stream.seek(2 if crlf else 1, 1) #if using CR+LF, go back 2 bytes, else 1
+                break
+            else:
+                if debug: print("  x is neither")
+                line = x + line
+                if debug: print(("  RNEL line:", line))
+        if debug: print("leaving RNEL")
+        return line
+
+    def decrypt(self, password):
+        """
+        When using an encrypted / secured PDF file with the PDF Standard
+        encryption handler, this function will allow the file to be decrypted.
+        It checks the given password against the document's user password and
+        owner password, and then stores the resulting decryption key if either
+        password is correct.
+
+        It does not matter which password was matched.  Both passwords provide
+        the correct decryption key that will allow the document to be used with
+        this library.
+
+        :param str password: The password to match.
+        :return: ``0`` if the password failed, ``1`` if the password matched the user
+            password, and ``2`` if the password matched the owner password.
+        :rtype: int
+        :raises NotImplementedError: if document uses an unsupported encryption
+            method.
+        """
+
+        self._override_encryption = True
+        try:
+            return self._decrypt(password)
+        finally:
+            self._override_encryption = False
+
    def _decrypt(self, password):
        """
        Check *password* first as the user password, then as the owner
        password.

        :return: ``1`` when the user password matched, ``2`` when the owner
            password matched, ``0`` otherwise.  On success the derived key is
            stored in ``self._decryption_key``.
        :raises NotImplementedError: for non-Standard security handlers or
            algorithm codes other than 1 and 2.
        """
        encrypt = self.trailer['/Encrypt'].getObject()
        if encrypt['/Filter'] != '/Standard':
            raise NotImplementedError("only Standard PDF encryption handler is available")
        if not (encrypt['/V'] in (1, 2)):
            raise NotImplementedError("only algorithm code 1 and 2 are supported. This PDF uses code %s" % encrypt['/V'])
        # First try the supplied password as the user password.
        user_password, key = self._authenticateUserPassword(password)
        if user_password:
            self._decryption_key = key
            return 1
        else:
            rev = encrypt['/R'].getObject()
            if rev == 2:
                keylen = 5
            else:
                keylen = encrypt['/Length'].getObject() // 8
            key = _alg33_1(password, rev, keylen)
            real_O = encrypt["/O"].getObject()
            if rev == 2:
                userpass = utils.RC4_encrypt(key, real_O)
            else:
                # Revision >= 3: undo the 20 XOR-keyed RC4 passes applied to
                # /O in order to recover the user password from it.
                val = real_O
                for i in range(19, -1, -1):
                    new_key = b_('')
                    for l in range(len(key)):
                        new_key += b_(chr(utils.ord_(key[l]) ^ i))
                    val = utils.RC4_encrypt(new_key, val)
                userpass = val
            # If the recovered user password authenticates, the supplied
            # password was the owner password.
            owner_password, key = self._authenticateUserPassword(userpass)
            if owner_password:
                self._decryption_key = key
                return 2
        return 0
+
    def _authenticateUserPassword(self, password):
        """
        Derive the /U value from *password* (via ``_alg34``/``_alg35``,
        presumably the PDF standard security algorithms 3.4/3.5 — confirm
        against those helpers) and compare it with the document's stored /U.

        :return: ``(matched, key)`` where *matched* is True when the computed
            value equals the stored one and *key* is the derived encryption
            key.
        """
        encrypt = self.trailer['/Encrypt'].getObject()
        rev = encrypt['/R'].getObject()
        owner_entry = encrypt['/O'].getObject()
        p_entry = encrypt['/P'].getObject()
        id_entry = self.trailer['/ID'].getObject()
        id1_entry = id_entry[0].getObject()
        real_U = encrypt['/U'].getObject().original_bytes
        if rev == 2:
            U, key = _alg34(password, owner_entry, p_entry, id1_entry)
        elif rev >= 3:
            U, key = _alg35(password, rev,
                    encrypt["/Length"].getObject() // 8, owner_entry,
                    p_entry, id1_entry,
                    encrypt.get("/EncryptMetadata", BooleanObject(False)).getObject())
            # Only the first 16 bytes of /U are significant for rev >= 3.
            U, real_U = U[:16], real_U[:16]
        return U == real_U, key
+
+    def getIsEncrypted(self):
+        return "/Encrypt" in self.trailer
+
    # Exposed as a property so callers can write ``reader.isEncrypted``.
    isEncrypted = property(lambda self: self.getIsEncrypted(), None, None)
    """
    Read-only boolean property showing whether this PDF file is encrypted.
    Note that this property, if true, will remain true even after the
    :meth:`decrypt()<PdfFileReader.decrypt>` method is called.
    """
+
+
def getRectangle(self, name, defaults):
    """
    Resolve the rectangle entry *name* on a page dictionary.

    Falls back through the keys in *defaults* when *name* is absent,
    resolves indirect references, coerces the value to a
    ``RectangleObject`` and caches it back under *name*.

    :param self: the page dictionary being read.
    :param str name: primary rectangle key (e.g. ``"/MediaBox"``).
    :param defaults: iterable of fallback keys tried in order.
    :return: the resolved :class:`RectangleObject`.
    """
    retval = self.get(name)
    if isinstance(retval, RectangleObject):
        return retval
    # Identity comparison: ``== None`` invokes the PDF objects' own
    # equality, which is both slower and potentially surprising.
    if retval is None:
        for d in defaults:
            retval = self.get(d)
            if retval is not None:
                break
    if isinstance(retval, IndirectObject):
        retval = self.pdf.getObject(retval)
    retval = RectangleObject(retval)
    # Cache the normalized rectangle so later reads hit the fast path.
    setRectangle(self, name, retval)
    return retval
+
+
def setRectangle(self, name, value):
    """Store *value* under *name* on the page, coercing the key to a
    NameObject when needed."""
    key = name if isinstance(name, NameObject) else NameObject(name)
    self[key] = value
+
+
def deleteRectangle(self, name):
    """Remove the rectangle entry *name* from the page dictionary
    (raises KeyError when the entry is absent)."""
    del self[name]
+
+
def createRectangleAccessor(name, fallback):
    """Build a property that reads/writes/deletes the rectangle entry
    *name*, falling back through the keys in *fallback* on read."""
    def _get(self):
        return getRectangle(self, name, fallback)

    def _set(self, value):
        setRectangle(self, name, value)

    def _delete(self):
        deleteRectangle(self, name)

    return property(_get, _set, _delete)
+
+
class PageObject(DictionaryObject):
    """
    This class represents a single page within a PDF file.  Typically this
    object will be created by accessing the
    :meth:`getPage()<PyPDF2.PdfFileReader.getPage>` method of the
    :class:`PdfFileReader<PyPDF2.PdfFileReader>` class, but it is
    also possible to create an empty page with the
    :meth:`createBlankPage()<PageObject.createBlankPage>` static method.

    :param pdf: PDF file the page belongs to.
    :param indirectRef: Stores the original indirect reference to
        this object in its source PDF
    """
    def __init__(self, pdf=None, indirectRef=None):
        DictionaryObject.__init__(self)
        # Owning document object (may be None for a standalone page).
        self.pdf = pdf
        # Original indirect reference of this page in its source PDF.
        self.indirectRef = indirectRef
+
+    def createBlankPage(pdf=None, width=None, height=None):
+        """
+        Returns a new blank page.
+        If ``width`` or ``height`` is ``None``, try to get the page size
+        from the last page of *pdf*.
+
+        :param pdf: PDF file the page belongs to
+        :param float width: The width of the new page expressed in default user
+            space units.
+        :param float height: The height of the new page expressed in default user
+            space units.
+        :return: the new blank page:
+        :rtype: :class:`PageObject<PageObject>`
+        :raises PageSizeNotDefinedError: if ``pdf`` is ``None`` or contains
+            no page
+        """
+        page = PageObject(pdf)
+
+        # Creates a new page (cf PDF Reference  7.7.3.3)
+        page.__setitem__(NameObject('/Type'), NameObject('/Page'))
+        page.__setitem__(NameObject('/Parent'), NullObject())
+        page.__setitem__(NameObject('/Resources'), DictionaryObject())
+        if width is None or height is None:
+            if pdf is not None and pdf.getNumPages() > 0:
+                lastpage = pdf.getPage(pdf.getNumPages() - 1)
+                width = lastpage.mediaBox.getWidth()
+                height = lastpage.mediaBox.getHeight()
+            else:
+                raise utils.PageSizeNotDefinedError()
+        page.__setitem__(NameObject('/MediaBox'),
+            RectangleObject([0, 0, width, height]))
+
+        return page
+    createBlankPage = staticmethod(createBlankPage)
+
    def rotateClockwise(self, angle):
        """
        Rotates a page clockwise by increments of 90 degrees.

        :param int angle: Angle to rotate the page.  Must be an increment
            of 90 deg.
        :return: this page, allowing call chaining.
        """
        # NOTE: assert is stripped under ``python -O``; callers are expected
        # to pass a multiple of 90.
        assert angle % 90 == 0
        self._rotate(angle)
        return self
+
    def rotateCounterClockwise(self, angle):
        """
        Rotates a page counter-clockwise by increments of 90 degrees.

        :param int angle: Angle to rotate the page.  Must be an increment
            of 90 deg.
        :return: this page, allowing call chaining.
        """
        # NOTE: assert is stripped under ``python -O``; callers are expected
        # to pass a multiple of 90.
        assert angle % 90 == 0
        self._rotate(-angle)
        return self
+
+    def _rotate(self, angle):
+        rotateObj = self.get("/Rotate", 0)
+        currentAngle = rotateObj if isinstance(rotateObj, int) else rotateObj.getObject()
+        self[NameObject("/Rotate")] = NumberObject(currentAngle + angle)
+
+    def _mergeResources(res1, res2, resource):
+        newRes = DictionaryObject()
+        newRes.update(res1.get(resource, DictionaryObject()).getObject())
+        page2Res = res2.get(resource, DictionaryObject()).getObject()
+        renameRes = {}
+        for key in list(page2Res.keys()):
+            if key in newRes and newRes.raw_get(key) != page2Res.raw_get(key):
+                newname = NameObject(key + str(uuid.uuid4()))
+                renameRes[key] = newname
+                newRes[newname] = page2Res[key]
+            elif key not in newRes:
+                newRes[key] = page2Res.raw_get(key)
+        return newRes, renameRes
+    _mergeResources = staticmethod(_mergeResources)
+
+    def _contentStreamRename(stream, rename, pdf):
+        if not rename:
+            return stream
+        stream = ContentStream(stream, pdf)
+        for operands, _operator in stream.operations:
+            for i in range(len(operands)):
+                op = operands[i]
+                if isinstance(op, NameObject):
+                    operands[i] = rename.get(op,op)
+        return stream
+    _contentStreamRename = staticmethod(_contentStreamRename)
+
+    def _pushPopGS(contents, pdf):
+        # adds a graphics state "push" and "pop" to the beginning and end
+        # of a content stream.  This isolates it from changes such as
+        # transformation matricies.
+        stream = ContentStream(contents, pdf)
+        stream.operations.insert(0, [[], "q"])
+        stream.operations.append([[], "Q"])
+        return stream
+    _pushPopGS = staticmethod(_pushPopGS)
+
+    def _addTransformationMatrix(contents, pdf, ctm):
+        # adds transformation matrix at the beginning of the given
+        # contents stream.
+        a, b, c, d, e, f = ctm
+        contents = ContentStream(contents, pdf)
+        contents.operations.insert(0, [[FloatObject(a), FloatObject(b),
+            FloatObject(c), FloatObject(d), FloatObject(e),
+            FloatObject(f)], " cm"])
+        return contents
+    _addTransformationMatrix = staticmethod(_addTransformationMatrix)
+
+    def getContents(self):
+        """
+        Accesses the page contents.
+
+        :return: the ``/Contents`` object, or ``None`` if it doesn't exist.
+            ``/Contents`` is optional, as described in PDF Reference  7.7.3.3
+        """
+        if "/Contents" in self:
+            return self["/Contents"].getObject()
+        else:
+            return None
+
+    def mergePage(self, page2):
+        """
+        Merges the content streams of two pages into one.  Resource references
+        (i.e. fonts) are maintained from both pages.  The mediabox/cropbox/etc
+        of this page are not altered.  The parameter page's content stream will
+        be added to the end of this page's content stream, meaning that it will
+        be drawn after, or "on top" of this page.
+
+        :param PageObject page2: The page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        """
+        self._mergePage(page2)
+
    def _mergePage(self, page2, page2transformation=None, ctm=None, expand=False):
        """
        Merge *page2* into this page.

        :param page2: page whose resources, annotations and content are
            appended to this one.
        :param page2transformation: optional callable applied to page2's
            content stream before merging.
        :param ctm: the 6-element matrix that *page2transformation* applies;
            used to recompute the media box when *expand* is true.
        :param bool expand: grow this page's media box to enclose page2.
        """
        # First we work on merging the resource dictionaries.  This allows us
        # to find out what symbols in the content streams we might need to
        # rename.

        newResources = DictionaryObject()
        rename = {}
        originalResources = self["/Resources"].getObject()
        page2Resources = page2["/Resources"].getObject()
        newAnnots = ArrayObject()

        # Keep the annotation arrays from both pages.
        for page in (self, page2):
            if "/Annots" in page:
                annots = page["/Annots"]
                if isinstance(annots, ArrayObject):
                    for ref in annots:
                        newAnnots.append(ref)

        for res in "/ExtGState", "/Font", "/XObject", "/ColorSpace", "/Pattern", "/Shading", "/Properties":
            new, newrename = PageObject._mergeResources(originalResources, page2Resources, res)
            if new:
                newResources[NameObject(res)] = new
                rename.update(newrename)

        # Combine /ProcSet sets.
        newResources[NameObject("/ProcSet")] = ArrayObject(
            frozenset(originalResources.get("/ProcSet", ArrayObject()).getObject()).union(
                frozenset(page2Resources.get("/ProcSet", ArrayObject()).getObject())
            )
        )

        newContentArray = ArrayObject()

        # Both streams are wrapped in q/Q so page2's transformations cannot
        # leak into this page's graphics state (and vice versa).
        originalContent = self.getContents()
        if originalContent is not None:
            newContentArray.append(PageObject._pushPopGS(
                  originalContent, self.pdf))

        page2Content = page2.getContents()
        if page2Content is not None:
            if page2transformation is not None:
                page2Content = page2transformation(page2Content)
            page2Content = PageObject._contentStreamRename(
                page2Content, rename, self.pdf)
            page2Content = PageObject._pushPopGS(page2Content, self.pdf)
            newContentArray.append(page2Content)

        # if expanding the page to fit a new page, calculate the new media box size
        if expand:
            corners1 = [self.mediaBox.getLowerLeft_x().as_numeric(), self.mediaBox.getLowerLeft_y().as_numeric(),
                        self.mediaBox.getUpperRight_x().as_numeric(), self.mediaBox.getUpperRight_y().as_numeric()]
            corners2 = [page2.mediaBox.getLowerLeft_x().as_numeric(), page2.mediaBox.getLowerLeft_y().as_numeric(),
                        page2.mediaBox.getUpperLeft_x().as_numeric(), page2.mediaBox.getUpperLeft_y().as_numeric(),
                        page2.mediaBox.getUpperRight_x().as_numeric(), page2.mediaBox.getUpperRight_y().as_numeric(),
                        page2.mediaBox.getLowerRight_x().as_numeric(), page2.mediaBox.getLowerRight_y().as_numeric()]
            if ctm is not None:
                # Map page2's four corners through the CTM before taking the
                # bounding box.
                ctm = [float(x) for x in ctm]
                new_x = [ctm[0]*corners2[i] + ctm[2]*corners2[i+1] + ctm[4] for i in range(0, 8, 2)]
                new_y = [ctm[1]*corners2[i] + ctm[3]*corners2[i+1] + ctm[5] for i in range(0, 8, 2)]
            else:
                new_x = corners2[0:8:2]
                new_y = corners2[1:8:2]
            lowerleft = [min(new_x), min(new_y)]
            upperright = [max(new_x), max(new_y)]
            lowerleft = [min(corners1[0], lowerleft[0]), min(corners1[1], lowerleft[1])]
            upperright = [max(corners1[2], upperright[0]), max(corners1[3], upperright[1])]

            self.mediaBox.setLowerLeft(lowerleft)
            self.mediaBox.setUpperRight(upperright)

        self[NameObject('/Contents')] = ContentStream(newContentArray, self.pdf)
        self[NameObject('/Resources')] = newResources
        self[NameObject('/Annots')] = newAnnots
+
+    def mergeTransformedPage(self, page2, ctm, expand=False):
+        """
+        This is similar to mergePage, but a transformation matrix is
+        applied to the merged stream.
+
+        :param PageObject page2: The page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param tuple ctm: a 6-element tuple containing the operands of the
+            transformation matrix
+        :param bool expand: Whether the page should be expanded to fit the dimensions
+            of the page to be merged.
+        """
+        self._mergePage(page2, lambda page2Content:
+            PageObject._addTransformationMatrix(page2Content, page2.pdf, ctm), ctm, expand)
+
+    def mergeScaledPage(self, page2, scale, expand=False):
+        """
+        This is similar to mergePage, but the stream to be merged is scaled
+        by appling a transformation matrix.
+
+        :param PageObject page2: The page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param float scale: The scaling factor
+        :param bool expand: Whether the page should be expanded to fit the
+            dimensions of the page to be merged.
+        """
+        # CTM to scale : [ sx 0 0 sy 0 0 ]
+        return self.mergeTransformedPage(page2, [scale, 0,
+                                                 0,      scale,
+                                                 0,      0], expand)
+
+    def mergeRotatedPage(self, page2, rotation, expand=False):
+        """
+        This is similar to mergePage, but the stream to be merged is rotated
+        by appling a transformation matrix.
+
+        :param PageObject page2: the page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param float rotation: The angle of the rotation, in degrees
+        :param bool expand: Whether the page should be expanded to fit the
+            dimensions of the page to be merged.
+        """
+        rotation = math.radians(rotation)
+        return self.mergeTransformedPage(page2,
+            [math.cos(rotation),  math.sin(rotation),
+             -math.sin(rotation), math.cos(rotation),
+             0,                   0], expand)
+
+    def mergeTranslatedPage(self, page2, tx, ty, expand=False):
+        """
+        This is similar to mergePage, but the stream to be merged is translated
+        by appling a transformation matrix.
+
+        :param PageObject page2: the page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param float tx: The translation on X axis
+        :param float ty: The translation on Y axis
+        :param bool expand: Whether the page should be expanded to fit the
+            dimensions of the page to be merged.
+        """
+        return self.mergeTransformedPage(page2, [1,  0,
+                                                 0,  1,
+                                                 tx, ty], expand)
+
+    def mergeRotatedTranslatedPage(self, page2, rotation, tx, ty, expand=False):
+        """
+        This is similar to mergePage, but the stream to be merged is rotated
+        and translated by appling a transformation matrix.
+
+        :param PageObject page2: the page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param float tx: The translation on X axis
+        :param float ty: The translation on Y axis
+        :param float rotation: The angle of the rotation, in degrees
+        :param bool expand: Whether the page should be expanded to fit the
+            dimensions of the page to be merged.
+        """
+
+        translation = [[1, 0, 0],
+                       [0, 1, 0],
+                       [-tx, -ty, 1]]
+        rotation = math.radians(rotation)
+        rotating = [[math.cos(rotation), math.sin(rotation), 0],
+                    [-math.sin(rotation), math.cos(rotation), 0],
+                    [0,                  0,                  1]]
+        rtranslation = [[1, 0, 0],
+                       [0, 1, 0],
+                       [tx, ty, 1]]
+        ctm = utils.matrixMultiply(translation, rotating)
+        ctm = utils.matrixMultiply(ctm, rtranslation)
+
+        return self.mergeTransformedPage(page2, [ctm[0][0], ctm[0][1],
+                                                 ctm[1][0], ctm[1][1],
+                                                 ctm[2][0], ctm[2][1]], expand)
+
+    def mergeRotatedScaledPage(self, page2, rotation, scale, expand=False):
+        """
+        This is similar to mergePage, but the stream to be merged is rotated
+        and scaled by appling a transformation matrix.
+
+        :param PageObject page2: the page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param float rotation: The angle of the rotation, in degrees
+        :param float scale: The scaling factor
+        :param bool expand: Whether the page should be expanded to fit the
+            dimensions of the page to be merged.
+        """
+        rotation = math.radians(rotation)
+        rotating = [[math.cos(rotation), math.sin(rotation), 0],
+                    [-math.sin(rotation), math.cos(rotation), 0],
+                    [0,                  0,                  1]]
+        scaling = [[scale, 0,    0],
+                   [0,    scale, 0],
+                   [0,    0,    1]]
+        ctm = utils.matrixMultiply(rotating, scaling)
+
+        return self.mergeTransformedPage(page2,
+                                         [ctm[0][0], ctm[0][1],
+                                          ctm[1][0], ctm[1][1],
+                                          ctm[2][0], ctm[2][1]], expand)
+
+    def mergeScaledTranslatedPage(self, page2, scale, tx, ty, expand=False):
+        """
+        This is similar to mergePage, but the stream to be merged is translated
+        and scaled by appling a transformation matrix.
+
+        :param PageObject page2: the page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param float scale: The scaling factor
+        :param float tx: The translation on X axis
+        :param float ty: The translation on Y axis
+        :param bool expand: Whether the page should be expanded to fit the
+            dimensions of the page to be merged.
+        """
+
+        translation = [[1, 0, 0],
+                       [0, 1, 0],
+                       [tx, ty, 1]]
+        scaling = [[scale, 0,    0],
+                   [0,    scale, 0],
+                   [0,    0,    1]]
+        ctm = utils.matrixMultiply(scaling, translation)
+
+        return self.mergeTransformedPage(page2, [ctm[0][0], ctm[0][1],
+                                                 ctm[1][0], ctm[1][1],
+                                                 ctm[2][0], ctm[2][1]], expand)
+
+    def mergeRotatedScaledTranslatedPage(self, page2, rotation, scale, tx, ty, expand=False):
+        """
+        This is similar to mergePage, but the stream to be merged is translated,
+        rotated and scaled by appling a transformation matrix.
+
+        :param PageObject page2: the page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param float tx: The translation on X axis
+        :param float ty: The translation on Y axis
+        :param float rotation: The angle of the rotation, in degrees
+        :param float scale: The scaling factor
+        :param bool expand: Whether the page should be expanded to fit the
+            dimensions of the page to be merged.
+        """
+        translation = [[1, 0, 0],
+                       [0, 1, 0],
+                       [tx, ty, 1]]
+        rotation = math.radians(rotation)
+        rotating = [[math.cos(rotation), math.sin(rotation), 0],
+                    [-math.sin(rotation), math.cos(rotation), 0],
+                    [0,                  0,                  1]]
+        scaling = [[scale, 0,    0],
+                   [0,    scale, 0],
+                   [0,    0,    1]]
+        ctm = utils.matrixMultiply(rotating, scaling)
+        ctm = utils.matrixMultiply(ctm, translation)
+
+        return self.mergeTransformedPage(page2, [ctm[0][0], ctm[0][1],
+                                                 ctm[1][0], ctm[1][1],
+                                                 ctm[2][0], ctm[2][1]], expand)
+
+    ##
+    # Applys a transformation matrix the page.
+    #
+    # @param ctm   A 6 elements tuple containing the operands of the
+    #              transformation matrix
+    def addTransformation(self, ctm):
+        """
+        Applies a transformation matrix to the page.
+
+        :param tuple ctm: A 6-element tuple containing the operands of the
+            transformation matrix.
+        """
+        originalContent = self.getContents()
+        if originalContent is not None:
+            newContent = PageObject._addTransformationMatrix(
+                originalContent, self.pdf, ctm)
+            newContent = PageObject._pushPopGS(newContent, self.pdf)
+            self[NameObject('/Contents')] = newContent
+
+    def scale(self, sx, sy):
+        """
+        Scales a page by the given factors by appling a transformation
+        matrix to its content and updating the page size.
+
+        :param float sx: The scaling factor on horizontal axis.
+        :param float sy: The scaling factor on vertical axis.
+        """
+        self.addTransformation([sx, 0,
+                                0,  sy,
+                                0,  0])
+        self.mediaBox = RectangleObject([
+            float(self.mediaBox.getLowerLeft_x()) * sx,
+            float(self.mediaBox.getLowerLeft_y()) * sy,
+            float(self.mediaBox.getUpperRight_x()) * sx,
+            float(self.mediaBox.getUpperRight_y()) * sy])
+        if "/VP" in self:
+            viewport = self["/VP"]
+            if isinstance(viewport, ArrayObject):
+                bbox = viewport[0]["/BBox"]
+            else:
+                bbox = viewport["/BBox"]
+            scaled_bbox = RectangleObject([
+                float(bbox[0]) * sx,
+                float(bbox[1]) * sy,
+                float(bbox[2]) * sx,
+                float(bbox[3]) * sy])
+            if isinstance(viewport, ArrayObject):
+                self[NameObject("/VP")][NumberObject(0)][NameObject("/BBox")] = scaled_bbox
+            else:
+                self[NameObject("/VP")][NameObject("/BBox")] = scaled_bbox
+
+    def scaleBy(self, factor):
+        """
+        Scales a page by the given factor by appling a transformation
+        matrix to its content and updating the page size.
+
+        :param float factor: The scaling factor (for both X and Y axis).
+        """
+        self.scale(factor, factor)
+
+    def scaleTo(self, width, height):
+        """
+        Scales a page to the specified dimentions by appling a
+        transformation matrix to its content and updating the page size.
+
+        :param float width: The new width.
+        :param float height: The new heigth.
+        """
+        sx = width / float(self.mediaBox.getUpperRight_x() -
+                      self.mediaBox.getLowerLeft_x ())
+        sy = height / float(self.mediaBox.getUpperRight_y() -
+                       self.mediaBox.getLowerLeft_y ())
+        self.scale(sx, sy)
+
+    def compressContentStreams(self):
+        """
+        Compresses the size of this page by joining all content streams and
+        applying a FlateDecode filter.
+
+        However, it is possible that this function will perform no action if
+        content stream compression becomes "automatic" for some reason.
+        """
+        content = self.getContents()
+        if content is not None:
+            if not isinstance(content, ContentStream):
+                content = ContentStream(content, self.pdf)
+            self[NameObject("/Contents")] = content.flateEncode()
+
+    def extractText(self):
+        """
+        Locate all text drawing commands, in the order they are provided in the
+        content stream, and extract the text.  This works well for some PDF
+        files, but poorly for others, depending on the generator used.  This will
+        be refined in the future.  Do not rely on the order of text coming out of
+        this function, as it will change if this function is made more
+        sophisticated.
+
+        :return: a unicode string object.
+        """
+        text = u_("")
+        content = self["/Contents"].getObject()
+        if not isinstance(content, ContentStream):
+            content = ContentStream(content, self.pdf)
+        # Note: we check all strings are TextStringObjects.  ByteStringObjects
+        # are strings where the byte->string encoding was unknown, so adding
+        # them to the text here would be gibberish.
+        for operands, operator in content.operations:
+            if operator == b_("Tj"):
+                _text = operands[0]
+                if isinstance(_text, TextStringObject):
+                    text += _text
+                    text += "\n"
+            elif operator == b_("T*"):
+                text += "\n"
+            elif operator == b_("'"):
+                text += "\n"
+                _text = operands[0]
+                if isinstance(_text, TextStringObject):
+                    text += operands[0]
+            elif operator == b_('"'):
+                _text = operands[2]
+                if isinstance(_text, TextStringObject):
+                    text += "\n"
+                    text += _text
+            elif operator == b_("TJ"):
+                for i in operands[0]:
+                    if isinstance(i, TextStringObject):
+                        text += " "
+                        text += i
+                text += "\n"
+        return text
+
    # Standard page-boundary boxes (PDF Reference 7.7.3.3).  Each accessor
    # falls back through the keys in its tuple when the entry is absent.
    mediaBox = createRectangleAccessor("/MediaBox", ())
    """
    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
    defining the boundaries of the physical medium on which the page is
    intended to be displayed or printed.
    """

    cropBox = createRectangleAccessor("/CropBox", ("/MediaBox",))
    """
    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
    defining the visible region of default user space.  When the page is
    displayed or printed, its contents are to be clipped (cropped) to this
    rectangle and then imposed on the output medium in some
    implementation-defined manner.  Default value: same as :attr:`mediaBox<mediaBox>`.
    """

    bleedBox = createRectangleAccessor("/BleedBox", ("/CropBox", "/MediaBox"))
    """
    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
    defining the region to which the contents of the page should be clipped
    when output in a production enviroment.
    """

    trimBox = createRectangleAccessor("/TrimBox", ("/CropBox", "/MediaBox"))
    """
    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
    defining the intended dimensions of the finished page after trimming.
    """

    artBox = createRectangleAccessor("/ArtBox", ("/CropBox", "/MediaBox"))
    """
    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
    defining the extent of the page's meaningful content as intended by the
    page's creator.
    """
+
+
class ContentStream(DecodedStreamObject):
    """
    A parsed page content stream: the raw operator stream is tokenised into
    ``self.operations``, a list of ``(operands, operator)`` pairs.
    """
    def __init__(self, stream, pdf):
        self.pdf = pdf
        self.operations = []
        # stream may be a StreamObject or an ArrayObject containing
        # multiple StreamObjects to be cat'd together.
        stream = stream.getObject()
        if isinstance(stream, ArrayObject):
            data = b_("")
            for s in stream:
                data += b_(s.getObject().getData())
            stream = BytesIO(b_(data))
        else:
            stream = BytesIO(b_(stream.getData()))
        self.__parseContentStream(stream)
+
    def __parseContentStream(self, stream):
        """
        Tokenise the raw content stream into ``self.operations`` as
        ``(operands, operator)`` pairs.  Inline images (``BI`` ... ``EI``)
        become a single ``(settings_and_data, b"INLINE IMAGE")`` entry.
        """
        stream.seek(0, 0)
        operands = []
        while True:
            peek = readNonWhitespace(stream)
            if peek == b_('') or ord_(peek) == 0:
                break
            stream.seek(-1, 1)
            # An alphabetic character (or ' / ") begins an operator token;
            # anything else is an operand object.
            if peek.isalpha() or peek == b_("'") or peek == b_('"'):
                operator = utils.readUntilRegex(stream,
                                                NameObject.delimiterPattern, True)
                if operator == b_("BI"):
                    # begin inline image - a completely different parsing
                    # mechanism is required, of course... thanks buddy...
                    assert operands == []
                    ii = self._readInlineImage(stream)
                    self.operations.append((ii, b_("INLINE IMAGE")))
                else:
                    self.operations.append((operands, operator))
                    operands = []
            elif peek == b_('%'):
                # If we encounter a comment in the content stream, we have to
                # handle it here.  Typically, readObject will handle
                # encountering a comment -- but readObject assumes that
                # following the comment must be the object we're trying to
                # read.  In this case, it could be an operator instead.
                while peek not in (b_('\r'), b_('\n')):
                    peek = stream.read(1)
            else:
                operands.append(readObject(stream, None))
+
+    def _readInlineImage(self, stream):
+        # begin reading just after the "BI" - begin image
+        # first read the dictionary of settings.
+        settings = DictionaryObject()
+        while True:
+            tok = readNonWhitespace(stream)
+            stream.seek(-1, 1)
+            if tok == b_("I"):
+                # "ID" - begin of image data
+                break
+            key = readObject(stream, self.pdf)
+            tok = readNonWhitespace(stream)
+            stream.seek(-1, 1)
+            value = readObject(stream, self.pdf)
+            settings[key] = value
+        # left at beginning of ID
+        tmp = stream.read(3)
+        assert tmp[:2] == b_("ID")
+        data = b_("")
+        while True:
+            # Read the inline image, while checking for EI (End Image) operator.
+            tok = stream.read(1)
+            if tok == b_("E"):
+                # Check for End Image
+                tok2 = stream.read(1)
+                if tok2 == b_("I"):
+                    # Data can contain EI, so check for the Q operator.
+                    tok3 = stream.read(1)
+                    info = tok + tok2
+                    # We need to find whitespace between EI and Q.
+                    has_q_whitespace = False
+                    while tok3 in utils.WHITESPACES:
+                        has_q_whitespace = True
+                        info += tok3
+                        tok3 = stream.read(1)
+                    if tok3 == b_("Q") and has_q_whitespace:
+                        stream.seek(-1, 1)
+                        break
+                    else:
+                        stream.seek(-1,1)
+                        data += info
+                else:
+                    stream.seek(-1, 1)
+                    data += tok
+            else:
+                data += tok
+        return {"settings": settings, "data": data}
+
+    def _getData(self):
+        newdata = BytesIO()
+        for operands, operator in self.operations:
+            if operator == b_("INLINE IMAGE"):
+                newdata.write(b_("BI"))
+                dicttext = BytesIO()
+                operands["settings"].writeToStream(dicttext, None)
+                newdata.write(dicttext.getvalue()[2:-2])
+                newdata.write(b_("ID "))
+                newdata.write(operands["data"])
+                newdata.write(b_("EI"))
+            else:
+                for op in operands:
+                    op.writeToStream(newdata, None)
+                    newdata.write(b_(" "))
+                newdata.write(b_(operator))
+            newdata.write(b_("\n"))
+        return newdata.getvalue()
+
+    def _setData(self, value):
+        self.__parseContentStream(BytesIO(b_(value)))
+
+    _data = property(_getData, _setData)
+
+
class DocumentInformation(DictionaryObject):
    """
    The basic document metadata found in a PDF file's /Info dictionary,
    accessible through
    :meth:`getDocumentInfo()<PyPDF2.PdfFileReader.getDocumentInfo()>`.

    Every text field is exposed twice, e.g. ``author`` and ``author_raw``.
    The plain property only ever yields a ``TextStringObject`` (or ``None``),
    so it is safe to display directly; the ``_raw`` variant may yield a
    ``ByteStringObject`` when PyPDF2 could not decode the string's text
    encoding, and therefore needs extra care in the caller.
    """

    def __init__(self):
        DictionaryObject.__init__(self)

    def getText(self, key):
        """Return the value stored under *key* only if it decoded to text."""
        value = self.get(key, None)
        return value if isinstance(value, TextStringObject) else None

    title = property(lambda self: self.getText("/Title"))
    """The document's title as a ``TextStringObject``, or ``None``."""
    title_raw = property(lambda self: self.get("/Title"))
    """Raw title value; may be a ``ByteStringObject``."""

    author = property(lambda self: self.getText("/Author"))
    """The document's author as a ``TextStringObject``, or ``None``."""
    author_raw = property(lambda self: self.get("/Author"))
    """Raw author value; may be a ``ByteStringObject``."""

    subject = property(lambda self: self.getText("/Subject"))
    """The document's subject as a ``TextStringObject``, or ``None``."""
    subject_raw = property(lambda self: self.get("/Subject"))
    """Raw subject value; may be a ``ByteStringObject``."""

    creator = property(lambda self: self.getText("/Creator"))
    """Name of the application (e.g. OpenOffice) that created the original
    document when the PDF was converted from another format, or ``None``."""
    creator_raw = property(lambda self: self.get("/Creator"))
    """Raw creator value; may be a ``ByteStringObject``."""

    producer = property(lambda self: self.getText("/Producer"))
    """Name of the application (e.g. OSX Quartz) that converted the document
    to PDF, or ``None``."""
    producer_raw = property(lambda self: self.get("/Producer"))
    """Raw producer value; may be a ``ByteStringObject``."""
+
+
def convertToInt(d, size):
    """
    Interpret up to eight big-endian bytes *d* as a signed integer.

    :raises utils.PdfReadError: when *size* exceeds 8 bytes.
    """
    if size > 8:
        raise utils.PdfReadError("invalid size in convertToInt")
    # Left-pad with zero bytes to exactly eight, then decode as int64.
    padded = (b_("\x00") * 8 + b_(d))[-8:]
    return struct.unpack(">q", padded)[0]
+
+# ref: pdf1.8 spec section 3.5.2 algorithm 3.2
+# The fixed 32-byte padding string the standard security handler uses to
+# pad or extend user/owner passwords before hashing.
+_encryption_padding = b_('\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56') + \
+        b_('\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c') + \
+        b_('\xa9\xfe\x64\x53\x69\x7a')
+
+
+# Implementation of algorithm 3.2 of the PDF standard security handler,
+# section 3.5.2 of the PDF 1.6 reference.
def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True):
    """
    Algorithm 3.2 of the PDF standard security handler (PDF 1.6 reference,
    section 3.5.2): derive the file encryption key from a password.

    :param password: user or owner password.
    :param int rev: security handler revision (/R).
    :param int keylen: encryption key length in bytes (/Length / 8).
    :param owner_entry: the encryption dictionary's /O entry.
    :param int p_entry: the /P permissions value (signed 32-bit).
    :param id1_entry: first element of the file's /ID array.
    :param bool metadata_encrypt: when False (rev 3+), the
        unencrypted-metadata marker is mixed into the hash.
    :return: the first *keylen* bytes of the final MD5 digest.
    """
    # 1. Pad or truncate the password to exactly 32 bytes using the
    # standard padding string.
    password = b_((str_(password) + str_(_encryption_padding))[:32])
    # 2. Initialize the MD5 hash with the padded password.
    m = md5(password)
    # 3. Mix in the encryption dictionary's /O entry.
    m.update(owner_entry.original_bytes)
    # 4. Mix in /P as an unsigned 4-byte integer, low-order byte first.
    p_entry = struct.pack('<i', p_entry)
    m.update(p_entry)
    # 5. Mix in the first element of the file identifier array.
    m.update(id1_entry.original_bytes)
    # 6. (Rev 3+) If metadata is not encrypted, mix in 0xFFFFFFFF.
    if rev >= 3 and not metadata_encrypt:
        m.update(b_("\xff\xff\xff\xff"))
    # 7. Finish the hash.
    md5_hash = m.digest()
    # 8. (Rev 3+) Re-hash the first keylen bytes of the digest 50 times.
    if rev >= 3:
        for _ in range(50):
            md5_hash = md5(md5_hash[:keylen]).digest()
    # 9. The key is the first keylen bytes (always 5 for rev 2).
    return md5_hash[:keylen]
+
+
+# Implementation of algorithm 3.3 of the PDF standard security handler,
+# section 3.5.2 of the PDF 1.6 reference.
def _alg33(owner_pwd, user_pwd, rev, keylen):
    """
    Algorithm 3.3 of the PDF standard security handler (PDF 1.6 reference,
    section 3.5.2): compute the /O (owner password) entry of the
    encryption dictionary.
    """
    # Steps 1-4: derive an RC4 key from the owner password.
    key = _alg33_1(owner_pwd, rev, keylen)
    # Step 5: pad/truncate the user password exactly as in algorithm 3.2.
    padded_user = b_((user_pwd + str_(_encryption_padding))[:32])
    # Step 6: RC4-encrypt the padded user password with that key.
    val = utils.RC4_encrypt(key, padded_user)
    if rev >= 3:
        # Step 7: nineteen further RC4 passes, each keyed by the original
        # key XORed bytewise with the iteration counter (1..19).
        for i in range(1, 20):
            xored = ''.join(chr(ord_(byte) ^ i) for byte in key)
            val = utils.RC4_encrypt(xored, val)
    # Step 8: the final RC4 output is the /O value.
    return val
+
+
+# Steps 1-4 of algorithm 3.3
def _alg33_1(password, rev, keylen):
    """Steps 1-4 of algorithm 3.3: derive the RC4 key used to build /O."""
    # Step 1: pad or truncate the (owner) password to exactly 32 bytes.
    padded = b_((password + str_(_encryption_padding))[:32])
    # Step 2: MD5 the padded password.
    digest = md5(padded).digest()
    # Step 3 (rev 3+): re-hash the digest 50 more times.
    if rev >= 3:
        for _ in range(50):
            digest = md5(digest).digest()
    # Step 4: the key is the first keylen bytes (always 5 for rev 2).
    return digest[:keylen]
+
+
+# Implementation of algorithm 3.4 of the PDF standard security handler,
+# section 3.5.2 of the PDF 1.6 reference.
def _alg34(password, owner_entry, p_entry, id1_entry):
    """
    Algorithm 3.4 of the PDF standard security handler (PDF 1.6 reference,
    section 3.5.2, revision 2): compute the /U entry from the user password.

    :return: ``(U_value, encryption_key)`` tuple.
    """
    # Step 1: derive the file encryption key (revision-2 keys are 5 bytes).
    key = _alg32(password, 2, 5, owner_entry, p_entry, id1_entry)
    # Steps 2-3: /U is the RC4 encryption of the 32-byte padding string.
    U = utils.RC4_encrypt(key, _encryption_padding)
    return U, key
+
+
+# Implementation of algorithm 3.4 of the PDF standard security handler,
+# section 3.5.2 of the PDF 1.6 reference.
+def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt):
+    """Algorithm 3.4/3.5 (rev 3+) of the PDF standard security handler:
+    compute the /U entry from the user password.  Returns (U_value, key)."""
+    # 1. Create an encryption key based on the user password string, as
+    # described in Algorithm 3.2.
+    key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
+    # 2. Initialize the MD5 hash function and pass the 32-byte padding string
+    # shown in step 1 of Algorithm 3.2 as input to this function.
+    m = md5()
+    m.update(_encryption_padding)
+    # 3. Pass the first element of the file's file identifier array (the value
+    # of the ID entry in the document's trailer dictionary; see Table 3.13 on
+    # page 73) to the hash function and finish the hash.  (See implementation
+    # note 25 in Appendix H.)
+    m.update(id1_entry.original_bytes)
+    md5_hash = m.digest()
+    # 4. Encrypt the 16-byte result of the hash, using an RC4 encryption
+    # function with the encryption key from step 1.
+    val = utils.RC4_encrypt(key, md5_hash)
+    # 5. Do the following 19 times: Take the output from the previous
+    # invocation of the RC4 function and pass it as input to a new invocation
+    # of the function; use an encryption key generated by taking each byte of
+    # the original encryption key (obtained in step 2) and performing an XOR
+    # operation between that byte and the single-byte value of the iteration
+    # counter (from 1 to 19).
+    for i in range(1, 20):
+        new_key = b_('')
+        for k in key:
+            new_key += b_(chr(ord_(k) ^ i))
+        val = utils.RC4_encrypt(new_key, val)
+    # 6. Append 16 bytes of arbitrary padding to the output from the final
+    # invocation of the RC4 function and store the 32-byte result as the value
+    # of the U entry in the encryption dictionary.
+    # (implementator note: I don't know what "arbitrary padding" is supposed to
+    # mean, so I have used null bytes.  This seems to match a few other
+    # people's implementations)
+    return val + (b_('\x00') * 16), key

+ 311 - 0
need_package/PyPDF2-master/PyPDF2/utils.py

@@ -0,0 +1,311 @@
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Utility functions for PDF library.
+"""
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+
+import sys
+
+try:
+    import __builtin__ as builtins
+except ImportError:  # Py3
+    import builtins
+
+
+xrange_fn = getattr(builtins, "xrange", range)  # xrange on Py2, range on Py3
+_basestring = getattr(builtins, "basestring", str)  # basestring on Py2, str on Py3
+
+bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X
+string_type = getattr(builtins, "unicode", str)  # unicode on Py2, str on Py3
+int_types = (int, long) if sys.version_info[0] < 3 else (int,)  # 'long' only exists on Py2
+
+
+# Make basic type tests more consistent
def isString(s):
    """Return True when *s* is a text string (str; basestring on Python 2)."""
    return isinstance(s, _basestring)
+
+
def isInt(n):
    """Return True when *n* is an integer (int; also long on Python 2)."""
    return isinstance(n, int_types)
+
+
+def isBytes(b):
+    """Test if arg is a bytes instance. Compatible with Python 2 and 3."""
+    # Deprecated helper: every call emits a DeprecationWarning.
+    import warnings
+    warnings.warn("PyPDF2.utils.isBytes will be deprecated", DeprecationWarning)
+    return isinstance(b, bytes_type)
+
+
+#custom implementation of warnings.formatwarning
+def formatWarning(message, category, filename, lineno, line=None):
+    file = filename.replace("/", "\\").rsplit("\\", 1)[-1] # find the file name
+    return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)
+
+
def readUntilWhitespace(stream, maxchars=None):
    """
    Accumulate characters from *stream* until whitespace or EOF is met
    (the terminating whitespace is consumed but not returned), or until
    exactly *maxchars* characters have been collected.
    """
    txt = b_("")
    tok = stream.read(1)
    while tok and not tok.isspace():
        txt += tok
        if len(txt) == maxchars:
            return txt
        tok = stream.read(1)
    return txt
+
+
def readNonWhitespace(stream):
    """Skip any whitespace and return the first non-whitespace character
    read (empty at EOF)."""
    tok = stream.read(1)
    while tok in WHITESPACES:
        tok = stream.read(1)
    return tok
+
+
def skipOverWhitespace(stream):
    """
    Consume a run of whitespace (always reading at least one character)
    and return True when more than one character was consumed.
    """
    reads = 1
    tok = stream.read(1)
    while tok in WHITESPACES:
        tok = stream.read(1)
        reads += 1
    return reads > 1
+
+
def skipOverComment(stream):
    """
    If the stream is positioned at a '%' comment marker, consume the
    comment through the end of its line; otherwise leave the stream
    position unchanged.
    """
    tok = stream.read(1)
    stream.seek(-1, 1)
    if tok == b_('%'):
        while tok not in (b_('\n'), b_('\r')):
            tok = stream.read(1)
            if not tok:
                # EOF before end-of-line: stop here rather than spinning
                # forever on empty reads.
                break
+
+
def readUntilRegex(stream, regex, ignore_eof=False):
    """
    Read from *stream* until *regex* matches, returning the data read up
    to (but not including) the match; the stream is left positioned at
    the start of the match.

    :param bool ignore_eof: if True, return what was read so far on EOF
        instead of raising.
    :raises PdfStreamError: on premature end-of-file when *ignore_eof*
        is False.
    """
    # NOTE(review): data is scanned in 16-byte chunks, so a match that
    # spans a chunk boundary is not detected -- confirm callers only use
    # single-character delimiter patterns.
    name = b_('')
    while True:
        tok = stream.read(16)
        if not tok:
            # stream has truncated prematurely
            if ignore_eof:
                return name
            raise PdfStreamError("Stream has ended unexpectedly")
        m = regex.search(tok)
        if m is not None:
            name += tok[:m.start()]
            # Rewind so the stream sits exactly at the match.
            stream.seek(m.start() - len(tok), 1)
            break
        name += tok
    return name
+
+
+class ConvertFunctionsToVirtualList(object):
+    """Presents a (length, getter) pair of callables as a read-only,
+    sliceable, negatively-indexable sequence."""
+    def __init__(self, lengthFunction, getFunction):
+        self.lengthFunction = lengthFunction
+        self.getFunction = getFunction
+
+    def __len__(self):
+        return self.lengthFunction()
+
+    def __getitem__(self, index):
+        if isinstance(index, slice):
+            # Resolve the slice against the current length and wrap the
+            # selected indices in a new virtual list.
+            indices = xrange_fn(*index.indices(len(self)))
+            cls = type(self)
+            return cls(indices.__len__, lambda idx: self[indices[idx]])
+        if not isInt(index):
+            raise TypeError("sequence indices must be integers")
+        len_self = len(self)
+        if index < 0:
+            # support negative indexes
+            index = len_self + index
+        if index < 0 or index >= len_self:
+            raise IndexError("sequence index out of range")
+        return self.getFunction(index)
+
+
+def RC4_encrypt(key, plaintext):
+    """Encrypt (or, identically, decrypt) *plaintext* with RC4 under *key*."""
+    # Key-scheduling algorithm (KSA): permute the 256-byte state with the key.
+    S = [i for i in range(256)]
+    j = 0
+    for i in range(256):
+        j = (j + S[i] + ord_(key[i % len(key)])) % 256
+        S[i], S[j] = S[j], S[i]
+    # Pseudo-random generation (PRGA): XOR each input byte with the keystream.
+    i, j = 0, 0
+    retval = []
+    for x in range(len(plaintext)):
+        i = (i + 1) % 256
+        j = (j + S[i]) % 256
+        S[i], S[j] = S[j], S[i]
+        t = S[(S[i] + S[j]) % 256]
+        retval.append(b_(chr(ord_(plaintext[x]) ^ t)))
+    return b_("").join(retval)
+
+
def matrixMultiply(a, b):
    """Return the matrix product of *a* and *b* as nested lists of floats."""
    columns = list(zip(*b))
    product = []
    for row in a:
        product.append([
            sum(float(x) * float(y) for x, y in zip(row, col))
            for col in columns
        ])
    return product
+
+
+def markLocation(stream):
+    """Creates text file showing current location in context."""
+    # Mainly for debugging
+    RADIUS = 5000
+    stream.seek(-RADIUS, 1)
+    outputDoc = open('PyPDF2_pdfLocation.txt', 'w')
+    # NOTE(review): PDF streams yield bytes; writing them to a text-mode
+    # file would raise on Python 3 -- confirm intended (debug-only) usage.
+    outputDoc.write(stream.read(RADIUS))
+    outputDoc.write('HERE')
+    outputDoc.write(stream.read(RADIUS))
+    outputDoc.close()
+    # Restore the stream to its original position.
+    stream.seek(-RADIUS, 1)
+
+
+class PyPdfError(Exception):
+    """Base class for every exception raised by PyPDF2."""
+    pass
+
+
+class PdfReadError(PyPdfError):
+    """Raised when reading or parsing a PDF fails."""
+    pass
+
+
+class PageSizeNotDefinedError(PyPdfError):
+    """Raised when a page size is needed but has not been defined."""
+    pass
+
+
+class PdfReadWarning(UserWarning):
+    """Warning category for non-fatal problems found while reading a PDF."""
+    pass
+
+
+class PdfStreamError(PdfReadError):
+    """PdfReadError raised for stream-level problems (e.g. unexpected EOF)."""
+    pass
+
+
+if sys.version_info[0] < 3:
+    def b_(s):
+        # Python 2: str is already bytes; return unchanged.
+        return s
+else:
+    # Cache of encodings; only strings shorter than two characters are
+    # stored (see below), keeping the cache bounded.
+    B_CACHE = {}
+
+    def b_(s):
+        """Coerce *s* to bytes (latin-1-encoding text on Python 3)."""
+        bc = B_CACHE
+        if s in bc:
+            return bc[s]
+        if type(s) == bytes:
+            return s
+        else:
+            r = s.encode('latin-1')
+            if len(s) < 2:
+                bc[s] = r
+            return r
+
+
def u_(s):
    """Return *s* as a text string (Python 2 decodes escape sequences)."""
    if sys.version_info[0] >= 3:
        return s
    return unicode(s, 'unicode_escape')
+
+
def str_(b):
    """Return a native str: on Python 3 latin-1-decode bytes, otherwise
    pass the value through unchanged."""
    if sys.version_info[0] < 3:
        return b
    if type(b) == bytes:
        return b.decode('latin-1')
    return b
+
+
def ord_(b):
    """Return the ordinal of *b*; handles both 1-char strings and the
    ints produced by indexing bytes on Python 3."""
    if sys.version_info[0] < 3 or type(b) == str:
        return ord(b)
    return b
+
+
def chr_(c):
    """Inverse of ord_: code point to 1-char string on Python 3; the
    argument is returned unchanged on Python 2."""
    if sys.version_info[0] >= 3:
        return chr(c)
    return c
+
+
def barray(b):
    """Return a mutable byte buffer: bytearray(b) on Python 3, *b* itself
    on Python 2."""
    if sys.version_info[0] >= 3:
        return bytearray(b)
    return b
+
+
def hexencode(b):
    """Hex-encode *b*: bytes via the hex_codec on Python 3, or
    str.encode('hex') on Python 2."""
    if sys.version_info[0] < 3:
        return b.encode('hex')
    import codecs
    encode = codecs.getencoder('hex_codec')
    return encode(b)[0]
+
+
def hexStr(num):
    """Return hex(num) with any Python 2 long-integer 'L' suffix removed."""
    return hex(num).replace('L', '')
+
+
+# PDF whitespace characters (space, LF, CR, tab, NUL) as single bytes.
+WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]
+
+
def paethPredictor(left, up, up_left):
    """
    Paeth predictor from the PNG specification: return whichever of
    *left*, *up*, *up_left* is closest to the estimate
    ``left + up - up_left``, breaking ties in that order.
    """
    estimate = left + up - up_left
    # The second tuple element encodes the tie-break priority.
    candidates = (
        (abs(estimate - left), 0, left),
        (abs(estimate - up), 1, up),
        (abs(estimate - up_left), 2, up_left),
    )
    return min(candidates)[2]

+ 358 - 0
need_package/PyPDF2-master/PyPDF2/xmp.py

@@ -0,0 +1,358 @@
+import re
+import datetime
+import decimal
+from .generic import PdfObject
+from xml.dom import getDOMImplementation
+from xml.dom.minidom import parseString
+from .utils import u_
+
+RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
+XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
+PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
+XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"
+
+# What is the PDFX namespace, you might ask?  I might ask that too.  It's
+# a completely undocumented namespace used to place "custom metadata"
+# properties, which are arbitrary metadata properties with no semantic or
+# documented meaning.  Elements in the namespace are key/value-style storage,
+# where the element name is the key and the content is the value.  The keys
+# are transformed into valid XML identifiers by substituting an invalid
+# identifier character with \u2182 followed by the unicode hex ID of the
+# original character.  A key like "my car" is therefore "my\u21820020car".
+#
+# \u2182, in case you're wondering, is the unicode character
+# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
+# escaping characters.
+#
+# Intentional users of the pdfx namespace should be shot on sight.  A
+# custom data schema and sensical XML elements could be used instead, as is
+# suggested by Adobe's own documentation on XMP (under "Extensibility of
+# Schemas").
+#
+# Information presented here on the /pdfx/ schema is a result of limited
+# reverse engineering, and does not constitute a full specification.
+PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
+
+# Regex for XMP (ISO 8601-style) date strings; only the year is mandatory.
+# NOTE(review): the '.' before the fractional seconds is unescaped, so it
+# matches any character -- likely intended as '\.'; confirm before changing.
+iso8601 = re.compile("""
+        (?P<year>[0-9]{4})
+        (-
+            (?P<month>[0-9]{2})
+            (-
+                (?P<day>[0-9]+)
+                (T
+                    (?P<hour>[0-9]{2}):
+                    (?P<minute>[0-9]{2})
+                    (:(?P<second>[0-9]{2}(.[0-9]+)?))?
+                    (?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
+                )?
+            )?
+        )?
+        """, re.VERBOSE)
+
+
+class XmpInformation(PdfObject):
+    """
+    An object that represents Adobe XMP metadata.
+    Usually accessed by :meth:`getXmpMetadata()<PyPDF2.PdfFileReader.getXmpMetadata>`
+    """
+
+    def __init__(self, stream):
+        self.stream = stream
+        docRoot = parseString(self.stream.getData())
+        self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
+        self.cache = {}
+
+    def writeToStream(self, stream, encryption_key):
+        """Delegate serialization to the underlying metadata stream object."""
+        self.stream.writeToStream(stream, encryption_key)
+
+    def getElement(self, aboutUri, namespace, name):
+        """Yield every attribute node and child element matching
+        (namespace, name) on rdf:Description nodes whose rdf:about
+        attribute equals *aboutUri*."""
+        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
+            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
+                attr = desc.getAttributeNodeNS(namespace, name)
+                if attr != None:
+                    yield attr
+                for element in desc.getElementsByTagNameNS(namespace, name):
+                    yield element
+
+    def getNodesInNamespace(self, aboutUri, namespace):
+        """Yield every attribute and child node belonging to *namespace*
+        on rdf:Description nodes whose rdf:about equals *aboutUri*."""
+        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
+            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
+                for i in range(desc.attributes.length):
+                    attr = desc.attributes.item(i)
+                    if attr.namespaceURI == namespace:
+                        yield attr
+                for child in desc.childNodes:
+                    if child.namespaceURI == namespace:
+                        yield child
+
+    def _getText(self, element):
+        text = ""
+        for child in element.childNodes:
+            if child.nodeType == child.TEXT_NODE:
+                text += child.data
+        return text
+
+    def _converter_string(value):
+        # Identity converter.  Deliberately has no 'self': the converters
+        # are referenced as plain functions from within the class body by
+        # the _getter_* factories.
+        return value
+
+    def _converter_date(value):
+        m = iso8601.match(value)
+        year = int(m.group("year"))
+        month = int(m.group("month") or "1")
+        day = int(m.group("day") or "1")
+        hour = int(m.group("hour") or "0")
+        minute = int(m.group("minute") or "0")
+        second = decimal.Decimal(m.group("second") or "0")
+        seconds = second.to_integral(decimal.ROUND_FLOOR)
+        milliseconds = (second - seconds) * 1000000
+        tzd = m.group("tzd") or "Z"
+        dt = datetime.datetime(year, month, day, hour, minute, seconds, milliseconds)
+        if tzd != "Z":
+            tzd_hours, tzd_minutes = [int(x) for x in tzd.split(":")]
+            tzd_hours *= -1
+            if tzd_hours < 0:
+                tzd_minutes *= -1
+            dt = dt + datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
+        return dt
+    _test_converter_date = staticmethod(_converter_date)
+
+    def _getter_bag(namespace, name, converter):
+        # Property factory: builds a getter that collects the converted
+        # values of every rdf:Bag item (unordered array) found for
+        # (namespace, name), caching the result per namespace/name.
+        def get(self):
+            cached = self.cache.get(namespace, {}).get(name)
+            # NOTE(review): an empty cached list is falsy, so empty results
+            # are recomputed on every access.
+            if cached:
+                return cached
+            retval = []
+            for element in self.getElement("", namespace, name):
+                bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag")
+                if len(bags):
+                    for bag in bags:
+                        for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
+                            value = self._getText(item)
+                            value = converter(value)
+                            retval.append(value)
+            ns_cache = self.cache.setdefault(namespace, {})
+            ns_cache[name] = retval
+            return retval
+        return get
+
+    def _getter_seq(namespace, name, converter):
+        # Property factory: like _getter_bag but for rdf:Seq (ordered
+        # array); when no Seq child exists, the element's own text is used
+        # as a single value.
+        def get(self):
+            cached = self.cache.get(namespace, {}).get(name)
+            # NOTE(review): falsy (empty) cached results are recomputed.
+            if cached:
+                return cached
+            retval = []
+            for element in self.getElement("", namespace, name):
+                seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
+                if len(seqs):
+                    for seq in seqs:
+                        for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
+                            value = self._getText(item)
+                            value = converter(value)
+                            retval.append(value)
+                else:
+                    value = converter(self._getText(element))
+                    retval.append(value)
+            ns_cache = self.cache.setdefault(namespace, {})
+            ns_cache[name] = retval
+            return retval
+        return get
+
+    def _getter_langalt(namespace, name, converter):
+        # Property factory: returns a dict keyed by the rdf:Alt items'
+        # xml:lang attributes; a bare element's text is stored under the
+        # "x-default" key.
+        def get(self):
+            cached = self.cache.get(namespace, {}).get(name)
+            # NOTE(review): falsy (empty) cached results are recomputed.
+            if cached:
+                return cached
+            retval = {}
+            for element in self.getElement("", namespace, name):
+                alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
+                if len(alts):
+                    for alt in alts:
+                        for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
+                            value = self._getText(item)
+                            value = converter(value)
+                            retval[item.getAttribute("xml:lang")] = value
+                else:
+                    retval["x-default"] = converter(self._getText(element))
+            ns_cache = self.cache.setdefault(namespace, {})
+            ns_cache[name] = retval
+            return retval
+        return get
+
+    def _getter_single(namespace, name, converter):
+        # Property factory: returns the first matching attribute value or
+        # element text, converted; a None result is cached but, being
+        # falsy, is recomputed on the next access.
+        def get(self):
+            cached = self.cache.get(namespace, {}).get(name)
+            if cached:
+                return cached
+            value = None
+            for element in self.getElement("", namespace, name):
+                if element.nodeType == element.ATTRIBUTE_NODE:
+                    value = element.nodeValue
+                else:
+                    value = self._getText(element)
+                break
+            if value != None:
+                value = converter(value)
+            ns_cache = self.cache.setdefault(namespace, {})
+            ns_cache[name] = value
+            return value
+        return get
+
    # --- Dublin Core (dc:) properties -------------------------------------

    dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))
    """
    Contributors to the resource (other than the authors). An unsorted
    array of names.
    """

    dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))
    """
    Text describing the extent or scope of the resource.
    """

    dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))
    """
    A sorted array of names of the authors of the resource, listed in order
    of precedence.
    """

    dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))
    """
    A sorted array of dates (datetime.datetime instances) of significance to
    the resource.  The dates and times are in UTC.
    """

    dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))
    """
    A language-keyed dictionary of textual descriptions of the content of the
    resource.
    """

    dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))
    """
    The mime-type of the resource.
    """

    dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))
    """
    Unique identifier of the resource.
    """

    dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))
    """
    An unordered array specifying the languages used in the resource.
    """

    dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))
    """
    An unordered array of publisher names.
    """

    dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))
    """
    An unordered array of text descriptions of relationships to other
    documents.
    """

    dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))
    """
    A language-keyed dictionary of textual descriptions of the rights the
    user has to this resource.
    """

    dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))
    """
    Unique identifier of the work from which this resource was derived.
    """

    dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))
    """
    An unordered array of descriptive phrases or keywords that specify the
    topic of the content of the resource.
    """

    dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))
    """
    A language-keyed dictionary of the title of the resource.
    """

    dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))
    """
    An unordered array of textual descriptions of the document type.
    """

    # --- Adobe PDF schema (pdf:) properties -------------------------------

    pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))
    """
    An unformatted text string representing document keywords.
    """

    pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))
    """
    The PDF file version, for example 1.0, 1.3.
    """

    pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))
    """
    The name of the tool that created the PDF document.
    """

    # --- XMP basic schema (xmp:) properties -------------------------------

    xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))
    """
    The date and time the resource was originally created.  The date and
    time are returned as a UTC datetime.datetime object.
    """

    xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))
    """
    The date and time the resource was last modified.  The date and time
    are returned as a UTC datetime.datetime object.
    """

    xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))
    """
    The date and time that any metadata for this resource was last
    changed.  The date and time are returned as a UTC datetime.datetime
    object.
    """

    xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))
    """
    The name of the first known tool used to create the resource.
    """

    # --- XMP media management (xmpMM:) properties -------------------------

    xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))
    """
    The common identifier for all versions and renditions of this resource.
    """

    xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))
    """
    An identifier for a specific incarnation of a document, updated each
    time a file is saved.
    """
+
    def custom_properties(self):
        # Lazily build and memoize the dict of custom pdfx: properties.
        if not hasattr(self, "_custom_properties"):
            self._custom_properties = {}
            for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
                key = node.localName
                while True:
                    # see documentation about PDFX_NAMESPACE earlier in file
                    # "\u2182" marks an escaped character: the 4 hex digits
                    # that follow it encode the real code point.
                    idx = key.find(u_("\u2182"))
                    if idx == -1:
                        break
                    key = key[:idx] + chr(int(key[idx+1:idx+5], base=16)) + key[idx+5:]
                # Attribute nodes carry text in nodeValue; element nodes need
                # their child text nodes collected.
                if node.nodeType == node.ATTRIBUTE_NODE:
                    value = node.nodeValue
                else:
                    value = self._getText(node)
                self._custom_properties[key] = value
        return self._custom_properties

    custom_properties = property(custom_properties)
    """
    Retrieves custom metadata properties defined in the undocumented pdfx
    metadata schema.

    :return: a dictionary of key/value items for custom metadata properties.
    :rtype: dict
    """

+ 44 - 0
need_package/PyPDF2-master/README.md

@@ -0,0 +1,44 @@
+[![PyPI version](https://badge.fury.io/py/PyPDF2.svg)](https://badge.fury.io/py/PyPDF2)
+[![Python Support](https://img.shields.io/pypi/pyversions/PyPDF2.svg)](https://pypi.org/project/PyPDF2/)
+[![](https://img.shields.io/badge/-documentation-green)](https://pythonhosted.org/PyPDF2/)
+![GitHub last commit](https://img.shields.io/github/last-commit/mstamy2/PyPDF2)
+
+# PyPDF2
+
+PyPDF2 is a pure-python PDF library capable of
+splitting, merging together, cropping, and transforming
+the pages of PDF files. It can also add custom
+data, viewing options, and passwords to PDF files.
+It can retrieve text and metadata from PDFs as well
+as merge entire files together.
+
+[Homepage](http://mstamy2.github.io/PyPDF2/)
+
+
+
+## Installation
+
+To install via pip:
+
+```
+pip install PyPDF2
+```
+
+## Examples
+
+Please see the `Sample_Code` folder.
+
+## FAQ
+
+A lot of questions are asked [on StackOverflow](https://stackoverflow.com/questions/tagged/pypdf2).
+
+Please see [FAQ](http://mstamy2.github.io/PyPDF2/FAQ.html)
+
+
+## Tests
+PyPDF2 includes a test suite built on the unittest framework. All tests are located in the "Tests" folder.
+Tests can be run from the command line by:
+
+```bash
+python -m unittest Tests.tests
+```

二进制
need_package/PyPDF2-master/Resources/attachment.pdf


二进制
need_package/PyPDF2-master/Resources/commented-xmp.pdf


二进制
need_package/PyPDF2-master/Resources/commented.pdf


二进制
need_package/PyPDF2-master/Resources/crazyones.pdf


+ 1 - 0
need_package/PyPDF2-master/Resources/crazyones.txt

@@ -0,0 +1 @@
+ The Cr azy Ones Octob er 14, 1998 Heres to the crazy ones. The mis˝ts. The reb els. The troublemak ers. The round p egs in the square holes. The ones who see things di˙eren tly . Theyre not fond of rules. And they ha v e no resp ect for the status quo. Y ou can quote them, disagree with them, glorify or vilify them. Ab out the only thing y ou cant do is ignore them. Because they c hange things. They in v en t. They imagine. They heal. They explore. They create. They inspire. They push the h uman race forw ard. Ma yb e they ha v e to b e crazy . Ho w else can y ou stare at an empt y can v as and see a w ork of art? Or sit in silence and hear a song thats nev er b een written? Or gaze at a red planet and see a lab oratory on wheels? W e mak e to ols for these kinds of p eople. While some see them as the crazy ones, w e see genius. Because the p eople who are crazy enough to think they can c hange the w orld, are the ones who do.

二进制
need_package/PyPDF2-master/Resources/git.pdf


二进制
need_package/PyPDF2-master/Resources/jpeg.pdf


文件差异内容过多而无法显示
+ 0 - 0
need_package/PyPDF2-master/Resources/jpeg.txt


二进制
need_package/PyPDF2-master/Resources/libreoffice-writer-password.pdf


二进制
need_package/PyPDF2-master/Resources/pdflatex-outline.pdf


+ 14 - 0
need_package/PyPDF2-master/Sample_Code/README.txt

@@ -0,0 +1,14 @@
+PyPDF2 Sample Code Folder
+-------------------------
+
+This will contain demonstrations of the many features
+PyPDF2 is capable of. Example code should make it easy
+for users to know how to use all aspects of PyPDF2.
+
+
+
+Feel free to add any type of PDF file or sample code, 
+either by
+
+	1) sending it via email to PyPDF2@phaseit.net
+	2) including it in a pull request on GitHub

+ 0 - 0
need_package/PyPDF2-master/Sample_Code/__init__.py


+ 51 - 0
need_package/PyPDF2-master/Sample_Code/basic_features.py

@@ -0,0 +1,51 @@
"""PyPDF2 feature tour: rotate, watermark, crop, add JS, encrypt, write."""
from PyPDF2 import PdfFileWriter, PdfFileReader

output = PdfFileWriter()
input1 = PdfFileReader(open("document1.pdf", "rb"))

# print how many pages input1 has:
print("document1.pdf has %d pages." % input1.getNumPages())

# print how many pages input1 has in python3:
# print("This chart has {} pages.".format(input1.getNumPages()))

# add page 1 from input1 to output document, unchanged:
output.addPage(input1.getPage(0))

# add page 2 from input1, but rotated clockwise 90 degrees:
output.addPage(input1.getPage(1).rotateClockwise(90))

# add page 3 from input1, rotated the other way:
output.addPage(input1.getPage(2).rotateCounterClockwise(90))
# alt: output.addPage(input1.getPage(2).rotateClockwise(270))

# add page 4 from input1, but first add a watermark from another PDF:
page4 = input1.getPage(3)
watermark = PdfFileReader(open("watermark.pdf", "rb"))
page4.mergePage(watermark.getPage(0))
output.addPage(page4)


# add page 5 from input1, but crop it to half size:
page5 = input1.getPage(4)
page5.mediaBox.upperRight = (
    page5.mediaBox.getUpperRight_x() / 2,
    page5.mediaBox.getUpperRight_y() / 2
)
output.addPage(page5)

# add some Javascript to launch the print window on opening this PDF.
# the password dialog may prevent the print dialog from being shown;
# comment out the encryption lines below, if that's the case, to try this out:
output.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")

# encrypt your new PDF and add a password:
password = "secret"
output.encrypt(password)

# add a title to your new PDF's metadata:
output.addMetadata({'/Title': 'PDF Metadata Title'})

# finally, write "output" to document-output.pdf
with open("PyPDF2-output.pdf", "wb") as outputStream:
    output.write(outputStream)

+ 20 - 0
need_package/PyPDF2-master/Sample_Code/basic_merging.py

@@ -0,0 +1,20 @@
"""Merge pages from several PDFs into one output document (PyPDF2 demo)."""
from PyPDF2 import PdfFileMerger

merger = PdfFileMerger()

input1 = open("document1.pdf", "rb")
input2 = open("document2.pdf", "rb")
input3 = open("document3.pdf", "rb")

# add the first 3 pages of input1 document to output
merger.append(fileobj = input1, pages = (0,3))

# insert the first page of input2 into the output beginning after the second page
merger.merge(position = 2, fileobj = input2, pages = (0,1))

# append entire input3 document to the end of the output document
merger.append(input3)

# Write to an output PDF document; the `with` block closes the handle so the
# bytes are actually flushed to disk (the original never closed any file).
with open("document-output.pdf", "wb") as output:
    merger.write(output)

# Release the merger's resources and the three input handles as well.
merger.close()
for handle in (input1, input2, input3):
    handle.close()

+ 38 - 0
need_package/PyPDF2-master/Sample_Code/makesimple.py

@@ -0,0 +1,38 @@
#!/usr/bin/env python
"Make some simple multipage pdf files."

from __future__ import print_function
from sys import argv

from reportlab.pdfgen import canvas

# Unit conversions: reportlab measures in points; 72 points = 1 inch.
point = 1
inch = 72

TEXT = """%s    page %d of %d

a wonderful file
created with Sample_Code/makesimple.py"""


def make_pdf_file(output_filename, np):
    """Write `np` pages of boilerplate text to `output_filename`."""
    title = output_filename
    c = canvas.Canvas(output_filename, pagesize=(8.5 * inch, 11 * inch))
    c.setStrokeColorRGB(0,0,0)
    c.setFillColorRGB(0,0,0)
    c.setFont("Helvetica", 12 * point)
    for pn in range(1, np + 1):
        v = 10 * inch  # start each page one inch below the top edge
        for subtline in (TEXT % (output_filename, pn, np)).split( '\n' ):
            c.drawString( 1 * inch, v, subtline )
            v -= 12 * point  # advance one 12pt text line
        c.showPage()
    c.save()

if __name__ == "__main__":
    # Index 0 is a None placeholder so the files are named simple1..simple3.
    nps = [None, 5, 11, 17]
    for i, np in enumerate(nps):
        if np:
            filename = "simple%d.pdf" % i
            make_pdf_file(filename, np)
            print ("Wrote", filename)

+ 19 - 0
need_package/PyPDF2-master/Sample_Code/makesimple.sh

@@ -0,0 +1,19 @@
#!/bin/sh
# Generate simple1.pdf, simple2.pdf and simple3.pdf with 5, 11 and 17 pages.
# Requires enscript and ps2pdf (ghostscript) on the PATH.

n=1
for np in 5 11 17; do
   p=1
   f=simple$n.pdf
   # Emit one block of text per page; enscript paginates the stream and
   # ps2pdf converts the resulting PostScript to $f.
   while expr $p \<= $np > /dev/null; do
     if [ $p != 1 ]; then
       echo "\c"
      fi
     echo "$f           page $p of $np"
     echo ""
     echo "an incredible, yet simple example"
     echo "Created with Sample_Code/makesimple.sh"
     p=$(expr $p + 1)
    done | enscript --no-header -o - |ps2pdf - $f
   echo $f
   n=$(expr $n + 1)
 done

+ 54 - 0
need_package/PyPDF2-master/Scripts/2-up.py

@@ -0,0 +1,54 @@
from PyPDF2 import PdfFileWriter, PdfFileReader
import sys
import math


def main():
    """2-up an input PDF: merge each pair of pages onto one sheet.

    Usage: python 2-up.py input_file output_file
    An odd trailing page is dropped (the loop stops at getNumPages()-1),
    matching the original behavior.
    """
    if (len(sys.argv) != 3):
        print("usage: python 2-up.py input_file output_file")
        sys.exit(1)
    print ("2-up input " + sys.argv[1])
    input1 = PdfFileReader(open(sys.argv[1], "rb"))
    output = PdfFileWriter()
    # `page_index` instead of `iter` — the original shadowed the builtin.
    for page_index in range(0, input1.getNumPages() - 1, 2):
        lhs = input1.getPage(page_index)
        rhs = input1.getPage(page_index + 1)
        # Paste the right-hand page beside the left-hand one.
        lhs.mergeTranslatedPage(rhs, lhs.mediaBox.getUpperRight_x(), 0, True)
        output.addPage(lhs)
        print (str(page_index) + " "),
        sys.stdout.flush()

    print("writing " + sys.argv[2])
    # Fixed: the file originally contained this whole script TWICE, and the
    # first copy (the one that executed) used the Python-2-only builtin
    # file(); open() works everywhere.
    outputStream = open(sys.argv[2], "wb")
    output.write(outputStream)
    print("done.")

if __name__ == "__main__":
    main()

+ 0 - 0
need_package/PyPDF2-master/Scripts/__init__.py


+ 57 - 0
need_package/PyPDF2-master/Scripts/pdf-image-extractor.py

@@ -0,0 +1,57 @@
'''
Extract images from PDF without resampling or altering.

Adapted from work by Sylvain Pelissier
http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-resampling-in-python
'''

import sys
import PyPDF2
from PIL import Image

if (len(sys.argv) != 2):
    print("\nUsage: python {} input_file\n".format(sys.argv[0]))
    sys.exit(1)

pdf = sys.argv[1]

if __name__ == '__main__':
    input1 = PyPDF2.PdfFileReader(open(pdf, "rb"))
    # NOTE(review): page index 30 is hard-coded, so only the 31st page is
    # scanned for images; the upstream recipe used getPage(0). Confirm this
    # is intentional for the target document.
    page0 = input1.getPage(30)

    if '/XObject' in page0['/Resources']:
        xObject = page0['/Resources']['/XObject'].getObject()

        for obj in xObject:
            if xObject[obj]['/Subtype'] == '/Image':
                size = (xObject[obj]['/Width'], xObject[obj]['/Height'])
                data = xObject[obj].getData()
                # Map the PDF color space to a PIL mode (palette otherwise).
                if xObject[obj]['/ColorSpace'] == '/DeviceRGB':
                    mode = "RGB"
                else:
                    mode = "P"
                
                if '/Filter' in xObject[obj]:
                    if xObject[obj]['/Filter'] == '/FlateDecode':
                        # Zlib-compressed raw pixels: rebuild through PIL.
                        img = Image.frombytes(mode, size, data)
                        if "/SMask" in xObject[obj]: # add alpha channel
                            alpha = Image.frombytes("L", size, xObject[obj]["/SMask"].getData())
                            img.putalpha(alpha)
                        img.save(obj[1:] + ".png")
                    elif xObject[obj]['/Filter'] == '/DCTDecode':
                        # Already a JPEG stream: write the bytes unchanged.
                        img = open(obj[1:] + ".jpg", "wb")
                        img.write(data)
                        img.close()
                    elif xObject[obj]['/Filter'] == '/JPXDecode':
                        # JPEG 2000 stream.
                        img = open(obj[1:] + ".jp2", "wb")
                        img.write(data)
                        img.close()
                    elif xObject[obj]['/Filter'] == '/CCITTFaxDecode':
                        # CCITT fax-compressed (TIFF) stream.
                        img = open(obj[1:] + ".tiff", "wb")
                        img.write(data)
                        img.close()
                else:
                    # No filter: uncompressed raw pixels.
                    img = Image.frombytes(mode, size, data)
                    img.save(obj[1:] + ".png")
    else:
        print("No image found.")

+ 80 - 0
need_package/PyPDF2-master/Scripts/pdfcat

@@ -0,0 +1,80 @@
#!/usr/bin/env python
"""
Concatenate pages from pdf files into a single pdf file.

Page ranges refer to the previously-named file.
A file not followed by a page range means all the pages of the file.

PAGE RANGES are like Python slices.
        {page_range_help}
EXAMPLES
    pdfcat -o output.pdf head.pdf content.pdf :6 7: tail.pdf -1
        Concatenate all of head.pdf, all but page seven of content.pdf,
        and the last page of tail.pdf, producing output.pdf.

    pdfcat chapter*.pdf >book.pdf
        You can specify the output file by redirection.

    pdfcat chapter?.pdf chapter10.pdf >book.pdf
        In case you don't want chapter 10 before chapter 2.
"""
# Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
# All rights reserved. This software is available under a BSD license;
# see https://github.com/mstamy2/PyPDF2/LICENSE

from __future__ import print_function
import argparse
from PyPDF2.pagerange import PAGE_RANGE_HELP


def parse_args():
    """Parse the command line, folding the first filename into fn_pgrgs."""
    parser = argparse.ArgumentParser(
        description=__doc__.format(page_range_help=PAGE_RANGE_HELP),
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("-o", "--output",
                        metavar="output_file")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="show page ranges as they are being read")
    parser.add_argument("first_filename", nargs=1,
                        metavar="filename [page range...]")
    # argparse chokes on page ranges like "-2:" unless caught like this:
    parser.add_argument("fn_pgrgs", nargs=argparse.REMAINDER,
                        metavar="filenames and/or page ranges")
    args = parser.parse_args()
    args.fn_pgrgs.insert(0, args.first_filename[0])
    return args


from sys import stderr, stdout, exit
import os
import traceback
from collections import defaultdict

from PyPDF2 import PdfFileMerger, parse_filename_page_ranges


if __name__ == "__main__":
    args = parse_args()
    filename_page_ranges = parse_filename_page_ranges(args.fn_pgrgs)
    if args.output:
        output = open(args.output, "wb")
    else:
        # Binary PDF bytes must bypass stdout's text layer.
        stdout.flush()
        output = os.fdopen(stdout.fileno(), "wb")

    merger = PdfFileMerger()
    in_fs = dict()
    filename = None  # last file touched, for the error message below
    try:
        for (filename, page_range) in filename_page_ranges:
            if args.verbose:
                print(filename, page_range, file=stderr)
            if filename not in in_fs:
                in_fs[filename] = open(filename, "rb")
            merger.append(in_fs[filename], pages=page_range)
    except Exception:
        # Fixed: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Keep the broad boundary, log, and exit nonzero.
        print(traceback.format_exc(), file=stderr)
        print("Error while reading " + str(filename), file=stderr)
        exit(1)
    merger.write(output)
    # In 3.0, input files must stay open until output is written.
    # Not closing the in_fs because this script exits now.

+ 0 - 0
need_package/PyPDF2-master/Tests/__init__.py


+ 53 - 0
need_package/PyPDF2-master/Tests/test_basic_features.py

@@ -0,0 +1,53 @@
from PyPDF2 import PdfFileWriter, PdfFileReader
import os

# Resolve the shared Resources fixture directory relative to this file.
TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources")


def test_basic_features():
    """Smoke-test writer features (rotate, merge, crop, JS, encrypt)."""
    output = PdfFileWriter()
    document1 = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
    input1 = PdfFileReader(open(document1, "rb"))

    # print how many pages input1 has:
    print("document1.pdf has %d pages." % input1.getNumPages())

    # add page 1 from input1 to output document, unchanged
    output.addPage(input1.getPage(0))

    # add page 2 from input1, but rotated clockwise 90 degrees
    output.addPage(input1.getPage(0).rotateClockwise(90))

    # add page 3 from input1, rotated the other way:
    output.addPage(input1.getPage(0).rotateCounterClockwise(90))
    # alt: output.addPage(input1.getPage(0).rotateClockwise(270))

    # add page 4 from input1, but first add a watermark from another PDF:
    page4 = input1.getPage(0)
    watermark_pdf = document1
    watermark = PdfFileReader(open(watermark_pdf, "rb"))
    page4.mergePage(watermark.getPage(0))
    output.addPage(page4)

    # add page 5 from input1, but crop it to half size:
    page5 = input1.getPage(0)
    page5.mediaBox.upperRight = (
        page5.mediaBox.getUpperRight_x() / 2,
        page5.mediaBox.getUpperRight_y() / 2,
    )
    output.addPage(page5)

    # add some Javascript to launch the print window on opening this PDF.
    # the password dialog may prevent the print dialog from being shown;
    # comment out the encryption lines below, if that's the case, to try this out
    output.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")

    # encrypt your new PDF and add a password
    password = "secret"
    output.encrypt(password)

    # finally, write "output" to document-output.pdf
    with open("PyPDF2-output.pdf", "wb") as outputStream:
        output.write(outputStream)

+ 42 - 0
need_package/PyPDF2-master/Tests/test_merger.py

@@ -0,0 +1,42 @@
import os
import binascii
import sys

import PyPDF2

# Resolve the shared Resources fixture directory relative to this file.
TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources")

sys.path.append(PROJECT_ROOT)


def test_merge():
    """Exercise PdfFileMerger with every supported input kind."""
    pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
    outline = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")

    file_merger = PyPDF2.PdfFileMerger()

    # string path:
    file_merger.append(pdf_path)
    file_merger.append(outline)
    file_merger.append(pdf_path, pages=PyPDF2.pagerange.PageRange(slice(0, 0)))


    # PdfFileReader object:
    file_merger.append(PyPDF2.PdfFileReader(pdf_path, "rb"))

    # Is merging encrypted files broken?
    # encrypted = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf")
    # reader = PyPDF2.PdfFileReader(pdf_path, "rb")
    # reader.decrypt("openpassword")
    # file_merger.append(reader)

    # File handle
    fh = open(pdf_path, "rb")
    file_merger.append(fh)

    file_merger.addBookmark("A bookmark", 0)

    file_merger.write("dont_commit_merged.pdf")
    file_merger.close()

+ 129 - 0
need_package/PyPDF2-master/Tests/test_reader.py

@@ -0,0 +1,129 @@
import os
import pytest
import PyPDF2

# Resolve the shared Resources fixture directory relative to this file.
TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources")


@pytest.mark.parametrize(
    "src",
    [
        (os.path.join(RESOURCE_ROOT, "crazyones.pdf")),
        (os.path.join(RESOURCE_ROOT, "commented.pdf")),
    ],
)
def test_get_annotations(src):
    """Walk every page and print /Text annotation contents (smoke test)."""
    reader = PyPDF2.PdfFileReader(open(src, "rb"))

    for i in range(reader.getNumPages()):
        page = reader.getPage(i)
        print("/Annots" in page)
        if "/Annots" in page:
            for annot in page["/Annots"]:
                subtype = annot.getObject()["/Subtype"]
                if subtype == "/Text":
                    print(annot.getObject()["/Contents"])
                    print("")


@pytest.mark.parametrize(
    "src",
    [
        (os.path.join(RESOURCE_ROOT, "attachment.pdf")),
        (os.path.join(RESOURCE_ROOT, "crazyones.pdf")),
    ],
)
def test_get_attachments(src):
    """Collect /FileAttachment payloads keyed by filename.

    NOTE(review): this test returns the dict instead of asserting on it, so
    pytest only verifies it does not raise — confirm whether an assertion
    was intended.
    """
    reader = PyPDF2.PdfFileReader(open(src, "rb"))

    attachments = {}
    for i in range(reader.getNumPages()):
        page = reader.getPage(i)
        if "/Annots" in page:
            for annotation in page["/Annots"]:
                annotobj = annotation.getObject()
                if annotobj["/Subtype"] == "/FileAttachment":
                    fileobj = annotobj["/FS"]
                    attachments[fileobj["/F"]] = fileobj["/EF"]["/F"].getData()
    return attachments


@pytest.mark.parametrize(
    "src,outline_elements",
    [
        (os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf"), 9),
        (os.path.join(RESOURCE_ROOT, "crazyones.pdf"), 0),
    ],
)
def test_get_outlines(src, outline_elements):
    """The outline (bookmark) count must match the fixture's known value."""
    reader = PyPDF2.PdfFileReader(open(src, "rb"))
    outlines = reader.getOutlines()
    assert len(outlines) == outline_elements


@pytest.mark.parametrize(
    "src,nb_images",
    [
        (os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf"), 0),
        (os.path.join(RESOURCE_ROOT, "crazyones.pdf"), 0),
        (os.path.join(RESOURCE_ROOT, "git.pdf"), 1),
    ],
)
def test_get_images(src, nb_images):
    """Extract page-0 images (one file per XObject) and count them."""
    from PIL import Image

    input1 = PyPDF2.PdfFileReader(open(src, "rb"))
    page0 = input1.getPage(0)

    images_extracted = []

    if "/XObject" in page0["/Resources"]:
        xObject = page0["/Resources"]["/XObject"].getObject()

        for obj in xObject:
            if xObject[obj]["/Subtype"] == "/Image":
                size = (xObject[obj]["/Width"], xObject[obj]["/Height"])
                data = xObject[obj].getData()
                # Map the PDF color space to a PIL mode (palette otherwise).
                if xObject[obj]["/ColorSpace"] == "/DeviceRGB":
                    mode = "RGB"
                else:
                    mode = "P"

                filename = None
                if "/Filter" in xObject[obj]:
                    if xObject[obj]["/Filter"] == "/FlateDecode":
                        img = Image.frombytes(mode, size, data)
                        if "/SMask" in xObject[obj]:  # add alpha channel
                            alpha = Image.frombytes(
                                "L", size, xObject[obj]["/SMask"].getData()
                            )
                            img.putalpha(alpha)
                        filename = obj[1:] + ".png"
                        img.save(filename)
                    elif xObject[obj]["/Filter"] == "/DCTDecode":
                        filename = obj[1:] + ".jpg"
                        img = open(filename, "wb")
                        img.write(data)
                        img.close()
                    elif xObject[obj]["/Filter"] == "/JPXDecode":
                        filename = obj[1:] + ".jp2"
                        img = open(filename, "wb")
                        img.write(data)
                        img.close()
                    elif xObject[obj]["/Filter"] == "/CCITTFaxDecode":
                        filename = obj[1:] + ".tiff"
                        img = open(filename, "wb")
                        img.write(data)
                        img.close()
                else:
                    # No filter: uncompressed raw pixels.
                    img = Image.frombytes(mode, size, data)
                    filename = obj[1:] + ".png"
                    img.save(filename)
                if filename is not None:
                    images_extracted.append(filename)
    else:
        print("No image found.")

    assert len(images_extracted) == nb_images

+ 9 - 0
need_package/PyPDF2-master/Tests/test_utils.py

@@ -0,0 +1,9 @@
import pytest
import PyPDF2.utils


@pytest.mark.parametrize(
    "candidate,should_be_int",
    [(0, True), (-1, True), (1, True), ("1", False), (1.5, False)],
)
def test_isInt(candidate, should_be_int):
    # isInt must accept any integer (including 0 and negatives) and reject
    # numeric-looking strings and floats.
    assert PyPDF2.utils.isInt(candidate) == should_be_int

+ 108 - 0
need_package/PyPDF2-master/Tests/test_workflows.py

@@ -0,0 +1,108 @@
import os
import binascii
import sys
import pytest

from PyPDF2 import PdfFileReader, PdfFileWriter

# Resolve the shared Resources fixture directory relative to this file.
TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources")

sys.path.append(PROJECT_ROOT)


def test_PdfReaderFileLoad():
    """
    Test loading and parsing of a file. Extract text of the file and compare to expected
    textual output. Expected outcome: file loads, text matches expected.
    """

    with open(os.path.join(RESOURCE_ROOT, "crazyones.pdf"), "rb") as inputfile:
        # Load PDF file from file
        ipdf = PdfFileReader(inputfile)
        ipdf_p1 = ipdf.getPage(0)

        # Retrieve the text of the PDF
        with open(os.path.join(RESOURCE_ROOT, "crazyones.txt"), "rb") as pdftext_file:
            pdftext = pdftext_file.read()

        ipdf_p1_text = ipdf_p1.extractText().replace("\n", "").encode("utf-8")

        # Compare the text of the PDF to a known source
        assert ipdf_p1_text == pdftext, (
            "PDF extracted text differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n"
            % (pdftext, ipdf_p1_text)
        )


def test_PdfReaderJpegImage():
    """
    Test loading and parsing of a file. Extract the image of the file and compare to expected
    textual output. Expected outcome: file loads, image matches expected.
    """

    with open(os.path.join(RESOURCE_ROOT, "jpeg.pdf"), "rb") as inputfile:
        # Load PDF file from file
        ipdf = PdfFileReader(inputfile)

        # Retrieve the text of the image
        with open(os.path.join(RESOURCE_ROOT, "jpeg.txt"), "r") as pdftext_file:
            imagetext = pdftext_file.read()

        ipdf_p0 = ipdf.getPage(0)
        xObject = ipdf_p0["/Resources"]["/XObject"].getObject()
        data = xObject["/Im4"].getData()

        # Compare the text of the PDF to a known source
        assert binascii.hexlify(data).decode() == imagetext, (
            "PDF extracted image differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n"
            % (imagetext, binascii.hexlify(data).decode())
        )


def test_read_metadata():
    """Document info dict of the fixture must match its known values."""
    with open(os.path.join(RESOURCE_ROOT, "crazyones.pdf"), "rb") as inputfile:
        ipdf = PdfFileReader(inputfile)
        metadict = ipdf.getDocumentInfo()
        assert metadict.title is None
        assert dict(metadict) == {
            "/CreationDate": "D:20150604133406-06'00'",
            "/Creator": " XeTeX output 2015.06.04:1334",
            "/Producer": "xdvipdfmx (20140317)",
        }


def test_decrypt():
    """Decrypting the password-protected fixture must expose its metadata."""
    with open(
        os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf"), "rb"
    ) as inputfile:
        ipdf = PdfFileReader(inputfile)
        assert ipdf.isEncrypted == True
        ipdf.decrypt("openpassword")
        assert ipdf.getNumPages() == 1
        # isEncrypted reports the file's stored state, not the reader's.
        assert ipdf.isEncrypted == True
        metadict = ipdf.getDocumentInfo()
        assert dict(metadict) == {
            "/CreationDate": "D:20220403203552+02'00'",
            "/Creator": "Writer",
            "/Producer": "LibreOffice 6.4",
        }
        # Is extractText() broken for encrypted files?
        # assert ipdf.getPage(0).extractText().replace('\n', '') == "\n˘\n\u02c7\u02c6˙\n\n\n˘\u02c7\u02c6˙\n\n"


@pytest.mark.parametrize("degree", [0, 90, 180, 270, 360, -90])
def test_rotate(degree):
    """Any multiple of 90 degrees must be accepted."""
    with open(os.path.join(RESOURCE_ROOT, "crazyones.pdf"), "rb") as inputfile:
        ipdf = PdfFileReader(inputfile)
        page = ipdf.getPage(0)
        page.rotateCounterClockwise(degree)


def test_rotate_45():
    """A non-multiple of 90 must be rejected with AssertionError."""
    with open(os.path.join(RESOURCE_ROOT, "crazyones.pdf"), "rb") as inputfile:
        ipdf = PdfFileReader(inputfile)
        page = ipdf.getPage(0)
        with pytest.raises(AssertionError):
            page.rotateCounterClockwise(45)

+ 22 - 0
need_package/PyPDF2-master/Tests/test_xmp.py

@@ -0,0 +1,22 @@
import os

import pytest

import PyPDF2

# Resource layout mirrors the other test modules in this directory.
TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources")


@pytest.mark.parametrize(
    "src,has_xmp",
    [
        (os.path.join(RESOURCE_ROOT, "commented-xmp.pdf"), True),
        (os.path.join(RESOURCE_ROOT, "crazyones.pdf"), False),
    ],
)
def test_read_xmp(src, has_xmp):
    """getXmpMetadata returns None iff the PDF carries no XMP packet."""
    with open(src, "rb") as inputfile:
        ipdf = PyPDF2.PdfFileReader(inputfile)
        xmp = ipdf.getXmpMetadata()
        assert (xmp is None) == (not has_xmp)
        if has_xmp:
            # The original merely printed this field; assert it instead so
            # the test actually fails when the XMP date cannot be parsed.
            assert xmp.xmp_createDate is not None

+ 93 - 0
need_package/PyPDF2-master/Tests/tests.py

@@ -0,0 +1,93 @@
+import os
+import sys
+import unittest
+import binascii
+
+from PyPDF2 import PdfFileReader, PdfFileWriter
+
+
+# Configure path environment
+TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
+PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
+RESOURCE_ROOT = os.path.join(PROJECT_ROOT, 'Resources')
+
+sys.path.append(PROJECT_ROOT)
+
+
class PdfReaderTestCases(unittest.TestCase):
    """Read-path regression tests for PdfFileReader against known resources."""

    def test_PdfReaderFileLoad(self):
        """Extracted page text must match the stored reference text file."""
        pdf_path = os.path.join(RESOURCE_ROOT, 'crazyones.pdf')
        txt_path = os.path.join(RESOURCE_ROOT, 'crazyones.txt')
        with open(pdf_path, 'rb') as inputfile:
            reader = PdfFileReader(inputfile)
            page = reader.getPage(0)

            # Known-good text extracted from the same page.
            with open(txt_path, 'rb') as pdftext_file:
                expected = pdftext_file.read()

            actual = page.extractText().replace('\n', '').encode('utf-8')

            self.assertEqual(
                actual, expected,
                msg='PDF extracted text differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n'
                    % (expected, actual))

    def test_PdfReaderJpegImage(self):
        """Extracted embedded JPEG bytes must match the stored hex reference."""
        pdf_path = os.path.join(RESOURCE_ROOT, 'jpeg.pdf')
        txt_path = os.path.join(RESOURCE_ROOT, 'jpeg.txt')
        with open(pdf_path, 'rb') as inputfile:
            reader = PdfFileReader(inputfile)

            # Known-good hex dump of the embedded image.
            with open(txt_path, 'r') as pdftext_file:
                expected = pdftext_file.read()

            xobject = reader.getPage(0)['/Resources']['/XObject'].getObject()
            actual = binascii.hexlify(xobject['/Im4'].getData()).decode()

            self.assertEqual(
                actual, expected,
                msg='PDF extracted image differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n'
                    % (expected, actual))
+
class AddJsTestCase(unittest.TestCase):
    """Behaviour of PdfFileWriter.addJS (document-level JavaScript)."""

    def setUp(self):
        # Fresh writer seeded with the pages of a known test document.
        source = PdfFileReader(os.path.join(RESOURCE_ROOT, 'crazyones.pdf'))
        self.pdf_file_writer = PdfFileWriter()
        self.pdf_file_writer.appendPagesFromReader(source)

    def test_add(self):
        """addJS wires /Names, /JavaScript and /OpenAction into the catalog."""
        self.pdf_file_writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
        root = self.pdf_file_writer._root_object
        self.assertIn('/Names', root, "addJS should add a name catalog in the root object.")
        self.assertIn('/JavaScript', root['/Names'], "addJS should add a JavaScript name tree under the name catalog.")
        self.assertIn('/OpenAction', root, "addJS should add an OpenAction to the catalog.")

    def test_overwrite(self):
        """A second addJS call replaces the previously registered script."""
        self.pdf_file_writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
        first_js = self.get_javascript_name()

        self.pdf_file_writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
        second_js = self.get_javascript_name()

        self.assertNotEqual(first_js, second_js, "addJS should overwrite the previous script in the catalog.")

    def get_javascript_name(self):
        """Return the name the current script is registered under in /Names."""
        root = self.pdf_file_writer._root_object
        self.assertIn('/Names', root)
        self.assertIn('/JavaScript', root['/Names'])
        self.assertIn('/Names', root['/Names']['/JavaScript'])
        return root['/Names']['/JavaScript']['/Names'][0]

+ 0 - 0
need_package/PyPDF2-master/__init__.py


+ 5 - 0
need_package/PyPDF2-master/requirements/ci.in

@@ -0,0 +1,5 @@
+pytest
+flake8
+flake8-bugbear
+pytest-cov
+pillow

+ 46 - 0
need_package/PyPDF2-master/requirements/ci.txt

@@ -0,0 +1,46 @@
+#
+# This file is autogenerated by pip-compile with python 3.10
+# To update, run:
+#
+#    pip-compile ci.in
+#
+attrs==21.4.0
+    # via
+    #   flake8-bugbear
+    #   pytest
+coverage[toml]==6.3.2
+    # via pytest-cov
+flake8==4.0.1
+    # via
+    #   -r ci.in
+    #   flake8-bugbear
+flake8-bugbear==22.3.23
+    # via -r ci.in
+iniconfig==1.1.1
+    # via pytest
+mccabe==0.6.1
+    # via flake8
+packaging==21.3
+    # via pytest
+pillow==9.1.0
+    # via -r ci.in
+pluggy==1.0.0
+    # via pytest
+py==1.11.0
+    # via pytest
+pycodestyle==2.8.0
+    # via flake8
+pyflakes==2.4.0
+    # via flake8
+pyparsing==3.0.7
+    # via packaging
+pytest==7.1.1
+    # via
+    #   -r ci.in
+    #   pytest-cov
+pytest-cov==3.0.0
+    # via -r ci.in
+tomli==2.0.1
+    # via
+    #   coverage
+    #   pytest

+ 57 - 0
need_package/PyPDF2-master/setup.py

@@ -0,0 +1,57 @@
#!/usr/bin/env python
"""Packaging script for PyPDF2 (distutils-based, as shipped upstream)."""

from distutils.core import setup
import re

long_description = """
A Pure-Python library built as a PDF toolkit.  It is capable of:

- extracting document information (title, author, ...)
- splitting documents page by page
- merging documents page by page
- cropping pages
- merging multiple pages into a single page
- encrypting and decrypting PDF files
- and more!

By being Pure-Python, it should run on any Python platform without any
dependencies on external libraries.  It can also work entirely on StringIO
objects rather than file streams, allowing for PDF manipulation in memory.
It is therefore a useful tool for websites that manage or manipulate PDFs.
"""

# Read the package version from the single source of truth.
VERSIONFILE = "PyPDF2/_version.py"
VSRE = r"^__version__ = ['\"]([^'\"]*)['\"]"
with open(VERSIONFILE, "rt") as version_handle:  # was a leaked bare open()
    verstrline = version_handle.read()
mo = re.search(VSRE, verstrline, re.M)
if mo:
    verstr = mo.group(1)
else:
    raise RuntimeError("Unable to find version string in %s." % (VERSIONFILE))

setup(
        name="PyPDF2",
        version=verstr,
        description="PDF toolkit",
        long_description=long_description,
        author="Mathieu Fenniak",
        author_email="biziqe@mathieu.fenniak.net",
        maintainer="Phaseit, Inc.",
        maintainer_email="PyPDF2@phaseit.net",
        url="https://mstamy2.github.io/PyPDF2",
        project_urls={
            "Source": "https://github.com/mstamy2/PyPDF2",
            "Bug Reports": "https://github.com/mstamy2/PyPDF2/issues",
            "Changelog": "https://raw.githubusercontent.com/mstamy2/PyPDF2/master/CHANGELOG",
        },
        classifiers = [
            "Development Status :: 5 - Production/Stable",
            "Intended Audience :: Developers",
            "License :: OSI Approved :: BSD License",
            "Programming Language :: Python :: 2",
            "Programming Language :: Python :: 3",
            "Operating System :: OS Independent",
            "Topic :: Software Development :: Libraries :: Python Modules",
            ],
        packages=["PyPDF2"],
    )

+ 6 - 0
need_package/PyPDF2-master/tox.ini

@@ -0,0 +1,6 @@
+[tox]
+envlist =
+	py{27,34,35,36,py,py3}
+
+[testenv]
+commands = python -m unittest Tests.tests

+ 7 - 0
need_package/readme.md

@@ -0,0 +1,7 @@
# 需要安装包
## 依次安装 servicerd、pypdf2

```shell
cd servicerd
python setup.py install
```

+ 11 - 0
need_package/servicerd/PKG-INFO

@@ -0,0 +1,11 @@
+Metadata-Version: 1.2
+Name: servicerd
+Version: 1.1.5
+Summary: Call to a distributed framework
+Home-page: http://ai-01:8080
+Author: lijunliang
+Author-email: lijunliang@topnet.net.cn
+License: GPL License
+Description: UNKNOWN
+Platform: all
+Requires-Python: >=3.6

+ 0 - 0
need_package/servicerd/README.txt


+ 0 - 0
need_package/servicerd/build/lib/servicerd/__init__.py


+ 96 - 0
need_package/servicerd/build/lib/servicerd/async_queue.py

@@ -0,0 +1,96 @@
+"""
+异步队列
+"""
+import time
+import threading
+import uuid
+import grpc
+import queue
+from servicerd.proto import queue_pb2 as qpb, queue_pb2_grpc as qrpc
+
+
class AsyncQueue(object):
    """Asynchronous publish/subscribe queue client backed by a gRPC stream."""

    def __init__(self, queue_servr: str, channel: str):
        self.queue_server = queue_servr
        self.channel = channel
        self.cancel = False          # set True to stop the publisher generator
        self.conn = None
        self.callback_fn = None
        self.id = str(uuid.uuid1())  # unique sender id for this client
        self.client = None
        self.__connect__()
        self.pub_queue = queue.LifoQueue(maxsize=5000)  # publish pool
        # Background thread that streams queued jobs to the server.
        threading.Thread(target=self.__publish__).start()

    def publish_job(self, item: any):
        """Enqueue a (publish_type, param) pair for publication.

        :param item: two-element sequence; item[0] is the publish type,
            item[1] the payload bytes.
        """
        self.pub_queue.put(item)

    def __pub_job_gen__(self):
        """Generator that feeds the streaming Publish RPC.

        BUG FIX: the original did ``raise StopIteration`` inside the loop.
        Under PEP 479 that becomes RuntimeError in Python 3.7+, and worse,
        it was raised inside the try block whose ``except Exception`` caught
        it first — so cancellation never actually stopped the stream.
        A plain ``return`` ends the generator cleanly.
        """
        while True:
            if self.cancel:
                return
            try:
                # NOTE(review): a blocking get() means cancellation is only
                # observed after the next item arrives — pre-existing behaviour.
                item = self.pub_queue.get(block=True)
                yield qpb.PubReq(id=str(uuid.uuid1()), channelId=self.channel, sender=self.id, publish_type=item[0],
                                 param=item[1])
            except Exception as ex:
                print(ex)

    def __connect__(self):
        """Connect (or reconnect) to the queue server, retrying forever."""
        while True:
            try:
                if self.conn:
                    self.conn.close()
                conn = grpc.insecure_channel(self.queue_server)
                self.conn = conn
                self.client = qrpc.QueueServiceStub(channel=conn)
                return
            except Exception as ex:
                print(ex)
                # NOTE(review): "对垒" looks like a typo for "队列" in this log line.
                print('连接对垒服务失败,1秒后重新连接')
                time.sleep(1)

    def __publish__(self):
        """Drive the streaming Publish RPC until it fails or is cancelled."""
        try:
            self.client.Publish(self.__pub_job_gen__())
        except Exception as ex:
            print(ex)
            print('发送队列任务失败')
            time.sleep(1)

    def recive(self, callback_fn: any):
        """Start receiving messages; callback_fn is invoked per message."""
        self.callback_fn = callback_fn
        threading.Thread(target=self.__recive__).start()

    def __recive__(self):
        """Blocking receive loop run on a background thread."""
        try:
            for item in self.client.Receive(qpb.RecvReq(sender=self.id, channelId=self.channel)):
                self.callback_fn(item)
        except Exception as ex:
            print(ex)

+ 135 - 0
need_package/servicerd/build/lib/servicerd/client.py

@@ -0,0 +1,135 @@
+"""
+客户端调用封装
+"""
+import time
+import grpc
+from functools import wraps
+from servicerd.proto import service_pb2_grpc as spbrpc, service_pb2 as spb
+
+BALANCE_RANDOM = 0
+BALANCE_LOAD = 1
+BALANCE_SEQ = 2
+
+
class RdClient(object):
    """Client wrapper that resolves a provider from the service registry
    before every call, and optionally releases it afterwards."""

    def __init__(self, rd_server: str = "192.168.3.240:10021",
                 service_name: str = "demo",
                 balance_type: int = 0):
        """
        :param rd_server: address of the service-registry gRPC server
        :param service_name: logical service name to resolve
        :param balance_type: balancing strategy (BALANCE_RANDOM/LOAD/SEQ);
            the value 3 additionally triggers an explicit Release after
            each call — TODO confirm against the registry's protocol.
        """
        self.rd_server = rd_server
        self.service_name = service_name
        self.balance_type = balance_type
        self.conn = self.rd_client = None
        self.__connect__()

    def __connect__(self):
        """Connect to the service registry (single attempt, logged on failure)."""
        try:
            conn = grpc.insecure_channel(self.rd_server)
            self.conn = conn
            self.rd_client = spbrpc.ServiceStub(channel=conn)
        except Exception as ex:
            print(ex)
            print('连接服务治理中心失败,1秒后重新连接')
            time.sleep(1)

    def __acquire__(self):
        """Block until the registry hands out a provider; return (ip, port).

        Retries forever, 1s apart.  Reconnects to the registry unless the
        error is merely "no service available".  Raises if the registry
        answers with an empty address.  (This logic was duplicated verbatim
        in fn_wrap and __pc_run__; factored out here.)
        """
        ip, port = '', 0
        while True:
            try:
                resp = self.rd_client.Apply(spb.ApplyReqData(
                    name=self.service_name,
                    balance=self.balance_type,
                ))
                ip, port = resp.ip, resp.port
                break
            except Exception as ex:
                time.sleep(1)
                if "没有可用的服务" not in str(ex):
                    self.__connect__()
        if ip == '':
            raise Exception('没有相应的服务提供者')
        return ip, port

    def fn_wrap(self, fn):
        """Decorator injecting the acquired provider's ip/port into *fn*."""
        @wraps(fn)
        def decorated(*args, **kwargs):
            __ip__, __port__ = self.__acquire__()
            kwargs['ip'] = __ip__
            kwargs['port'] = __port__
            ret = None
            try:
                ret = fn(*args, **kwargs)
            except Exception as ex:
                print(ex)
            if self.balance_type == 3:
                release_state = self.rd_client.Release(spb.ApplyRepData(
                    ip=__ip__,
                    port=__port__
                ))
                print("{}:{}---->释放成功{}".format(__ip__, __port__, release_state))
            return ret

        return decorated

    def __pc_run__(self, *param, **kwargs) -> any:
        """Run one (param, callback_fn) pair on an acquired provider.

        Used as the map function by parallel_computing.
        """
        _param, callback_fn = param[0]
        __ip__, __port__ = self.__acquire__()
        kwargs['ip'] = __ip__
        kwargs['port'] = __port__
        ret = None
        try:
            ret = callback_fn(_param, **kwargs)
        except Exception as ex:
            print(ex)
        if self.balance_type == 3:
            # Release quietly (the decorator variant logs; this one never did).
            self.rd_client.Release(spb.ApplyRepData(
                ip=__ip__,
                port=__port__
            ))
        return ret

    def parallel_computing(self, params: list, concurrent_number: int = 2, callback_fn: any = None,
                           timeout: int = 0) -> any:
        """Fan *params* out over a thread pool and collect the results.

        :param params: list of per-call parameters
        :param concurrent_number: pool size
        :param callback_fn: callable(param, ip=..., port=...) doing the work
        :param timeout: seconds for the whole map; 0 means no timeout
        :return: list of per-param results (None where the callback raised)
        """
        from concurrent.futures import ThreadPoolExecutor
        if not timeout:
            timeout = None
        with ThreadPoolExecutor(max_workers=concurrent_number) as pool:
            results = pool.map(self.__pc_run__,
                               [(p, callback_fn) for p in params],
                               timeout=timeout)
            return list(results)

+ 0 - 0
need_package/servicerd/build/lib/servicerd/proto/__init__.py


+ 110 - 0
need_package/servicerd/build/lib/servicerd/proto/ggclassefity_pb2.py

@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler.  DO NOT EDIT!
+# source: ggclassefity.proto
+
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+  name='ggclassefity.proto',
+  package='proto',
+  syntax='proto3',
+  serialized_options=None,
+  create_key=_descriptor._internal_create_key,
+  serialized_pb=b'\n\x12ggclassefity.proto\x12\x05proto\"\x16\n\x06\x43lfReq\x12\x0c\n\x04item\x18\x01 \x03(\x02\"\x17\n\x07\x43lfResp\x12\x0c\n\x04item\x18\x01 \x03(\x05\x62\x06proto3'
+)
+
+
+
+
+_CLFREQ = _descriptor.Descriptor(
+  name='ClfReq',
+  full_name='proto.ClfReq',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='item', full_name='proto.ClfReq.item', index=0,
+      number=1, type=2, cpp_type=6, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=29,
+  serialized_end=51,
+)
+
+
+_CLFRESP = _descriptor.Descriptor(
+  name='ClfResp',
+  full_name='proto.ClfResp',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='item', full_name='proto.ClfResp.item', index=0,
+      number=1, type=5, cpp_type=1, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=53,
+  serialized_end=76,
+)
+
+DESCRIPTOR.message_types_by_name['ClfReq'] = _CLFREQ
+DESCRIPTOR.message_types_by_name['ClfResp'] = _CLFRESP
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+ClfReq = _reflection.GeneratedProtocolMessageType('ClfReq', (_message.Message,), {
+  'DESCRIPTOR' : _CLFREQ,
+  '__module__' : 'ggclassefity_pb2'
+  # @@protoc_insertion_point(class_scope:proto.ClfReq)
+  })
+_sym_db.RegisterMessage(ClfReq)
+
+ClfResp = _reflection.GeneratedProtocolMessageType('ClfResp', (_message.Message,), {
+  'DESCRIPTOR' : _CLFRESP,
+  '__module__' : 'ggclassefity_pb2'
+  # @@protoc_insertion_point(class_scope:proto.ClfResp)
+  })
+_sym_db.RegisterMessage(ClfResp)
+
+
+# @@protoc_insertion_point(module_scope)

+ 4 - 0
need_package/servicerd/build/lib/servicerd/proto/ggclassefity_pb2_grpc.py

@@ -0,0 +1,4 @@
+# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
+"""Client and server classes corresponding to protobuf-defined services."""
+import grpc
+

+ 150 - 0
need_package/servicerd/build/lib/servicerd/proto/heartbeat_pb2.py

@@ -0,0 +1,150 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler.  DO NOT EDIT!
+# source: heartbeat.proto
+
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+  name='heartbeat.proto',
+  package='proto',
+  syntax='proto3',
+  serialized_options=None,
+  create_key=_descriptor._internal_create_key,
+  serialized_pb=b'\n\x0fheartbeat.proto\x12\x05proto\"L\n\rStreamReqData\x12\x13\n\x0bserviceName\x18\x01 \x01(\t\x12\x11\n\tserviceIp\x18\x02 \x01(\t\x12\x13\n\x0bservicePort\x18\x03 \x01(\x05\"\x1d\n\rStreamResData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x01(\t2H\n\tHeartBeat\x12;\n\tPutStream\x12\x14.proto.StreamReqData\x1a\x14.proto.StreamResData\"\x00(\x01\x62\x06proto3'
+)
+
+
+
+
+_STREAMREQDATA = _descriptor.Descriptor(
+  name='StreamReqData',
+  full_name='proto.StreamReqData',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='serviceName', full_name='proto.StreamReqData.serviceName', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='serviceIp', full_name='proto.StreamReqData.serviceIp', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='servicePort', full_name='proto.StreamReqData.servicePort', index=2,
+      number=3, type=5, cpp_type=1, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=26,
+  serialized_end=102,
+)
+
+
+_STREAMRESDATA = _descriptor.Descriptor(
+  name='StreamResData',
+  full_name='proto.StreamResData',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='data', full_name='proto.StreamResData.data', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=104,
+  serialized_end=133,
+)
+
+DESCRIPTOR.message_types_by_name['StreamReqData'] = _STREAMREQDATA
+DESCRIPTOR.message_types_by_name['StreamResData'] = _STREAMRESDATA
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+StreamReqData = _reflection.GeneratedProtocolMessageType('StreamReqData', (_message.Message,), {
+  'DESCRIPTOR' : _STREAMREQDATA,
+  '__module__' : 'heartbeat_pb2'
+  # @@protoc_insertion_point(class_scope:proto.StreamReqData)
+  })
+_sym_db.RegisterMessage(StreamReqData)
+
+StreamResData = _reflection.GeneratedProtocolMessageType('StreamResData', (_message.Message,), {
+  'DESCRIPTOR' : _STREAMRESDATA,
+  '__module__' : 'heartbeat_pb2'
+  # @@protoc_insertion_point(class_scope:proto.StreamResData)
+  })
+_sym_db.RegisterMessage(StreamResData)
+
+
+
+_HEARTBEAT = _descriptor.ServiceDescriptor(
+  name='HeartBeat',
+  full_name='proto.HeartBeat',
+  file=DESCRIPTOR,
+  index=0,
+  serialized_options=None,
+  create_key=_descriptor._internal_create_key,
+  serialized_start=135,
+  serialized_end=207,
+  methods=[
+  _descriptor.MethodDescriptor(
+    name='PutStream',
+    full_name='proto.HeartBeat.PutStream',
+    index=0,
+    containing_service=None,
+    input_type=_STREAMREQDATA,
+    output_type=_STREAMRESDATA,
+    serialized_options=None,
+    create_key=_descriptor._internal_create_key,
+  ),
+])
+_sym_db.RegisterServiceDescriptor(_HEARTBEAT)
+
+DESCRIPTOR.services_by_name['HeartBeat'] = _HEARTBEAT
+
+# @@protoc_insertion_point(module_scope)

+ 69 - 0
need_package/servicerd/build/lib/servicerd/proto/heartbeat_pb2_grpc.py

@@ -0,0 +1,69 @@
+# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
+"""Client and server classes corresponding to protobuf-defined services."""
+import grpc
+
+from servicerd.proto import heartbeat_pb2 as heartbeat__pb2
+
+
+class HeartBeatStub(object):
+    """心跳检测
+    """
+
+    def __init__(self, channel):
+        """Constructor.
+
+        Args:
+            channel: A grpc.Channel.
+        """
+        self.PutStream = channel.stream_unary(
+                '/proto.HeartBeat/PutStream',
+                request_serializer=heartbeat__pb2.StreamReqData.SerializeToString,
+                response_deserializer=heartbeat__pb2.StreamResData.FromString,
+                )
+
+
+class HeartBeatServicer(object):
+    """心跳检测
+    """
+
+    def PutStream(self, request_iterator, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+
+def add_HeartBeatServicer_to_server(servicer, server):
+    rpc_method_handlers = {
+            'PutStream': grpc.stream_unary_rpc_method_handler(
+                    servicer.PutStream,
+                    request_deserializer=heartbeat__pb2.StreamReqData.FromString,
+                    response_serializer=heartbeat__pb2.StreamResData.SerializeToString,
+            ),
+    }
+    generic_handler = grpc.method_handlers_generic_handler(
+            'proto.HeartBeat', rpc_method_handlers)
+    server.add_generic_rpc_handlers((generic_handler,))
+
+
+ # This class is part of an EXPERIMENTAL API.
+class HeartBeat(object):
+    """心跳检测
+    """
+
+    @staticmethod
+    def PutStream(request_iterator,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.stream_unary(request_iterator, target, '/proto.HeartBeat/PutStream',
+            heartbeat__pb2.StreamReqData.SerializeToString,
+            heartbeat__pb2.StreamResData.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

+ 235 - 0
need_package/servicerd/build/lib/servicerd/proto/queue_pb2.py

@@ -0,0 +1,235 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler.  DO NOT EDIT!
+# source: queue.proto
+
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+  name='queue.proto',
+  package='proto',
+  syntax='proto3',
+  serialized_options=None,
+  create_key=_descriptor._internal_create_key,
+  serialized_pb=b'\n\x0bqueue.proto\x12\x05proto\"q\n\x06PubReq\x12\n\n\x02id\x18\x01 \x01(\t\x12\x11\n\tchannelId\x18\x02 \x01(\t\x12\x0e\n\x06sender\x18\x03 \x01(\t\x12\x13\n\x0bserial_type\x18\x04 \x01(\x05\x12\x14\n\x0cpublish_type\x18\x05 \x01(\x05\x12\r\n\x05param\x18\x06 \x01(\x0c\"$\n\x07PubResp\x12\x0c\n\x04\x63ode\x18\x01 \x01(\x05\x12\x0b\n\x03msg\x18\x02 \x01(\t\",\n\x07RecvReq\x12\x0e\n\x06sender\x18\x01 \x01(\t\x12\x11\n\tchannelId\x18\x02 \x01(\t2j\n\x0cQueueService\x12,\n\x07Publish\x12\r.proto.PubReq\x1a\x0e.proto.PubResp\"\x00(\x01\x12,\n\x07Receive\x12\x0e.proto.RecvReq\x1a\r.proto.PubReq\"\x00\x30\x01\x62\x06proto3'
+)
+
+
+
+
+_PUBREQ = _descriptor.Descriptor(
+  name='PubReq',
+  full_name='proto.PubReq',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='id', full_name='proto.PubReq.id', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='channelId', full_name='proto.PubReq.channelId', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='sender', full_name='proto.PubReq.sender', index=2,
+      number=3, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='serial_type', full_name='proto.PubReq.serial_type', index=3,
+      number=4, type=5, cpp_type=1, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='publish_type', full_name='proto.PubReq.publish_type', index=4,
+      number=5, type=5, cpp_type=1, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='param', full_name='proto.PubReq.param', index=5,
+      number=6, type=12, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"",
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=22,
+  serialized_end=135,
+)
+
+
+_PUBRESP = _descriptor.Descriptor(
+  name='PubResp',
+  full_name='proto.PubResp',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='code', full_name='proto.PubResp.code', index=0,
+      number=1, type=5, cpp_type=1, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='msg', full_name='proto.PubResp.msg', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=137,
+  serialized_end=173,
+)
+
+
+_RECVREQ = _descriptor.Descriptor(
+  name='RecvReq',
+  full_name='proto.RecvReq',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='sender', full_name='proto.RecvReq.sender', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='channelId', full_name='proto.RecvReq.channelId', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=175,
+  serialized_end=219,
+)
+
+DESCRIPTOR.message_types_by_name['PubReq'] = _PUBREQ
+DESCRIPTOR.message_types_by_name['PubResp'] = _PUBRESP
+DESCRIPTOR.message_types_by_name['RecvReq'] = _RECVREQ
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+PubReq = _reflection.GeneratedProtocolMessageType('PubReq', (_message.Message,), {
+  'DESCRIPTOR' : _PUBREQ,
+  '__module__' : 'queue_pb2'
+  # @@protoc_insertion_point(class_scope:proto.PubReq)
+  })
+_sym_db.RegisterMessage(PubReq)
+
+PubResp = _reflection.GeneratedProtocolMessageType('PubResp', (_message.Message,), {
+  'DESCRIPTOR' : _PUBRESP,
+  '__module__' : 'queue_pb2'
+  # @@protoc_insertion_point(class_scope:proto.PubResp)
+  })
+_sym_db.RegisterMessage(PubResp)
+
+RecvReq = _reflection.GeneratedProtocolMessageType('RecvReq', (_message.Message,), {
+  'DESCRIPTOR' : _RECVREQ,
+  '__module__' : 'queue_pb2'
+  # @@protoc_insertion_point(class_scope:proto.RecvReq)
+  })
+_sym_db.RegisterMessage(RecvReq)
+
+
+
+_QUEUESERVICE = _descriptor.ServiceDescriptor(
+  name='QueueService',
+  full_name='proto.QueueService',
+  file=DESCRIPTOR,
+  index=0,
+  serialized_options=None,
+  create_key=_descriptor._internal_create_key,
+  serialized_start=221,
+  serialized_end=327,
+  methods=[
+  _descriptor.MethodDescriptor(
+    name='Publish',
+    full_name='proto.QueueService.Publish',
+    index=0,
+    containing_service=None,
+    input_type=_PUBREQ,
+    output_type=_PUBRESP,
+    serialized_options=None,
+    create_key=_descriptor._internal_create_key,
+  ),
+  _descriptor.MethodDescriptor(
+    name='Receive',
+    full_name='proto.QueueService.Receive',
+    index=1,
+    containing_service=None,
+    input_type=_RECVREQ,
+    output_type=_PUBREQ,
+    serialized_options=None,
+    create_key=_descriptor._internal_create_key,
+  ),
+])
+_sym_db.RegisterServiceDescriptor(_QUEUESERVICE)
+
+DESCRIPTOR.services_by_name['QueueService'] = _QUEUESERVICE
+
+# @@protoc_insertion_point(module_scope)

+ 104 - 0
need_package/servicerd/build/lib/servicerd/proto/queue_pb2_grpc.py

@@ -0,0 +1,104 @@
+# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
+"""Client and server classes corresponding to protobuf-defined services."""
+import grpc
+
+from servicerd.proto import queue_pb2 as queue__pb2
+
+
+class QueueServiceStub(object):
+    """服务管理
+    """
+
+    def __init__(self, channel):
+        """Constructor.
+
+        Args:
+            channel: A grpc.Channel.
+        """
+        self.Publish = channel.stream_unary(
+                '/proto.QueueService/Publish',
+                request_serializer=queue__pb2.PubReq.SerializeToString,
+                response_deserializer=queue__pb2.PubResp.FromString,
+                )
+        self.Receive = channel.unary_stream(
+                '/proto.QueueService/Receive',
+                request_serializer=queue__pb2.RecvReq.SerializeToString,
+                response_deserializer=queue__pb2.PubReq.FromString,
+                )
+
+
+class QueueServiceServicer(object):
+    """服务管理
+    """
+
+    def Publish(self, request_iterator, context):
+        """任务发布
+        """
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def Receive(self, request, context):
+        """任务接收
+        """
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+
+def add_QueueServiceServicer_to_server(servicer, server):
+    rpc_method_handlers = {
+            'Publish': grpc.stream_unary_rpc_method_handler(
+                    servicer.Publish,
+                    request_deserializer=queue__pb2.PubReq.FromString,
+                    response_serializer=queue__pb2.PubResp.SerializeToString,
+            ),
+            'Receive': grpc.unary_stream_rpc_method_handler(
+                    servicer.Receive,
+                    request_deserializer=queue__pb2.RecvReq.FromString,
+                    response_serializer=queue__pb2.PubReq.SerializeToString,
+            ),
+    }
+    generic_handler = grpc.method_handlers_generic_handler(
+            'proto.QueueService', rpc_method_handlers)
+    server.add_generic_rpc_handlers((generic_handler,))
+
+
+ # This class is part of an EXPERIMENTAL API.
+class QueueService(object):
+    """服务管理
+    """
+
+    @staticmethod
+    def Publish(request_iterator,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.stream_unary(request_iterator, target, '/proto.QueueService/Publish',
+            queue__pb2.PubReq.SerializeToString,
+            queue__pb2.PubResp.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def Receive(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_stream(request, target, '/proto.QueueService/Receive',
+            queue__pb2.RecvReq.SerializeToString,
+            queue__pb2.PubReq.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

+ 335 - 0
need_package/servicerd/build/lib/servicerd/proto/service_pb2.py

@@ -0,0 +1,335 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler.  DO NOT EDIT!
+# source: proto/service.proto
+"""Generated protocol buffer code."""
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+  name='proto/service.proto',
+  package='proto',
+  syntax='proto3',
+  serialized_options=None,
+  create_key=_descriptor._internal_create_key,
+  serialized_pb=b'\n\x13proto/service.proto\x12\x05proto\"e\n\x0bServiceMeta\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02ip\x18\x02 \x01(\t\x12\x0c\n\x04port\x18\x03 \x01(\x05\x12\x0f\n\x07workers\x18\x04 \x01(\x05\x12\x0f\n\x07\x62\x61lance\x18\x05 \x01(\x05\x12\x0c\n\x04meta\x18\x06 \x01(\t\"\x1d\n\rStringRepData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x01(\t\"\x1d\n\rStringReqData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x01(\t\"-\n\x0c\x41pplyReqData\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07\x62\x61lance\x18\x02 \x01(\x05\"(\n\x0c\x41pplyRepData\x12\n\n\x02ip\x18\x01 \x01(\t\x12\x0c\n\x04port\x18\x02 \x01(\x05\x32\xe4\x01\n\x07Service\x12\x35\n\x07Registe\x12\x12.proto.ServiceMeta\x1a\x14.proto.StringRepData\"\x00\x12\x35\n\x07\x44\x65story\x12\x12.proto.ServiceMeta\x1a\x14.proto.StringRepData\"\x00\x12\x33\n\x05\x41pply\x12\x13.proto.ApplyReqData\x1a\x13.proto.ApplyRepData\"\x00\x12\x36\n\x07Release\x12\x13.proto.ApplyRepData\x1a\x14.proto.StringRepData\"\x00\x62\x06proto3'
+)
+
+
+
+
+_SERVICEMETA = _descriptor.Descriptor(
+  name='ServiceMeta',
+  full_name='proto.ServiceMeta',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='name', full_name='proto.ServiceMeta.name', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='ip', full_name='proto.ServiceMeta.ip', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='port', full_name='proto.ServiceMeta.port', index=2,
+      number=3, type=5, cpp_type=1, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='workers', full_name='proto.ServiceMeta.workers', index=3,
+      number=4, type=5, cpp_type=1, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='balance', full_name='proto.ServiceMeta.balance', index=4,
+      number=5, type=5, cpp_type=1, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='meta', full_name='proto.ServiceMeta.meta', index=5,
+      number=6, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=30,
+  serialized_end=131,
+)
+
+
+_STRINGREPDATA = _descriptor.Descriptor(
+  name='StringRepData',
+  full_name='proto.StringRepData',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='data', full_name='proto.StringRepData.data', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=133,
+  serialized_end=162,
+)
+
+
+_STRINGREQDATA = _descriptor.Descriptor(
+  name='StringReqData',
+  full_name='proto.StringReqData',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='data', full_name='proto.StringReqData.data', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=164,
+  serialized_end=193,
+)
+
+
+_APPLYREQDATA = _descriptor.Descriptor(
+  name='ApplyReqData',
+  full_name='proto.ApplyReqData',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='name', full_name='proto.ApplyReqData.name', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='balance', full_name='proto.ApplyReqData.balance', index=1,
+      number=2, type=5, cpp_type=1, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=195,
+  serialized_end=240,
+)
+
+
+_APPLYREPDATA = _descriptor.Descriptor(
+  name='ApplyRepData',
+  full_name='proto.ApplyRepData',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='ip', full_name='proto.ApplyRepData.ip', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='port', full_name='proto.ApplyRepData.port', index=1,
+      number=2, type=5, cpp_type=1, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=242,
+  serialized_end=282,
+)
+
+DESCRIPTOR.message_types_by_name['ServiceMeta'] = _SERVICEMETA
+DESCRIPTOR.message_types_by_name['StringRepData'] = _STRINGREPDATA
+DESCRIPTOR.message_types_by_name['StringReqData'] = _STRINGREQDATA
+DESCRIPTOR.message_types_by_name['ApplyReqData'] = _APPLYREQDATA
+DESCRIPTOR.message_types_by_name['ApplyRepData'] = _APPLYREPDATA
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+ServiceMeta = _reflection.GeneratedProtocolMessageType('ServiceMeta', (_message.Message,), {
+  'DESCRIPTOR' : _SERVICEMETA,
+  '__module__' : 'proto.service_pb2'
+  # @@protoc_insertion_point(class_scope:proto.ServiceMeta)
+  })
+_sym_db.RegisterMessage(ServiceMeta)
+
+StringRepData = _reflection.GeneratedProtocolMessageType('StringRepData', (_message.Message,), {
+  'DESCRIPTOR' : _STRINGREPDATA,
+  '__module__' : 'proto.service_pb2'
+  # @@protoc_insertion_point(class_scope:proto.StringRepData)
+  })
+_sym_db.RegisterMessage(StringRepData)
+
+StringReqData = _reflection.GeneratedProtocolMessageType('StringReqData', (_message.Message,), {
+  'DESCRIPTOR' : _STRINGREQDATA,
+  '__module__' : 'proto.service_pb2'
+  # @@protoc_insertion_point(class_scope:proto.StringReqData)
+  })
+_sym_db.RegisterMessage(StringReqData)
+
+ApplyReqData = _reflection.GeneratedProtocolMessageType('ApplyReqData', (_message.Message,), {
+  'DESCRIPTOR' : _APPLYREQDATA,
+  '__module__' : 'proto.service_pb2'
+  # @@protoc_insertion_point(class_scope:proto.ApplyReqData)
+  })
+_sym_db.RegisterMessage(ApplyReqData)
+
+ApplyRepData = _reflection.GeneratedProtocolMessageType('ApplyRepData', (_message.Message,), {
+  'DESCRIPTOR' : _APPLYREPDATA,
+  '__module__' : 'proto.service_pb2'
+  # @@protoc_insertion_point(class_scope:proto.ApplyRepData)
+  })
+_sym_db.RegisterMessage(ApplyRepData)
+
+
+
+_SERVICE = _descriptor.ServiceDescriptor(
+  name='Service',
+  full_name='proto.Service',
+  file=DESCRIPTOR,
+  index=0,
+  serialized_options=None,
+  create_key=_descriptor._internal_create_key,
+  serialized_start=285,
+  serialized_end=513,
+  methods=[
+  _descriptor.MethodDescriptor(
+    name='Registe',
+    full_name='proto.Service.Registe',
+    index=0,
+    containing_service=None,
+    input_type=_SERVICEMETA,
+    output_type=_STRINGREPDATA,
+    serialized_options=None,
+    create_key=_descriptor._internal_create_key,
+  ),
+  _descriptor.MethodDescriptor(
+    name='Destory',
+    full_name='proto.Service.Destory',
+    index=1,
+    containing_service=None,
+    input_type=_SERVICEMETA,
+    output_type=_STRINGREPDATA,
+    serialized_options=None,
+    create_key=_descriptor._internal_create_key,
+  ),
+  _descriptor.MethodDescriptor(
+    name='Apply',
+    full_name='proto.Service.Apply',
+    index=2,
+    containing_service=None,
+    input_type=_APPLYREQDATA,
+    output_type=_APPLYREPDATA,
+    serialized_options=None,
+    create_key=_descriptor._internal_create_key,
+  ),
+  _descriptor.MethodDescriptor(
+    name='Release',
+    full_name='proto.Service.Release',
+    index=3,
+    containing_service=None,
+    input_type=_APPLYREPDATA,
+    output_type=_STRINGREPDATA,
+    serialized_options=None,
+    create_key=_descriptor._internal_create_key,
+  ),
+])
+_sym_db.RegisterServiceDescriptor(_SERVICE)
+
+DESCRIPTOR.services_by_name['Service'] = _SERVICE
+
+# @@protoc_insertion_point(module_scope)

部分文件因为文件数量过多而无法显示