utils.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. # Copyright (c) 2006, Mathieu Fenniak
  2. # All rights reserved.
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions are
  6. # met:
  7. #
  8. # * Redistributions of source code must retain the above copyright notice,
  9. # this list of conditions and the following disclaimer.
  10. # * Redistributions in binary form must reproduce the above copyright notice,
  11. # this list of conditions and the following disclaimer in the documentation
  12. # and/or other materials provided with the distribution.
  13. # * The name of the author may not be used to endorse or promote products
  14. # derived from this software without specific prior written permission.
  15. #
  16. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  20. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  21. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  22. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  23. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  24. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  25. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  26. # POSSIBILITY OF SUCH DAMAGE.
  27. """
  28. Utility functions for PDF library.
  29. """
  30. __author__ = "Mathieu Fenniak"
  31. __author_email__ = "biziqe@mathieu.fenniak.net"
  32. import sys
  33. try:
  34. import __builtin__ as builtins
  35. except ImportError: # Py3
  36. import builtins
  37. xrange_fn = getattr(builtins, "xrange", range)
  38. _basestring = getattr(builtins, "basestring", str)
  39. bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X
  40. string_type = getattr(builtins, "unicode", str)
  41. int_types = (int, long) if sys.version_info[0] < 3 else (int,)
  42. # Make basic type tests more consistent
  43. def isString(s):
  44. """Test if arg is a string. Compatible with Python 2 and 3."""
  45. return isinstance(s, _basestring)
  46. def isInt(n):
  47. """Test if arg is an int. Compatible with Python 2 and 3."""
  48. return isinstance(n, int_types)
  49. def isBytes(b):
  50. """Test if arg is a bytes instance. Compatible with Python 2 and 3."""
  51. import warnings
  52. warnings.warn("PyPDF2.utils.isBytes will be deprecated", DeprecationWarning)
  53. return isinstance(b, bytes_type)
  54. #custom implementation of warnings.formatwarning
  55. def formatWarning(message, category, filename, lineno, line=None):
  56. file = filename.replace("/", "\\").rsplit("\\", 1)[-1] # find the file name
  57. return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)
  58. def readUntilWhitespace(stream, maxchars=None):
  59. """
  60. Reads non-whitespace characters and returns them.
  61. Stops upon encountering whitespace or when maxchars is reached.
  62. """
  63. txt = b_("")
  64. while True:
  65. tok = stream.read(1)
  66. if tok.isspace() or not tok:
  67. break
  68. txt += tok
  69. if len(txt) == maxchars:
  70. break
  71. return txt
  72. def readNonWhitespace(stream):
  73. """
  74. Finds and reads the next non-whitespace character (ignores whitespace).
  75. """
  76. tok = WHITESPACES[0]
  77. while tok in WHITESPACES:
  78. tok = stream.read(1)
  79. return tok
  80. def skipOverWhitespace(stream):
  81. """
  82. Similar to readNonWhitespace, but returns a Boolean if more than
  83. one whitespace character was read.
  84. """
  85. tok = WHITESPACES[0]
  86. cnt = 0;
  87. while tok in WHITESPACES:
  88. tok = stream.read(1)
  89. cnt+=1
  90. return (cnt > 1)
  91. def skipOverComment(stream):
  92. tok = stream.read(1)
  93. stream.seek(-1, 1)
  94. if tok == b_('%'):
  95. while tok not in (b_('\n'), b_('\r')):
  96. tok = stream.read(1)
  97. def readUntilRegex(stream, regex, ignore_eof=False):
  98. """
  99. Reads until the regular expression pattern matched (ignore the match)
  100. Raise PdfStreamError on premature end-of-file.
  101. :param bool ignore_eof: If true, ignore end-of-line and return immediately
  102. """
  103. name = b_('')
  104. while True:
  105. tok = stream.read(16)
  106. if not tok:
  107. # stream has truncated prematurely
  108. if ignore_eof == True:
  109. return name
  110. else:
  111. raise PdfStreamError("Stream has ended unexpectedly")
  112. m = regex.search(tok)
  113. if m is not None:
  114. name += tok[:m.start()]
  115. stream.seek(m.start()-len(tok), 1)
  116. break
  117. name += tok
  118. return name
  119. class ConvertFunctionsToVirtualList(object):
  120. def __init__(self, lengthFunction, getFunction):
  121. self.lengthFunction = lengthFunction
  122. self.getFunction = getFunction
  123. def __len__(self):
  124. return self.lengthFunction()
  125. def __getitem__(self, index):
  126. if isinstance(index, slice):
  127. indices = xrange_fn(*index.indices(len(self)))
  128. cls = type(self)
  129. return cls(indices.__len__, lambda idx: self[indices[idx]])
  130. if not isInt(index):
  131. raise TypeError("sequence indices must be integers")
  132. len_self = len(self)
  133. if index < 0:
  134. # support negative indexes
  135. index = len_self + index
  136. if index < 0 or index >= len_self:
  137. raise IndexError("sequence index out of range")
  138. return self.getFunction(index)
  139. def RC4_encrypt(key, plaintext):
  140. S = [i for i in range(256)]
  141. j = 0
  142. for i in range(256):
  143. j = (j + S[i] + ord_(key[i % len(key)])) % 256
  144. S[i], S[j] = S[j], S[i]
  145. i, j = 0, 0
  146. retval = []
  147. for x in range(len(plaintext)):
  148. i = (i + 1) % 256
  149. j = (j + S[i]) % 256
  150. S[i], S[j] = S[j], S[i]
  151. t = S[(S[i] + S[j]) % 256]
  152. retval.append(b_(chr(ord_(plaintext[x]) ^ t)))
  153. return b_("").join(retval)
  154. def matrixMultiply(a, b):
  155. return [[sum([float(i)*float(j)
  156. for i, j in zip(row, col)]
  157. ) for col in zip(*b)]
  158. for row in a]
  159. def markLocation(stream):
  160. """Creates text file showing current location in context."""
  161. # Mainly for debugging
  162. RADIUS = 5000
  163. stream.seek(-RADIUS, 1)
  164. outputDoc = open('PyPDF2_pdfLocation.txt', 'w')
  165. outputDoc.write(stream.read(RADIUS))
  166. outputDoc.write('HERE')
  167. outputDoc.write(stream.read(RADIUS))
  168. outputDoc.close()
  169. stream.seek(-RADIUS, 1)
  170. class PyPdfError(Exception):
  171. pass
  172. class PdfReadError(PyPdfError):
  173. pass
  174. class PageSizeNotDefinedError(PyPdfError):
  175. pass
  176. class PdfReadWarning(UserWarning):
  177. pass
  178. class PdfStreamError(PdfReadError):
  179. pass
  180. if sys.version_info[0] < 3:
  181. def b_(s):
  182. return s
  183. else:
  184. B_CACHE = {}
  185. def b_(s):
  186. bc = B_CACHE
  187. if s in bc:
  188. return bc[s]
  189. if type(s) == bytes:
  190. return s
  191. else:
  192. r = s.encode('latin-1')
  193. if len(s) < 2:
  194. bc[s] = r
  195. return r
  196. def u_(s):
  197. if sys.version_info[0] < 3:
  198. return unicode(s, 'unicode_escape')
  199. else:
  200. return s
  201. def str_(b):
  202. if sys.version_info[0] < 3:
  203. return b
  204. else:
  205. if type(b) == bytes:
  206. return b.decode('latin-1')
  207. else:
  208. return b
  209. def ord_(b):
  210. if sys.version_info[0] < 3 or type(b) == str:
  211. return ord(b)
  212. else:
  213. return b
  214. def chr_(c):
  215. if sys.version_info[0] < 3:
  216. return c
  217. else:
  218. return chr(c)
  219. def barray(b):
  220. if sys.version_info[0] < 3:
  221. return b
  222. else:
  223. return bytearray(b)
  224. def hexencode(b):
  225. if sys.version_info[0] < 3:
  226. return b.encode('hex')
  227. else:
  228. import codecs
  229. coder = codecs.getencoder('hex_codec')
  230. return coder(b)[0]
  231. def hexStr(num):
  232. return hex(num).replace('L', '')
  233. WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]
  234. def paethPredictor(left, up, up_left):
  235. p = left + up - up_left
  236. dist_left = abs(p - left)
  237. dist_up = abs(p - up)
  238. dist_up_left = abs(p - up_left)
  239. if dist_left <= dist_up and dist_left <= dist_up_left:
  240. return left
  241. elif dist_up <= dist_up_left:
  242. return up
  243. else:
  244. return up_left