tests.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. import os
  2. import sys
  3. import unittest
  4. import binascii
  5. from PyPDF2 import PdfFileReader, PdfFileWriter
  # Path configuration: TESTS_ROOT is the absolute directory holding this file,
  # PROJECT_ROOT is its parent, and RESOURCE_ROOT is the 'Resources' folder
  # under the project root.
  TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
  PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
  RESOURCE_ROOT = os.path.join(PROJECT_ROOT, 'Resources')

  # Append the project root to the module search path.
  sys.path.append(PROJECT_ROOT)
  class PdfReaderTestCases(unittest.TestCase):
      """Reader tests that compare data pulled from sample PDFs with reference files."""

      def test_PdfReaderFileLoad(self):
          """
          Load crazyones.pdf, extract the text of its first page and compare it
          with the reference bytes stored in crazyones.txt.

          Expected outcome: the file loads and the extracted text (newlines
          removed, UTF-8 encoded) equals the reference content.
          """
          pdf_path = os.path.join(RESOURCE_ROOT, 'crazyones.pdf')
          reference_path = os.path.join(RESOURCE_ROOT, 'crazyones.txt')

          # Everything that touches the reader runs while the PDF stream is open.
          with open(pdf_path, 'rb') as pdf_stream:
              reader = PdfFileReader(pdf_stream)
              first_page = reader.getPage(0)

              # Reference text is read as raw bytes.
              with open(reference_path, 'rb') as reference_stream:
                  expected = reference_stream.read()

              # Drop newlines and encode so both sides of the comparison are bytes.
              raw_text = first_page.extractText()
              extracted = raw_text.replace('\n', '').encode('utf-8')

              failure_message = (
                  'PDF extracted text differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n'
                  % (expected, extracted)
              )
              self.assertEqual(extracted, expected, msg=failure_message)

      def test_PdfReaderJpegImage(self):
          """
          Load jpeg.pdf, fetch the data of the '/Im4' XObject on its first page
          and compare its hex encoding with the reference text in jpeg.txt.

          Expected outcome: the file loads and the hex string matches the
          reference content.
          """
          pdf_path = os.path.join(RESOURCE_ROOT, 'jpeg.pdf')
          reference_path = os.path.join(RESOURCE_ROOT, 'jpeg.txt')

          # Everything that touches the reader runs while the PDF stream is open.
          with open(pdf_path, 'rb') as pdf_stream:
              reader = PdfFileReader(pdf_stream)

              # Reference hex dump is read in text mode.
              with open(reference_path, 'r') as reference_stream:
                  expected_hex = reference_stream.read()

              page = reader.getPage(0)
              xobjects = page['/Resources']['/XObject'].getObject()
              image_data = xobjects['/Im4'].getData()

              # Hex-encode once; the value is used in the assertion and its message.
              extracted_hex = binascii.hexlify(image_data).decode()

              failure_message = (
                  'PDF extracted image differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n'
                  % (expected_hex, extracted_hex)
              )
              self.assertEqual(extracted_hex, expected_hex, msg=failure_message)
  class AddJsTestCase(unittest.TestCase):
      """Tests for PdfFileWriter.addJS on a writer filled from crazyones.pdf."""

      def setUp(self):
          """Create a writer and append to it the pages read from crazyones.pdf."""
          source_path = os.path.join(RESOURCE_ROOT, 'crazyones.pdf')
          reader = PdfFileReader(source_path)
          writer = PdfFileWriter()
          writer.appendPagesFromReader(reader)
          self.pdf_file_writer = writer

      def test_add(self):
          """After addJS, the root object has '/Names' (with '/JavaScript') and '/OpenAction'."""
          writer = self.pdf_file_writer
          writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")

          catalog = writer._root_object
          self.assertIn('/Names', catalog, "addJS should add a name catalog in the root object.")
          self.assertIn('/JavaScript', catalog['/Names'], "addJS should add a JavaScript name tree under the name catalog.")
          self.assertIn('/OpenAction', catalog, "addJS should add an OpenAction to the catalog.")

      def test_overwrite(self):
          """Calling addJS twice with the same script yields two different first names."""
          script = "this.print({bUI:true,bSilent:false,bShrinkToFit:true});"

          observed = []
          for _ in range(2):
              self.pdf_file_writer.addJS(script)
              observed.append(self.get_javascript_name())

          first_js, second_js = observed
          self.assertNotEqual(first_js, second_js, "addJS should overwrite the previous script in the catalog.")

      def get_javascript_name(self):
          """
          Return the first element of root['/Names']['/JavaScript']['/Names'],
          asserting that each of the three keys is present along the way.
          """
          node = self.pdf_file_writer._root_object
          for key in ('/Names', '/JavaScript', '/Names'):
              self.assertIn(key, node)
              node = node[key]
          return node[0]