Commits (2)
......@@ -34,6 +34,8 @@ from .utils.command import Command
logger = logging.getLogger(__name__)
PYPDF_MAJOR_VERSION = None
try:
try:
# PyPDF 3.x
......@@ -43,12 +45,22 @@ try:
import PyPDF2 as pypdf
try:
# pyPDF 2.x-3.x
import pypdf.PdfReader as PdfReader
try:
# pyPDF 3.x
from pypdf import PdfReader
PYPDF_MAJOR_VERSION = 3
except ImportError:
# pyPDF 2.x
import pypdf.PdfReader as PdfReader
PYPDF_MAJOR_VERSION = 2
except ImportError:
# PyPDF2 1.x
import pypdf.PdfFileReader as PdfReader
PYPDF_MAJOR_VERSION = 1
try:
# PyPDF 3.x
from pypdf.errors import PdfReadError
......@@ -84,37 +96,10 @@ class PdfFile(File):
def compare_details(self, other, source=None):
xs = []
if pypdf is None:
pkg = get_package_provider("pypdf")
infix = f" from the '{pkg}' package " if pkg else " "
self.add_comment(
f"Installing the 'pypdf' Python module{infix}"
"may produce a better output."
)
else:
difference = Difference.from_text(
self.dump_pypdf_metadata(self),
self.dump_pypdf_metadata(other),
self.name,
other.name,
)
if difference:
difference.add_comment("Document info")
xs.append(difference)
difference = Difference.from_text(
self.dump_pypdf_annotations(self),
self.dump_pypdf_annotations(other),
self.name,
other.name,
)
if difference:
difference.add_comment("Annotations")
xs.append(difference)
xs.extend(self.gen_metadata_differences(other))
xs.append(Difference.from_operation(Pdftotext, self.path, other.path))
# Don't include verbose dumppdf output unless we won't see any any
# Don't include verbose dumppdf output unless we won't see any
# differences without it.
if not any(xs):
xs.append(
......@@ -123,6 +108,38 @@ class PdfFile(File):
return xs
def gen_metadata_differences(self, other):
    """
    Lazily yield the pypdf-based differences between this file and *other*.

    When no pypdf module could be imported (PYPDF_MAJOR_VERSION is None), a
    comment recommending its installation is attached to this file and
    nothing is yielded. Otherwise the "Document info" dump is compared
    first and the "Annotations" dump second; a Difference is only yielded
    for a section whose dumps actually differ.
    """
    if PYPDF_MAJOR_VERSION is None:
        pkg = get_package_provider("pypdf")
        infix = f" from the '{pkg}' package " if pkg else " "
        self.add_comment(
            f"Installing the 'pypdf' Python module{infix}"
            "may produce a better output."
        )
        return

    # Each section pairs the dumper used on both files with the comment
    # attached to the resulting Difference.
    sections = (
        (self.dump_pypdf_metadata, "Document info"),
        (self.dump_pypdf_annotations, "Annotations"),
    )

    for dump, label in sections:
        section_diff = Difference.from_text(
            dump(self),
            dump(other),
            self.name,
            other.name,
        )
        if not section_diff:
            continue
        section_diff.add_comment(label)
        yield section_diff
def dump_pypdf_metadata(self, file):
try:
pdf = PdfReader(file.path)
......@@ -154,9 +171,14 @@ class PdfFile(File):
try:
for annot in page["/Annots"]:
subtype = annot.getObject()["/Subtype"]
if subtype == "/Text":
xs.append(annot.getObject()["/Contents"])
if PYPDF_MAJOR_VERSION == 3:
subtype = annot.get_object()["/Subtype"]
if subtype == "/Text":
xs.append(annot.get_object()["/Contents"])
else:
subtype = annot.getObject()["/Subtype"]
if subtype == "/Text":
xs.append(annot.getObject()["/Contents"])
except KeyError:
pass
......
......@@ -22,7 +22,11 @@ import pytest
from diffoscope.comparators.pdf import PdfFile
from ..utils.data import load_fixture, assert_diff
from ..utils.tools import skip_unless_tools_exist, skip_unless_module_exists
from ..utils.tools import (
skip_unless_tools_exist,
skip_unless_module_exists,
skipif,
)
from ..utils.nonexisting import assert_non_existing
......@@ -34,6 +38,18 @@ pdf1a = load_fixture("test_weird_non_unicode_chars1.pdf")
pdf2a = load_fixture("test_weird_non_unicode_chars2.pdf")
def skip_unless_pypdf3():
    """
    Return a skip marker for tests that require pypdf 3.x or newer.

    The decorated test is skipped when the ``pypdf`` module cannot be
    imported, when its major version is lower than 3, or when its version
    string cannot be parsed.
    """

    def pypdf_missing_or_too_old():
        try:
            import pypdf
        except ImportError:
            return True

        # Compare the major component numerically. A plain "3." prefix test
        # would also skip 4.x and later, contradicting the "3.x+" reason
        # given below.
        version = str(getattr(pypdf, "__version__", ""))
        try:
            return int(version.split(".", 1)[0]) < 3
        except ValueError:
            # Missing or unparseable version: treat it as unsupported
            # rather than erroring out at collection time.
            return True

    return skipif(
        pypdf_missing_or_too_old(),
        reason="pypdf not installed or not version 3.x+",
    )
def test_identification(pdf1):
assert isinstance(pdf1, PdfFile)
......@@ -69,9 +85,11 @@ def differences_metadata(pdf1, pdf1a):
return pdf1.compare(pdf1a).details
@skip_unless_pypdf3()
@skip_unless_tools_exist("pdftotext")
@skip_unless_module_exists("pypdf")
def test_metadata(differences_metadata):
assert len(differences_metadata) == 2
assert_diff(differences_metadata[0], "pdf_metadata_expected_diff")
......@@ -80,7 +98,7 @@ def differences_annotations(pdf3, pdf4):
return pdf3.compare(pdf4).details
@skip_unless_pypdf3()
@skip_unless_tools_exist("pdftotext")
@skip_unless_module_exists("pypdf")
def test_annotations(differences_annotations):
assert_diff(differences_annotations[0], "pdf_annotations_expected_diff")
@@ -1,3 +1,7 @@
This is a test PDF.
+10: Typewriter annotation
+2: Another type of
+comment
+
@@ -0,0 +1,2 @@
+1: A PDF comment created in Okular
+11: Comment created with evince
This diff is collapsed.