Commits (2)
......@@ -66,6 +66,16 @@ class PdfFile(File):
difference.add_comment("Document info")
xs.append(difference)
difference = Difference.from_text(
self.dump_pypdf2_annotations(self),
self.dump_pypdf2_annotations(other),
self.path,
other.path,
)
if difference:
difference.add_comment("Annotations")
xs.append(difference)
xs.append(Difference.from_operation(Pdftotext, self.path, other.path))
# Don't include verbose dumppdf output unless we won't see any
......@@ -93,3 +103,24 @@ class PdfFile(File):
xs.append("{}: {!r}".format(k.lstrip("/"), v))
return "\n".join(xs)
@staticmethod
def dump_pypdf2_annotations(file):
    """Return the contents of all "/Text" annotations in a PDF, one per line.

    ``file`` is an object exposing a ``path`` attribute that is handed to
    ``PyPDF2.PdfFileReader``.

    Returns a newline-joined string of the ``/Contents`` value of every
    annotation whose ``/Subtype`` is ``/Text``, in page order.  If PyPDF2
    raises ``PdfReadError`` while opening the file, a human-readable
    "(Could not open file: ...)" placeholder is returned instead.

    Extraction is best-effort: pages without an ``/Annots`` entry and
    annotations that cannot be read are skipped.
    """
    try:
        pdf = PyPDF2.PdfFileReader(file.path)
    except PyPDF2.utils.PdfReadError as e:
        return f"(Could not open file: {e})"

    xs = []
    for x in range(pdf.getNumPages()):
        page = pdf.getPage(x)

        try:
            annots = list(page["/Annots"])
        except Exception:
            # No (or unreadable) annotation array on this page.  Catch
            # Exception rather than using a bare except so that
            # KeyboardInterrupt/SystemExit still propagate.
            continue

        for annot in annots:
            # Handle each annotation on its own so a single malformed entry
            # does not discard the remaining annotations of the page.
            try:
                obj = annot.getObject()
                if obj["/Subtype"] == "/Text":
                    xs.append(obj["/Contents"])
            except Exception:
                continue

    return "\n".join(xs)
......@@ -21,13 +21,15 @@ import pytest
from diffoscope.comparators.pdf import PdfFile
from ..utils.data import load_fixture, get_data
from ..utils.data import load_fixture, assert_diff
from ..utils.tools import skip_unless_tools_exist, skip_unless_module_exists
from ..utils.nonexisting import assert_non_existing
# PDF fixtures shared by the tests in this module, created via load_fixture.
pdf1 = load_fixture("test1.pdf")
pdf2 = load_fixture("test2.pdf")
# test3.pdf / test4.pdf are the pair compared by the annotations fixture.
pdf3 = load_fixture("test3.pdf")
pdf4 = load_fixture("test4.pdf")
# NOTE(review): judging by the file names these presumably contain
# non-Unicode characters -- confirm against the fixture files.
pdf1a = load_fixture("test_weird_non_unicode_chars1.pdf")
pdf2a = load_fixture("test_weird_non_unicode_chars2.pdf")
......@@ -54,8 +56,7 @@ def differences(pdf1, pdf2):
@skip_unless_tools_exist("pdftotext")
def test_text_diff(differences):
    """Check the first difference against the stored "pdf_text_expected_diff"."""
    # A single assert_diff call replaces the former get_data() comparison,
    # which checked the same data file a second time.
    assert_diff(differences[0], "pdf_text_expected_diff")
@skip_unless_tools_exist("pdftotext")
......@@ -71,5 +72,17 @@ def differences_metadata(pdf1, pdf1a):
@skip_unless_tools_exist("pdftotext")
@skip_unless_module_exists("PyPDF2")
def test_metadata(differences_metadata):
    """Check the first difference against the stored "pdf_metadata_expected_diff"."""
    # A single assert_diff call replaces the former get_data() comparison,
    # which checked the same data file a second time.
    assert_diff(differences_metadata[0], "pdf_metadata_expected_diff")
@pytest.fixture
def differences_annotations(pdf3, pdf4):
    """Provide the detail differences produced by comparing pdf3 with pdf4."""
    comparison = pdf3.compare(pdf4)
    return comparison.details
@skip_unless_tools_exist("pdftotext")
@skip_unless_module_exists("PyPDF2")
def test_annotations(differences_annotations):
    """Check the first difference against the stored "pdf_annotations_expected_diff"."""
    # The expected-diff data file must not be regenerated from the current
    # output here: doing so makes the comparison below pass unconditionally
    # and overwrites the checked-in test data on every run.
    assert_diff(differences_annotations[0], "pdf_annotations_expected_diff")
@@ -0,0 +1,2 @@
+1: A PDF comment created in Okular
+11: Comment created with evince
......@@ -205,9 +205,11 @@ ALLOWED_TEST_FILES = {
"test3.apk",
"test3.changes",
"test3.gif",
"test3.pdf",
"test3.zip",
"test4.changes",
"test4.gif",
"test4.pdf",
"test_comment1.zip",
"test_comment2.zip",
"test_invalid.json",
......