Commit 6aed2e53, authored by Sam James, committed by Chris Lamb
Browse files

Support PyPDF version 3.



pypdf upstream has renamed the package from PyPDF2 to pypdf. This patch
supports the new pypdf version while keeping compatibility with the older
PyPDF2, adjusting to the API changes.

Signed-off-by: Chris Lamb <lamby@debian.org>
parent dff253b5
Loading
Loading
Loading
Loading
Loading
+25 −17
Original line number Diff line number Diff line
@@ -34,8 +34,16 @@ from .utils.command import Command
logger = logging.getLogger(__name__)

try:
    try:
        import pypdf
    except ImportError:
        import PyPDF2

        pypdf = PyPDF2

    try:
        from pypdf.errors import PdfReadError
    except ImportError:
        try:
            # PyPDF 2.x
            from PyPDF2.errors import PdfReadError
@@ -44,8 +52,8 @@ try:
            from PyPDF2.utils import PdfReadError

except ImportError:  # noqa
    python_module_missing("PyPDF2")
    PyPDF2 = None
    python_module_missing("pypdf")
    pypdf = None


class Pdftotext(Command):
@@ -67,11 +75,11 @@ class PdfFile(File):
    def compare_details(self, other, source=None):
        xs = []

        if PyPDF2 is None:
            pkg = get_package_provider("pypdf2")
        if pypdf is None:
            pkg = get_package_provider("pypdf")
            infix = f" from the '{pkg}' package " if pkg else " "
            self.add_comment(
                f"Installing the 'PyPDF2' Python module{infix}may produce a better output."
                f"Installing the 'pypdf' Python module{infix}may produce a better output."
            )
        else:
            difference = Difference.from_text(
@@ -107,8 +115,8 @@ class PdfFile(File):

    def dump_pypdf2_metadata(self, file):
        try:
            pdf = PyPDF2.PdfFileReader(file.path)
            document_info = pdf.getDocumentInfo()
            pdf = pypdf.PdfReader(file.path)
            document_info = pdf.metadata

            if document_info is None:
                return ""
@@ -119,18 +127,18 @@ class PdfFile(File):

            return "\n".join(xs)
        except PdfReadError as e:
            msg = f"Could not extract PyPDF2 metadata from {os.path.basename(file.name)}: {e}"
            msg = f"Could not extract pypdf metadata from {os.path.basename(file.name)}: {e}"
            self.add_comment(msg)
            logger.error(msg)
            return ""

    def dump_pypdf2_annotations(self, file):
        try:
            pdf = PyPDF2.PdfFileReader(file.path)
            pdf = pypdf.PdfReader(file.path)

            xs = []
            for x in range(pdf.getNumPages()):
                page = pdf.getPage(x)
            for x in range(len(pdf.pages)):
                page = pdf.pages[x]

                try:
                    for annot in page["/Annots"]:
@@ -142,7 +150,7 @@ class PdfFile(File):

            return "\n".join(xs)
        except PdfReadError as e:
            msg = f"Could not extract PyPDF2 annotations from {os.path.basename(file.name)}: {e}"
            msg = f"Could not extract pypdf annotations from {os.path.basename(file.name)}: {e}"
            file.add_comment(msg)
            logger.error(msg)
            return ""
+1 −1
Original line number Diff line number Diff line
@@ -66,7 +66,7 @@ setup(
            "guestfs",
            "jsondiff",
            "python-debian",
            "pypdf2",
            "pypdf",
            "pyxattr",
            "rpm-python",
            "tlsh",
+2 −2
Original line number Diff line number Diff line
@@ -70,7 +70,7 @@ def differences_metadata(pdf1, pdf1a):


@skip_unless_tools_exist("pdftotext")
@skip_unless_module_exists("PyPDF2")
@skip_unless_module_exists("pypdf")
def test_metadata(differences_metadata):
    assert_diff(differences_metadata[0], "pdf_metadata_expected_diff")

@@ -81,7 +81,7 @@ def differences_annotations(pdf3, pdf4):


@skip_unless_tools_exist("pdftotext")
@skip_unless_module_exists("PyPDF2")
@skip_unless_module_exists("pypdf")
def test_annotations(differences_annotations):
    with open("tests/data/pdf_annotations_expected_diff", "w") as f:
        f.write(differences_annotations[0].unified_diff)