Loading diffoscope/comparators/pdf.py +35 −27 Original line number Diff line number Diff line Loading @@ -17,6 +17,8 @@ # You should have received a copy of the GNU General Public License # along with diffoscope. If not, see <https://www.gnu.org/licenses/>. import logging import os import re from diffoscope.tools import python_module_missing, tool_required Loading @@ -25,6 +27,8 @@ from diffoscope.difference import Difference from .utils.file import File from .utils.command import Command logger = logging.getLogger(__name__) try: import PyPDF2 Loading Loading @@ -95,13 +99,10 @@ class PdfFile(File): return xs @staticmethod def dump_pypdf2_metadata(file): def dump_pypdf2_metadata(self, file): try: pdf = PyPDF2.PdfFileReader(file.path) document_info = pdf.getDocumentInfo() except PdfReadError as e: return f"(Could not extract metadata: {e})" if document_info is None: return "" Loading @@ -111,13 +112,15 @@ class PdfFile(File): xs.append("{}: {!r}".format(k.lstrip("/"), v)) return "\n".join(xs) except PdfReadError as e: msg = f"Could not extract PyPDF2 metadata from {os.path.basename(file.name)}: {e}" self.add_comment(msg) logger.error(msg) return "" @staticmethod def dump_pypdf2_annotations(file): def dump_pypdf2_annotations(self, file): try: pdf = PyPDF2.PdfFileReader(file.path) except PdfReadError as e: return f"(Could not open file: {e})" xs = [] for x in range(pdf.getNumPages()): Loading @@ -132,3 +135,8 @@ class PdfFile(File): pass return "\n".join(xs) except PdfReadError as e: msg = f"Could not extract PyPDF2 annotations from {os.path.basename(file.name)}: {e}" file.add_comment(msg) logger.error(msg) return "" Loading
diffoscope/comparators/pdf.py +35 −27 Original line number Diff line number Diff line Loading @@ -17,6 +17,8 @@ # You should have received a copy of the GNU General Public License # along with diffoscope. If not, see <https://www.gnu.org/licenses/>. import logging import os import re from diffoscope.tools import python_module_missing, tool_required Loading @@ -25,6 +27,8 @@ from diffoscope.difference import Difference from .utils.file import File from .utils.command import Command logger = logging.getLogger(__name__) try: import PyPDF2 Loading Loading @@ -95,13 +99,10 @@ class PdfFile(File): return xs @staticmethod def dump_pypdf2_metadata(file): def dump_pypdf2_metadata(self, file): try: pdf = PyPDF2.PdfFileReader(file.path) document_info = pdf.getDocumentInfo() except PdfReadError as e: return f"(Could not extract metadata: {e})" if document_info is None: return "" Loading @@ -111,13 +112,15 @@ class PdfFile(File): xs.append("{}: {!r}".format(k.lstrip("/"), v)) return "\n".join(xs) except PdfReadError as e: msg = f"Could not extract PyPDF2 metadata from {os.path.basename(file.name)}: {e}" self.add_comment(msg) logger.error(msg) return "" @staticmethod def dump_pypdf2_annotations(file): def dump_pypdf2_annotations(self, file): try: pdf = PyPDF2.PdfFileReader(file.path) except PdfReadError as e: return f"(Could not open file: {e})" xs = [] for x in range(pdf.getNumPages()): Loading @@ -132,3 +135,8 @@ class PdfFile(File): pass return "\n".join(xs) except PdfReadError as e: msg = f"Could not extract PyPDF2 annotations from {os.path.basename(file.name)}: {e}" file.add_comment(msg) logger.error(msg) return ""