Loading diffoscope/comparators/directory.py +4 −0 Original line number Diff line number Diff line Loading @@ -150,6 +150,10 @@ class Directory(object): def recognizes(file): return file.is_directory() @classmethod def fallback_recognizes(cls, file): return False class FilesystemDirectory(Directory): def __init__(self, path): Loading diffoscope/comparators/gzip.py +4 −0 Original line number Diff line number Diff line Loading @@ -56,5 +56,9 @@ class GzipFile(File): CONTAINER_CLASS = GzipContainer FILE_TYPE_RE = re.compile(r'^gzip compressed data\b') # Work around file(1) Debian bug #876316 FALLBACK_FILE_EXTENSION_SUFFIX = ".gz" FALLBACK_FILE_TYPE_HEADER_PREFIX = b"\x1f\x8b" def compare_details(self, other, source=None): return [Difference.from_text(self.magic_file_type, other.magic_file_type, self, other, source='metadata')] diffoscope/comparators/utils/file.py +43 −5 Original line number Diff line number Diff line Loading @@ -57,6 +57,10 @@ def path_apparent_size(path=".", visited=None): return sum(visited.values()) def _run_tests(fold, tests): return fold(t(y, x) for x, t, y in tests) class File(object, metaclass=abc.ABCMeta): if hasattr(magic, 'open'): # use Magic-file-extensions from file @classmethod Loading Loading @@ -112,14 +116,26 @@ class File(object, metaclass=abc.ABCMeta): @classmethod def recognizes(cls, file): """Check if a file's type matches the one represented by this class. The default test returns True if the file matches these tests: (cls.FILE_TYPE_RE OR cls.FILE_TYPE_HEADER_PREFIX) AND (cls.FILE_EXTENSION_SUFFIX) If any test is None then the test is ignored and effectively deleted from the above definition. By default, the tests are all None and the test returns False for all files. Subclasses may override them with specific values, or override this method to implement a totally different test. """ # The structure below allows us to construct a boolean tree of tests # that can be combined with all() and any(). Tests that are not defined # for a class are filtered out, so that we don't get into a "vacuous # truth" situation like a naive all([]) invocation would give. def run_tests(fold, tests): return fold(t(y, x) for x, t, y in tests) file_type_tests = [test for test in ( (cls.FILE_TYPE_RE, lambda m, t: t.search(m), file.magic_file_type), Loading @@ -131,10 +147,32 @@ class File(object, metaclass=abc.ABCMeta): (cls.FILE_EXTENSION_SUFFIX, str.endswith, file.name), (file_type_tests, run_tests, any), _run_tests, any), ) if test[0]] # filter out undefined tests, inc. file_type_tests if it's empty return _run_tests(all, all_tests) if all_tests else False FALLBACK_FILE_EXTENSION_SUFFIX = None FALLBACK_FILE_TYPE_HEADER_PREFIX = None @classmethod def fallback_recognizes(cls, file): """This is checked if the file could not be identified by recognizes(). This helps to work around bugs in file(1), see Debian bug #876316. The default test returns True if the file matches these tests: cls.FALLBACK_FILE_EXTENSION_SUFFIX AND cls.FALLBACK_FILE_TYPE_HEADER_PREFIX """ all_tests = [test for test in ( (cls.FALLBACK_FILE_EXTENSION_SUFFIX, str.endswith, file.name), (cls.FALLBACK_FILE_TYPE_HEADER_PREFIX, bytes.startswith, file.file_header), ) if test[0]] # filter out undefined tests, inc. file_type_tests if it's empty return run_tests(all, all_tests) if all_tests else False return _run_tests(all, all_tests) if all_tests else False # This might be different from path and is used to do file extension matching @property Loading diffoscope/comparators/utils/specialize.py +23 −13 Original line number Diff line number Diff line Loading @@ -26,23 +26,33 @@ from .. import ComparatorManager logger = logging.getLogger(__name__) def specialize(file): for cls in ComparatorManager().classes: def try_recognize(file, cls, recognizes): if isinstance(file, cls): return file return True # Does this file class match? with profile('recognizes', file): if not cls.recognizes(file): continue #logger.debug("trying %s on %s", cls, file) if not recognizes(file): return False # Found a match; perform type magic logger.debug("Using %s for %s", cls.__name__, file.name) new_cls = type(cls.__name__, (cls, type(file)), {}) file.__class__ = new_cls return True def specialize(file): for cls in ComparatorManager().classes: if try_recognize(file, cls, cls.recognizes): return file logger.debug("Unidentified file. Magic says: %s", file.magic_file_type) for cls in ComparatorManager().classes: if try_recognize(file, cls, cls.fallback_recognizes): logger.debug("File recognized by fallback. Magic says: %s", file.magic_file_type) return file logger.debug("Unidentified file. Magic says: %s", file.magic_file_type) return file diffoscope/comparators/xz.py +4 −0 Original line number Diff line number Diff line Loading @@ -54,3 +54,7 @@ class XzContainer(Archive): class XzFile(File): CONTAINER_CLASS = XzContainer FILE_TYPE_RE = re.compile(r'^XZ compressed data$') # Work around file(1) Debian bug #876316 FALLBACK_FILE_EXTENSION_SUFFIX = ".xz" FALLBACK_FILE_TYPE_HEADER_PREFIX = b"\xfd7zXZ\x00" Loading
diffoscope/comparators/directory.py +4 −0 Original line number Diff line number Diff line Loading @@ -150,6 +150,10 @@ class Directory(object): def recognizes(file): return file.is_directory() @classmethod def fallback_recognizes(cls, file): return False class FilesystemDirectory(Directory): def __init__(self, path): Loading
diffoscope/comparators/gzip.py +4 −0 Original line number Diff line number Diff line Loading @@ -56,5 +56,9 @@ class GzipFile(File): CONTAINER_CLASS = GzipContainer FILE_TYPE_RE = re.compile(r'^gzip compressed data\b') # Work around file(1) Debian bug #876316 FALLBACK_FILE_EXTENSION_SUFFIX = ".gz" FALLBACK_FILE_TYPE_HEADER_PREFIX = b"\x1f\x8b" def compare_details(self, other, source=None): return [Difference.from_text(self.magic_file_type, other.magic_file_type, self, other, source='metadata')]
diffoscope/comparators/utils/file.py +43 −5 Original line number Diff line number Diff line Loading @@ -57,6 +57,10 @@ def path_apparent_size(path=".", visited=None): return sum(visited.values()) def _run_tests(fold, tests): return fold(t(y, x) for x, t, y in tests) class File(object, metaclass=abc.ABCMeta): if hasattr(magic, 'open'): # use Magic-file-extensions from file @classmethod Loading Loading @@ -112,14 +116,26 @@ class File(object, metaclass=abc.ABCMeta): @classmethod def recognizes(cls, file): """Check if a file's type matches the one represented by this class. The default test returns True if the file matches these tests: (cls.FILE_TYPE_RE OR cls.FILE_TYPE_HEADER_PREFIX) AND (cls.FILE_EXTENSION_SUFFIX) If any test is None then the test is ignored and effectively deleted from the above definition. By default, the tests are all None and the test returns False for all files. Subclasses may override them with specific values, or override this method to implement a totally different test. """ # The structure below allows us to construct a boolean tree of tests # that can be combined with all() and any(). Tests that are not defined # for a class are filtered out, so that we don't get into a "vacuous # truth" situation like a naive all([]) invocation would give. def run_tests(fold, tests): return fold(t(y, x) for x, t, y in tests) file_type_tests = [test for test in ( (cls.FILE_TYPE_RE, lambda m, t: t.search(m), file.magic_file_type), Loading @@ -131,10 +147,32 @@ class File(object, metaclass=abc.ABCMeta): (cls.FILE_EXTENSION_SUFFIX, str.endswith, file.name), (file_type_tests, run_tests, any), _run_tests, any), ) if test[0]] # filter out undefined tests, inc. file_type_tests if it's empty return _run_tests(all, all_tests) if all_tests else False FALLBACK_FILE_EXTENSION_SUFFIX = None FALLBACK_FILE_TYPE_HEADER_PREFIX = None @classmethod def fallback_recognizes(cls, file): """This is checked if the file could not be identified by recognizes(). This helps to work around bugs in file(1), see Debian bug #876316. The default test returns True if the file matches these tests: cls.FALLBACK_FILE_EXTENSION_SUFFIX AND cls.FALLBACK_FILE_TYPE_HEADER_PREFIX """ all_tests = [test for test in ( (cls.FALLBACK_FILE_EXTENSION_SUFFIX, str.endswith, file.name), (cls.FALLBACK_FILE_TYPE_HEADER_PREFIX, bytes.startswith, file.file_header), ) if test[0]] # filter out undefined tests, inc. file_type_tests if it's empty return run_tests(all, all_tests) if all_tests else False return _run_tests(all, all_tests) if all_tests else False # This might be different from path and is used to do file extension matching @property Loading
diffoscope/comparators/utils/specialize.py +23 −13 Original line number Diff line number Diff line Loading @@ -26,23 +26,33 @@ from .. import ComparatorManager logger = logging.getLogger(__name__) def specialize(file): for cls in ComparatorManager().classes: def try_recognize(file, cls, recognizes): if isinstance(file, cls): return file return True # Does this file class match? with profile('recognizes', file): if not cls.recognizes(file): continue #logger.debug("trying %s on %s", cls, file) if not recognizes(file): return False # Found a match; perform type magic logger.debug("Using %s for %s", cls.__name__, file.name) new_cls = type(cls.__name__, (cls, type(file)), {}) file.__class__ = new_cls return True def specialize(file): for cls in ComparatorManager().classes: if try_recognize(file, cls, cls.recognizes): return file logger.debug("Unidentified file. Magic says: %s", file.magic_file_type) for cls in ComparatorManager().classes: if try_recognize(file, cls, cls.fallback_recognizes): logger.debug("File recognized by fallback. Magic says: %s", file.magic_file_type) return file logger.debug("Unidentified file. Magic says: %s", file.magic_file_type) return file
diffoscope/comparators/xz.py +4 −0 Original line number Diff line number Diff line Loading @@ -54,3 +54,7 @@ class XzContainer(Archive): class XzFile(File): CONTAINER_CLASS = XzContainer FILE_TYPE_RE = re.compile(r'^XZ compressed data$') # Work around file(1) Debian bug #876316 FALLBACK_FILE_EXTENSION_SUFFIX = ".xz" FALLBACK_FILE_TYPE_HEADER_PREFIX = b"\xfd7zXZ\x00"