Commit 7b8b9ae7 authored by Ximin Luo's avatar Ximin Luo
Browse files

comparators: add a fallback_recognizes to work around file(1) #876316. (Closes: #875282)

parent c561ae54
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -150,6 +150,10 @@ class Directory(object):
    def recognizes(file):
        """Report whether *file* is a directory.

        The answer is delegated entirely to ``file.is_directory()`` and its
        return value is passed back unchanged.
        """
        is_dir = file.is_directory()
        return is_dir

    @classmethod
    def fallback_recognizes(cls, file):
        """Fallback recognition hook; this implementation never matches.

        Always returns False regardless of *file*.
        """
        return False


class FilesystemDirectory(Directory):
    def __init__(self, path):
+4 −0
Original line number Diff line number Diff line
@@ -56,5 +56,9 @@ class GzipFile(File):
    CONTAINER_CLASS = GzipContainer
    FILE_TYPE_RE = re.compile(r'^gzip compressed data\b')

    # Work around file(1) Debian bug #876316
    FALLBACK_FILE_EXTENSION_SUFFIX = ".gz"
    FALLBACK_FILE_TYPE_HEADER_PREFIX = b"\x1f\x8b"

    def compare_details(self, other, source=None):
        """Compare the magic file type descriptions of two files.

        Returns a one-element list holding the result of
        ``Difference.from_text`` applied to ``self.magic_file_type`` and
        ``other.magic_file_type``. The difference is always labelled with
        source ``'metadata'``; the *source* argument is accepted but not used.
        """
        type_difference = Difference.from_text(
            self.magic_file_type,
            other.magic_file_type,
            self,
            other,
            source='metadata',
        )
        return [type_difference]
+43 −5
Original line number Diff line number Diff line
@@ -57,6 +57,10 @@ def path_apparent_size(path=".", visited=None):
    return sum(visited.values())


def _run_tests(fold, tests):
    return fold(t(y, x) for x, t, y in tests)


class File(object, metaclass=abc.ABCMeta):
    if hasattr(magic, 'open'):  # use Magic-file-extensions from file
        @classmethod
@@ -112,14 +116,26 @@ class File(object, metaclass=abc.ABCMeta):

    @classmethod
    def recognizes(cls, file):
        """Check if a file's type matches the one represented by this class.

        The default test returns True if the file matches these tests:

        (cls.FILE_TYPE_RE OR
         cls.FILE_TYPE_HEADER_PREFIX) AND
        (cls.FILE_EXTENSION_SUFFIX)

        If any test is None then the test is ignored and effectively deleted
        from the above definition.

        By default, the tests are all None and the test returns False for all
        files. Subclasses may override them with specific values, or override
        this method to implement a totally different test.
        """
        # The structure below allows us to construct a boolean tree of tests
        # that can be combined with all() and any(). Tests that are not defined
        # for a class are filtered out, so that we don't get into a "vacuous
        # truth" situation like a naive all([]) invocation would give.

        def run_tests(fold, tests):
            return fold(t(y, x) for x, t, y in tests)

        file_type_tests = [test for test in (
            (cls.FILE_TYPE_RE,
             lambda m, t: t.search(m), file.magic_file_type),
@@ -131,10 +147,32 @@ class File(object, metaclass=abc.ABCMeta):
            (cls.FILE_EXTENSION_SUFFIX,
             str.endswith, file.name),
            (file_type_tests,
             run_tests, any),
             _run_tests, any),
        ) if test[0]]  # filter out undefined tests, inc. file_type_tests if it's empty

        return _run_tests(all, all_tests) if all_tests else False

    FALLBACK_FILE_EXTENSION_SUFFIX = None
    FALLBACK_FILE_TYPE_HEADER_PREFIX = None

    @classmethod
    def fallback_recognizes(cls, file):
        """This is checked if the file could not be identified by recognizes().
        This helps to work around bugs in file(1), see Debian bug #876316.

        The default test returns True if the file matches these tests:

        cls.FALLBACK_FILE_EXTENSION_SUFFIX AND
        cls.FALLBACK_FILE_TYPE_HEADER_PREFIX

        A test whose class attribute is falsy (e.g. None) is dropped from the
        conjunction. If no test remains, the result is False rather than the
        vacuous True that all([]) would give.
        """
        # Each entry is (parameter, test function, subject); _run_tests calls
        # test(subject, parameter) for every entry that survives the filter.
        all_tests = [test for test in (
            (cls.FALLBACK_FILE_EXTENSION_SUFFIX,
             str.endswith, file.name),
            (cls.FALLBACK_FILE_TYPE_HEADER_PREFIX,
             bytes.startswith, file.file_header),
        ) if test[0]]  # filter out undefined tests

        return _run_tests(all, all_tests) if all_tests else False

    # This might be different from path and is used to do file extension matching
    @property
+23 −13
Original line number Diff line number Diff line
@@ -26,23 +26,33 @@ from .. import ComparatorManager
logger = logging.getLogger(__name__)


def try_recognize(file, cls, recognizes):
    """Try to specialize *file* as an instance of *cls*.

    :param file: the file object to specialize; its ``__class__`` is replaced
        in place when the recognizer matches.
    :param cls: the candidate comparator class.
    :param recognizes: a callable taking *file* and returning a truthy value
        if *cls* should handle it (e.g. ``cls.recognizes`` or
        ``cls.fallback_recognizes``).
    :returns: True if *file* already is an instance of *cls* or was
        specialized to it; False if the recognizer rejected the file.
    """
    if isinstance(file, cls):
        return True

    # Does this file class match?
    with profile('recognizes', file):
        #logger.debug("trying %s on %s", cls, file)
        if not recognizes(file):
            return False

    # Found a match; perform type magic: build a class deriving from both the
    # comparator class and the file's current class, and swap it in.
    logger.debug("Using %s for %s", cls.__name__, file.name)
    new_cls = type(cls.__name__, (cls, type(file)), {})
    file.__class__ = new_cls

    return True


def specialize(file):
    """Specialize *file* to the first comparator class that recognizes it.

    The classes from ``ComparatorManager().classes`` are tried in order, first
    with each class's ``recognizes`` and, if none matches, a second time with
    each class's ``fallback_recognizes``. The "Unidentified file" debug
    message is logged before the fallback pass and again if that pass also
    finds nothing. *file* is returned in every case.
    """
    if any(try_recognize(file, candidate, candidate.recognizes)
           for candidate in ComparatorManager().classes):
        return file

    logger.debug("Unidentified file. Magic says: %s", file.magic_file_type)
    via_fallback = any(
        try_recognize(file, candidate, candidate.fallback_recognizes)
        for candidate in ComparatorManager().classes
    )
    if via_fallback:
        logger.debug("File recognized by fallback. Magic says: %s", file.magic_file_type)
    else:
        logger.debug("Unidentified file. Magic says: %s", file.magic_file_type)

    return file
+4 −0
Original line number Diff line number Diff line
@@ -54,3 +54,7 @@ class XzContainer(Archive):
class XzFile(File):
    """File class for xz-compressed data, paired with XzContainer.

    Only class-level configuration is defined here: the container class, the
    pattern for the magic file type description, and the fallback suffix and
    header prefix.
    """
    CONTAINER_CLASS = XzContainer
    # Pattern for the magic file type description; anchored at both ends, so
    # the description must be exactly "XZ compressed data".
    FILE_TYPE_RE = re.compile(r'^XZ compressed data$')

    # Work around file(1) Debian bug #876316
    # Fallback criteria: a file name suffix and the leading bytes of the file
    # header.
    FALLBACK_FILE_EXTENSION_SUFFIX = ".xz"
    FALLBACK_FILE_TYPE_HEADER_PREFIX = b"\xfd7zXZ\x00"