Commit 7b8b9ae7 authored by Ximin Luo

comparators: add a fallback_recognizes to work around file(1) #876316. (Closes: #875282)

parent c561ae54
......@@ -150,6 +150,10 @@ class Directory(object):
def recognizes(file):
    """Return whether *file* is a directory.

    The decision is delegated entirely to ``file.is_directory()``; its
    result is returned unchanged.
    """
    return file.is_directory()
@classmethod
def fallback_recognizes(cls, file):
    """Fallback recognition hook; no fallback test is applied here.

    Always returns False regardless of *file*.
    """
    return False
class FilesystemDirectory(Directory):
def __init__(self, path):
......
......@@ -56,5 +56,9 @@ class GzipFile(File):
# Container type associated with this file class.
CONTAINER_CLASS = GzipContainer
# Pattern searched for in the file's magic file type description.
FILE_TYPE_RE = re.compile(r'^gzip compressed data\b')
# Work around file(1) Debian bug #876316
# Fallback tests: the file name must end with ".gz" and the file header must
# start with the two gzip magic bytes 0x1f 0x8b (ID1/ID2 in RFC 1952).
FALLBACK_FILE_EXTENSION_SUFFIX = ".gz"
FALLBACK_FILE_TYPE_HEADER_PREFIX = b"\x1f\x8b"
def compare_details(self, other, source=None):
    """Compare the magic file type descriptions of ``self`` and ``other``.

    Returns a one-element list holding the result of
    ``Difference.from_text`` applied to both ``magic_file_type`` values,
    labelled with the source name 'metadata'.

    The ``source`` parameter is accepted but not used by this method.
    """
    return [
        Difference.from_text(
            self.magic_file_type,
            other.magic_file_type,
            self,
            other,
            source='metadata',
        ),
    ]
......@@ -57,6 +57,10 @@ def path_apparent_size(path=".", visited=None):
return sum(visited.values())
def _run_tests(fold, tests):
return fold(t(y, x) for x, t, y in tests)
class File(object, metaclass=abc.ABCMeta):
if hasattr(magic, 'open'): # use Magic-file-extensions from file
@classmethod
......@@ -112,14 +116,26 @@ class File(object, metaclass=abc.ABCMeta):
@classmethod
def recognizes(cls, file):
"""Check if a file's type matches the one represented by this class.
The default test returns True if the file matches these tests:
(cls.FILE_TYPE_RE OR
cls.FILE_TYPE_HEADER_PREFIX) AND
(cls.FILE_EXTENSION_SUFFIX)
If any test is None then the test is ignored and effectively deleted
from the above definition.
By default, the tests are all None and the test returns False for all
files. Subclasses may override them with specific values, or override
this method to implement a totally different test.
"""
# The structure below allows us to construct a boolean tree of tests
# that can be combined with all() and any(). Tests that are not defined
# for a class are filtered out, so that we don't get into a "vacuous
# truth" situation like a naive all([]) invocation would give.
def run_tests(fold, tests):
return fold(t(y, x) for x, t, y in tests)
file_type_tests = [test for test in (
(cls.FILE_TYPE_RE,
lambda m, t: t.search(m), file.magic_file_type),
......@@ -131,10 +147,32 @@ class File(object, metaclass=abc.ABCMeta):
(cls.FILE_EXTENSION_SUFFIX,
str.endswith, file.name),
(file_type_tests,
run_tests, any),
_run_tests, any),
) if test[0]] # filter out undefined tests, inc. file_type_tests if it's empty
return _run_tests(all, all_tests) if all_tests else False
# Defaults for the fallback tests read by fallback_recognizes(). A value of
# None means the corresponding test is undefined and gets filtered out.
FALLBACK_FILE_EXTENSION_SUFFIX = None
FALLBACK_FILE_TYPE_HEADER_PREFIX = None
@classmethod
def fallback_recognizes(cls, file):
    """This is checked if the file could not be identified by recognizes().

    This helps to work around bugs in file(1), see Debian bug #876316.

    The default test returns True if the file matches these tests:

      cls.FALLBACK_FILE_EXTENSION_SUFFIX AND
      cls.FALLBACK_FILE_TYPE_HEADER_PREFIX

    A test whose class attribute is unset (falsy) is dropped from the
    conjunction. If no test remains, the result is False.
    """
    all_tests = [test for test in (
        (cls.FALLBACK_FILE_EXTENSION_SUFFIX,
         str.endswith, file.name),
        (cls.FALLBACK_FILE_TYPE_HEADER_PREFIX,
         bytes.startswith, file.file_header),
    ) if test[0]]  # filter out undefined tests

    # Guard against the "vacuous truth" of all([]) when nothing is defined.
    return _run_tests(all, all_tests) if all_tests else False
# This might be different from path and is used to do file extension matching
@property
......
......@@ -26,23 +26,33 @@ from .. import ComparatorManager
logger = logging.getLogger(__name__)
def try_recognize(file, cls, recognizes):
    """Try to specialize *file* into *cls* using the *recognizes* predicate.

    Returns True when *file* is already an instance of *cls*, or when
    ``recognizes(file)`` is truthy. In the latter case ``file.__class__`` is
    replaced in place by a freshly created type named after *cls* whose bases
    are ``(cls, type(file))``.

    Returns False, leaving *file* untouched, when the predicate rejects it.
    """
    if isinstance(file, cls):
        return True

    # Does this file class match?
    with profile('recognizes', file):
        #logger.debug("trying %s on %s", cls, file)
        if not recognizes(file):
            return False

    # Found a match; perform type magic
    logger.debug("Using %s for %s", cls.__name__, file.name)
    new_cls = type(cls.__name__, (cls, type(file)), {})
    file.__class__ = new_cls

    return True
def specialize(file):
    """Specialize *file* to the first comparator class that recognizes it.

    Two passes are made over ``ComparatorManager().classes``:

      1. each class's ``recognizes`` test;
      2. if nothing matched, each class's ``fallback_recognizes`` test
         (works around file(1) misidentification, Debian bug #876316).

    The per-class work (instance check, profiling, class replacement) is
    done by ``try_recognize``. *file* is always returned; when no class
    matches it is returned unchanged after logging its magic file type.
    """
    for cls in ComparatorManager().classes:
        if try_recognize(file, cls, cls.recognizes):
            return file

    for cls in ComparatorManager().classes:
        if try_recognize(file, cls, cls.fallback_recognizes):
            logger.debug("File recognized by fallback. Magic says: %s", file.magic_file_type)
            return file

    logger.debug("Unidentified file. Magic says: %s", file.magic_file_type)

    return file
......@@ -54,3 +54,7 @@ class XzContainer(Archive):
class XzFile(File):
    """File class for XZ compressed data; its container class is XzContainer."""

    CONTAINER_CLASS = XzContainer
    # Pattern searched for in the file's magic file type description.
    FILE_TYPE_RE = re.compile(r'^XZ compressed data$')

    # Work around file(1) Debian bug #876316
    # Fallback tests: the file name must end with ".xz" and the file header
    # must start with the six-byte .xz magic FD 37 7A 58 5A 00.
    FALLBACK_FILE_EXTENSION_SUFFIX = ".xz"
    FALLBACK_FILE_TYPE_HEADER_PREFIX = b"\xfd7zXZ\x00"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment