Skip to content
Snippets Groups Projects
Commit eaea1231 authored by Ximin Luo
Browse files

comparators: factor common logic from various comparators into File.recognizes

parent e3d62919
No related branches found
No related tags found
No related merge requests found
...@@ -151,13 +151,6 @@ class ApkFile(File): ...@@ -151,13 +151,6 @@ class ApkFile(File):
RE_FILE_EXTENSION = re.compile(r'\.apk$') RE_FILE_EXTENSION = re.compile(r'\.apk$')
CONTAINER_CLASS = ApkContainer CONTAINER_CLASS = ApkContainer
@staticmethod
def recognizes(file):
    """Tell whether ``file`` should be handled as an ``ApkFile``.

    The name must match ``ApkFile.RE_FILE_EXTENSION``.  In addition, either
    ``ApkFile.RE_FILE_TYPE`` must be found in ``file.magic_file_type`` or the
    first four bytes of ``file.file_header`` must equal
    ``ApkFile.RE_FILE_TYPE_FALLBACK_HEADER``.

    Returns ``False`` when the extension does not match; otherwise the regex
    match object when the file type matched, else the result of the header
    comparison.
    """
    if ApkFile.RE_FILE_EXTENSION.search(file.name):
        type_match = ApkFile.RE_FILE_TYPE.search(file.magic_file_type)
        if type_match:
            return type_match
        # The magic type did not match: fall back to the raw header bytes.
        return file.file_header[:4] == ApkFile.RE_FILE_TYPE_FALLBACK_HEADER
    return False
def compare_details(self, other, source=None): def compare_details(self, other, source=None):
zipinfo_difference = Difference.from_command(Zipinfo, self.path, other.path) or \ zipinfo_difference = Difference.from_command(Zipinfo, self.path, other.path) or \
Difference.from_command(ZipinfoVerbose, self.path, other.path) Difference.from_command(ZipinfoVerbose, self.path, other.path)
......
...@@ -540,11 +540,6 @@ class StaticLibFile(File): ...@@ -540,11 +540,6 @@ class StaticLibFile(File):
RE_FILE_TYPE = re.compile(r'\bar archive\b') RE_FILE_TYPE = re.compile(r'\bar archive\b')
RE_FILE_EXTENSION = re.compile(r'\.a$') RE_FILE_EXTENSION = re.compile(r'\.a$')
@staticmethod
def recognizes(file):
    """Tell whether ``file`` should be handled as a ``StaticLibFile``.

    Both ``StaticLibFile.RE_FILE_TYPE`` (searched in ``file.magic_file_type``)
    and ``StaticLibFile.RE_FILE_EXTENSION`` (searched in ``file.name``) must
    match.  The extension is only checked once the file type has matched.

    Returns the falsy result of the file-type search when it fails, otherwise
    the result of the extension search.
    """
    type_match = StaticLibFile.RE_FILE_TYPE.search(file.magic_file_type)
    if not type_match:
        return type_match
    return StaticLibFile.RE_FILE_EXTENSION.search(file.name)
def compare_details(self, other, source=None): def compare_details(self, other, source=None):
differences = [Difference.from_text_readers( differences = [Difference.from_text_readers(
list_libarchive(self.path), list_libarchive(self.path),
......
...@@ -27,18 +27,9 @@ from .utils.command import Command ...@@ -27,18 +27,9 @@ from .utils.command import Command
class FontconfigCacheFile(File): class FontconfigCacheFile(File):
MAGIC = struct.pack('<H', 0xFC04) RE_FILE_TYPE_FALLBACK_HEADER = struct.pack('<H', 0xFC04)
RE_FILE_EXTENSION = re.compile(r'\-le64\.cache-4$') RE_FILE_EXTENSION = re.compile(r'\-le64\.cache-4$')
@staticmethod
def recognizes(file):
    """Tell whether ``file`` should be handled as a ``FontconfigCacheFile``.

    The name must match ``FontconfigCacheFile.RE_FILE_EXTENSION`` and the
    file at ``file.path`` must begin with ``FontconfigCacheFile.MAGIC``.
    The file is only opened when the extension matches.
    """
    if FontconfigCacheFile.RE_FILE_EXTENSION.search(file.name):
        with open(file.path, 'rb') as stream:
            # Read exactly as many leading bytes as the magic is long.
            leading = stream.read(len(FontconfigCacheFile.MAGIC))
            return leading == FontconfigCacheFile.MAGIC
    return False
def compare_details(self, other, source=None): def compare_details(self, other, source=None):
return [Difference.from_text( return [Difference.from_text(
describe_cache_file(self.path), describe_cache_file(self.path),
......
...@@ -70,8 +70,7 @@ class RdsFile(File): ...@@ -70,8 +70,7 @@ class RdsFile(File):
if check_rds_extension(file) or \ if check_rds_extension(file) or \
file.container and \ file.container and \
check_rds_extension(file.container.source): check_rds_extension(file.container.source):
with open(file.path, 'rb') as f: return file.file_header.startswith(HEADER)
return f.read(8) == HEADER
return False return False
def compare_details(self, other, source=None): def compare_details(self, other, source=None):
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>. # along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import re
import zlib import zlib
import os.path import os.path
import logging import logging
...@@ -25,6 +26,7 @@ import logging ...@@ -25,6 +26,7 @@ import logging
from diffoscope.difference import Difference from diffoscope.difference import Difference
from .utils.archive import Archive from .utils.archive import Archive
from .utils.file import File
RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET = 15 RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET = 15
RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET = 23 RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET = 23
...@@ -55,16 +57,10 @@ class RustObjectContainer(Archive): ...@@ -55,16 +57,10 @@ class RustObjectContainer(Archive):
return dest_path return dest_path
class RustObjectFile(object): class RustObjectFile(File):
CONTAINER_CLASS = RustObjectContainer CONTAINER_CLASS = RustObjectContainer
RE_FILE_TYPE_FALLBACK_HEADER = b'RUST_OBJECT\x01\x00\x00\x00'
@staticmethod RE_FILE_EXTENSION = re.compile(r'\.deflate$')
def recognizes(file):
if not file.name.endswith(".deflate"):
return False
# See librustc_trans/back/link.rs for details of this format
with open(file.path, "rb") as fp:
return fp.read(RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET) == b'RUST_OBJECT\x01\x00\x00\x00'
def compare_details(self, other, source=None): def compare_details(self, other, source=None):
return [Difference.from_text(self.magic_file_type, other.magic_file_type, self, other, source='metadata')] return [Difference.from_text(self.magic_file_type, other.magic_file_type, self, other, source='metadata')]
...@@ -58,9 +58,6 @@ def path_apparent_size(path=".", visited=None): ...@@ -58,9 +58,6 @@ def path_apparent_size(path=".", visited=None):
class File(object, metaclass=abc.ABCMeta): class File(object, metaclass=abc.ABCMeta):
RE_FILE_TYPE = None
RE_FILE_EXTENSION = None
if hasattr(magic, 'open'): # use Magic-file-extensions from file if hasattr(magic, 'open'): # use Magic-file-extensions from file
@classmethod @classmethod
def guess_file_type(self, path): def guess_file_type(self, path):
...@@ -109,6 +106,39 @@ class File(object, metaclass=abc.ABCMeta): ...@@ -109,6 +106,39 @@ class File(object, metaclass=abc.ABCMeta):
def __del__(self): def __del__(self):
self.cleanup() self.cleanup()
RE_FILE_TYPE = None
RE_FILE_EXTENSION = None
RE_FILE_TYPE_FALLBACK_HEADER = None
RE_CLASS = re.compile("").__class__
@classmethod
def recognizes(cls, file):
    """Return True if ``file`` passes the recognition tests this class defines.

    Up to three class attributes take part; any that is unset (falsy) is
    skipped:

    * ``RE_FILE_EXTENSION`` -- regex searched in ``file.name``.
    * ``RE_FILE_TYPE`` -- regex searched in ``file.magic_file_type``.
    * ``RE_FILE_TYPE_FALLBACK_HEADER`` -- prefix that ``file.file_header``
      must start with.

    The extension test (if defined) must pass, AND at least one of the two
    file-type tests (if any is defined) must pass.  A class that defines
    none of the tests never matches, so there is no "vacuous truth" match.
    """
    search = cls.RE_CLASS.search

    # Read the file's properties up front, whether or not a test needs them.
    magic_type = file.magic_file_type
    header = file.file_header
    name = file.name

    type_re = cls.RE_FILE_TYPE
    header_prefix = cls.RE_FILE_TYPE_FALLBACK_HEADER
    extension_re = cls.RE_FILE_EXTENSION

    has_type_test = bool(type_re) or bool(header_prefix)

    # Nothing defined at all: explicitly not a match.
    if not extension_re and not has_type_test:
        return False

    # The extension test, when defined, is mandatory and is checked first.
    if extension_re and not search(extension_re, name):
        return False

    # Only an extension test was defined, and it passed.
    if not has_type_test:
        return True

    # Either file-type test is enough; the regex is tried before the header.
    if type_re and search(type_re, magic_type):
        return True
    if header_prefix and header.startswith(header_prefix):
        return True
    return False
# This might be different from path and is used to do file extension matching # This might be different from path and is used to do file extension matching
@property @property
def name(self): def name(self):
......
...@@ -32,22 +32,9 @@ def specialize(file): ...@@ -32,22 +32,9 @@ def specialize(file):
return file return file
# Does this file class match? # Does this file class match?
flag = False with profile('recognizes', file):
if hasattr(cls, 'recognizes'): if not cls.recognizes(file):
with profile('recognizes', file): continue
flag = cls.recognizes(file)
else:
re_tests = [(x, y) for x, y in (
(cls.RE_FILE_TYPE, file.magic_file_type),
(cls.RE_FILE_EXTENSION, file.name),
) if x]
# If neither are defined, it's *not* a match.
if re_tests:
flag = all(x.search(y) for x, y in re_tests)
if not flag:
continue
# Found a match; perform type magic # Found a match; perform type magic
logger.debug("Using %s for %s", cls.__name__, file.name) logger.debug("Using %s for %s", cls.__name__, file.name)
......
...@@ -154,11 +154,7 @@ class MozillaZipFile(File): ...@@ -154,11 +154,7 @@ class MozillaZipFile(File):
# Mozilla-optimized ZIPs start with a 32-bit little endian integer # Mozilla-optimized ZIPs start with a 32-bit little endian integer
# indicating the amount of data to preload, followed by the ZIP # indicating the amount of data to preload, followed by the ZIP
# central directory (with a PK\x01\x02 signature) # central directory (with a PK\x01\x02 signature)
with open(file.path, 'rb') as f: return file.file_header[4:8] == b'PK\x01\x02'
preload = f.read(4)
if len(preload) == 4:
signature = f.read(4)
return signature == b'PK\x01\x02'
def compare_details(self, other, source=None): def compare_details(self, other, source=None):
zipinfo_difference = Difference.from_command(MozillaZipinfo, self.path, other.path) or \ zipinfo_difference = Difference.from_command(MozillaZipinfo, self.path, other.path) or \
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment