Commit 1e401a73 authored by Ximin Luo

comparators: more tidying up and making names consistent

parent eaea1231
......@@ -146,9 +146,9 @@ class ApkContainer(Archive):
return differences
class ApkFile(File):
RE_FILE_TYPE_FALLBACK_HEADER = b"PK\x03\x04"
RE_FILE_TYPE = re.compile(r'^(Java|Zip) archive data.*\b')
RE_FILE_EXTENSION = re.compile(r'\.apk$')
FILE_TYPE_HEADER_PREFIX = b"PK\x03\x04"
FILE_TYPE_RE = re.compile(r'^(Java|Zip) archive data.*\b')
FILE_EXTENSION_SUFFIX = '.apk'
CONTAINER_CLASS = ApkContainer
def compare_details(self, other, source=None):
......
......@@ -56,7 +56,7 @@ class ArSymbolTableDumper(Command):
class ArFile(File):
CONTAINER_CLASS = ArContainer
RE_FILE_TYPE = re.compile(r'\bar archive\b')
FILE_TYPE_RE = re.compile(r'\bar archive\b')
def compare_details(self, other, source=None):
return [Difference.from_command(ArSymbolTableDumper, self.path, other.path),
......
......@@ -53,4 +53,4 @@ class Bzip2Container(Archive):
class Bzip2File(File):
CONTAINER_CLASS = Bzip2Container
RE_FILE_TYPE = re.compile(r'^bzip2 compressed data\b')
FILE_TYPE_RE = re.compile(r'^bzip2 compressed data\b')
......@@ -28,7 +28,7 @@ from .utils.libarchive import LibarchiveContainer, list_libarchive
class CpioFile(File):
CONTAINER_CLASS = LibarchiveContainer
RE_FILE_TYPE = re.compile(r'\bcpio archive\b')
FILE_TYPE_RE = re.compile(r'\bcpio archive\b')
def compare_details(self, other, source=None):
return [Difference.from_text_readers(
......
......@@ -80,7 +80,7 @@ class DebContainer(LibarchiveContainer):
class DebFile(File):
CONTAINER_CLASS = DebContainer
RE_FILE_TYPE = re.compile(r'^Debian binary package')
FILE_TYPE_RE = re.compile(r'^Debian binary package')
@property
def md5sums(self):
......
......@@ -160,11 +160,11 @@ class DebControlFile(File):
return differences
class DotChangesFile(DebControlFile):
RE_FILE_EXTENSION = re.compile(r'\.changes$')
FILE_EXTENSION_SUFFIX = '.changes'
@staticmethod
def recognizes(file):
if not DotChangesFile.RE_FILE_EXTENSION.search(file.name):
@classmethod
def recognizes(cls, file):
if not super().recognizes(file):
return False
changes = Changes(filename=file.path)
......@@ -201,11 +201,11 @@ class DotChangesFile(DebControlFile):
class DotDscFile(DebControlFile):
RE_FILE_EXTENSION = re.compile(r'\.dsc$')
FILE_EXTENSION_SUFFIX = '.dsc'
@staticmethod
def recognizes(file):
if not DotDscFile.RE_FILE_EXTENSION.search(file.name):
@classmethod
def recognizes(cls, file):
if not super().recognizes(file):
return False
with open(file.path, 'rb') as f:
......@@ -252,11 +252,11 @@ class DotBuildinfoContainer(DebControlContainer):
class DotBuildinfoFile(DebControlFile):
CONTAINER_CLASS = DotBuildinfoContainer
RE_FILE_EXTENSION = re.compile(r'\.buildinfo$')
FILE_EXTENSION_SUFFIX = '.buildinfo'
@staticmethod
def recognizes(file):
if not DotBuildinfoFile.RE_FILE_EXTENSION.search(file.name):
@classmethod
def recognizes(cls, file):
if not super().recognizes(file):
return False
with open(file.path, 'rb') as f:
......
......@@ -17,13 +17,11 @@
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import re
from .text import TextFile
class DotChangesFile(TextFile):
RE_FILE_EXTENSION = re.compile(r'\.changes$')
FILE_EXTENSION_SUFFIX = '.changes'
def compare(self, other, *args, **kwargs):
difference = super().compare(other, *args, **kwargs)
......@@ -33,7 +31,7 @@ class DotChangesFile(TextFile):
return difference
class DotDscFile(TextFile):
RE_FILE_EXTENSION = re.compile(r'\.dsc$')
FILE_EXTENSION_SUFFIX = '.dsc'
def compare(self, other, *args, **kwargs):
difference = super().compare(other, *args, **kwargs)
......@@ -43,7 +41,7 @@ class DotDscFile(TextFile):
return difference
class DotBuildinfoFile(TextFile):
RE_FILE_EXTENSION = re.compile(r'\.buildinfo$')
FILE_EXTENSION_SUFFIX = '.buildinfo'
def compare(self, other, *args, **kwargs):
difference = super().compare(other, *args, **kwargs)
......
......@@ -53,5 +53,5 @@ class DexContainer(Archive):
return dest_path
class DexFile(File):
RE_FILE_TYPE = re.compile(r'^Dalvik dex file .*\b')
FILE_TYPE_RE = re.compile(r'^Dalvik dex file .*\b')
CONTAINER_CLASS = DexContainer
......@@ -37,7 +37,7 @@ class Docx2txt(Command):
class DocxFile(File):
RE_FILE_TYPE = re.compile(r'^Microsoft Word 2007+\b')
FILE_TYPE_RE = re.compile(r'^Microsoft Word 2007+\b')
def compare_details(self, other, source=None):
return [Difference.from_command(
......
......@@ -33,7 +33,7 @@ class DeviceTreeContents(Command):
return ['fdtdump', self.path]
class DeviceTreeFile(File):
RE_FILE_TYPE = re.compile(r'^Device Tree Blob')
FILE_TYPE_RE = re.compile(r'^Device Tree Blob')
def compare_details(self, other, source=None):
return [Difference.from_command(DeviceTreeContents, self.path, other.path)]
......@@ -530,15 +530,15 @@ class ElfContainer(Container):
class ElfFile(File):
CONTAINER_CLASS = ElfContainer
RE_FILE_TYPE = re.compile(r'^ELF ')
FILE_TYPE_RE = re.compile(r'^ELF ')
def compare_details(self, other, source=None):
return _compare_elf_data(self.path, other.path)
class StaticLibFile(File):
CONTAINER_CLASS = ElfContainer
RE_FILE_TYPE = re.compile(r'\bar archive\b')
RE_FILE_EXTENSION = re.compile(r'\.a$')
FILE_TYPE_RE = re.compile(r'\bar archive\b')
FILE_EXTENSION_SUFFIX = '.a'
def compare_details(self, other, source=None):
differences = [Difference.from_text_readers(
......
......@@ -17,7 +17,6 @@
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import re
import struct
from diffoscope.difference import Difference
......@@ -27,8 +26,8 @@ from .utils.command import Command
class FontconfigCacheFile(File):
RE_FILE_TYPE_FALLBACK_HEADER = struct.pack('<H', 0xFC04)
RE_FILE_EXTENSION = re.compile(r'\-le64\.cache-4$')
FILE_TYPE_HEADER_PREFIX = struct.pack('<H', 0xFC04)
FILE_EXTENSION_SUFFIX = '-le64.cache-4'
def compare_details(self, other, source=None):
return [Difference.from_text(
......
......@@ -36,7 +36,7 @@ class Showttf(Command):
class TtfFile(File):
RE_FILE_TYPE = re.compile(r'^(TrueType|OpenType) font data$')
FILE_TYPE_RE = re.compile(r'^(TrueType|OpenType) font data$')
def compare_details(self, other, source=None):
return [Difference.from_command(Showttf, self.path, other.path)]
......@@ -74,7 +74,7 @@ class FsImageContainer(Archive):
class FsImageFile(File):
CONTAINER_CLASS = FsImageContainer
RE_FILE_TYPE = re.compile(r'^(Linux.*filesystem data|BTRFS Filesystem).*')
FILE_TYPE_RE = re.compile(r'^(Linux.*filesystem data|BTRFS Filesystem).*')
def compare_details(self, other, source=None):
differences = []
......
......@@ -61,7 +61,7 @@ class Msgunfmt(Command):
class MoFile(File):
RE_FILE_TYPE = re.compile(r'^GNU message catalog\b')
FILE_TYPE_RE = re.compile(r'^GNU message catalog\b')
def compare_details(self, other, source=None):
return [Difference.from_command(Msgunfmt, self.path, other.path)]
......@@ -67,7 +67,7 @@ def can_compose_gif_images(image1, image2):
class GifFile(File):
RE_FILE_TYPE = re.compile(r'^GIF image data\b')
FILE_TYPE_RE = re.compile(r'^GIF image data\b')
def compare_details(self, other, source=None):
gifbuild_diff = Difference.from_command(
......
......@@ -28,7 +28,7 @@ from .utils.file import File
class GitIndexFile(File):
RE_FILE_TYPE = re.compile(r'^Git index')
FILE_TYPE_RE = re.compile(r'^Git index')
def compare_details(self, other, source=None):
return [Difference.from_text(
......
......@@ -55,7 +55,7 @@ class GzipContainer(Archive):
class GzipFile(File):
CONTAINER_CLASS = GzipContainer
RE_FILE_TYPE = re.compile(r'^gzip compressed data\b')
FILE_TYPE_RE = re.compile(r'^gzip compressed data\b')
def compare_details(self, other, source=None):
return [Difference.from_text(self.magic_file_type, other.magic_file_type, self, other, source='metadata')]
......@@ -33,7 +33,7 @@ class Iccdump(Command):
class IccFile(File):
RE_FILE_TYPE = re.compile(r'\bColorSync (ICC|color) [Pp]rofile')
FILE_TYPE_RE = re.compile(r'\bColorSync (ICC|color) [Pp]rofile')
def compare_details(self, other, source=None):
return [Difference.from_command(Iccdump, self.path, other.path)]
......@@ -144,7 +144,7 @@ def same_size(image1, image2):
class JPEGImageFile(File):
RE_FILE_TYPE = re.compile(r'\bJPEG image data\b')
FILE_TYPE_RE = re.compile(r'\bJPEG image data\b')
def compare_details(self, other, source=None):
content_diff = Difference.from_command(
......@@ -179,7 +179,7 @@ class JPEGImageFile(File):
class ICOImageFile(File):
RE_FILE_TYPE = re.compile(r'\bMS Windows icon resource\b')
FILE_TYPE_RE = re.compile(r'\bMS Windows icon resource\b')
def compare_details(self, other, source=None):
differences = []
......
......@@ -24,4 +24,4 @@ from .gzip import GzipFile
class IpkFile(GzipFile):
RE_FILE_EXTENSION = re.compile('\.ipk$')
FILE_EXTENSION_SUFFIX = '.ipk'
......@@ -69,12 +69,12 @@ class ISO9660Listing(Command):
class Iso9660File(File):
CONTAINER_CLASS = LibarchiveContainer
RE_FILE_TYPE = re.compile(r'\bISO 9660\b')
FILE_TYPE_RE = re.compile(r'\bISO 9660\b')
@classmethod
def recognizes(cls, file):
if file.magic_file_type and \
cls.RE_FILE_TYPE.search(file.magic_file_type):
cls.FILE_TYPE_RE.search(file.magic_file_type):
return True
# Sometimes CDs put things like MBRs at the front which is an expected
......
......@@ -44,7 +44,7 @@ class Javap(Command):
class ClassFile(File):
RE_FILE_TYPE = re.compile(r'^compiled Java class data\b')
FILE_TYPE_RE = re.compile(r'^compiled Java class data\b')
def compare_details(self, other, source=None):
return [Difference.from_command(Javap, self.path, other.path)]
......@@ -17,8 +17,6 @@
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import re
from diffoscope.tools import tool_required
from diffoscope.difference import Difference
......@@ -32,7 +30,7 @@ class JavaScriptBeautify(Command):
return ['js-beautify', self.path]
class JavaScriptFile(File):
RE_FILE_EXTENSION = re.compile(r'\.js$')
FILE_EXTENSION_SUFFIX = '.js'
def compare_details(self, other, source=None):
return [Difference.from_command(JavaScriptBeautify, self.path, other.path)]
......
......@@ -27,11 +27,11 @@ from .utils.file import File
class JSONFile(File):
RE_FILE_EXTENSION = re.compile(r'\.json$')
FILE_EXTENSION_SUFFIX = '.json'
@staticmethod
def recognizes(file):
if JSONFile.RE_FILE_EXTENSION.search(file.name) is None:
@classmethod
def recognizes(cls, file):
if not super().recognizes(file):
return False
with open(file.path) as f:
......
......@@ -18,6 +18,8 @@
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import re
from diffoscope.tools import tool_required
from diffoscope.difference import Difference
......@@ -39,9 +41,7 @@ class LlvmBcDisassembler(Command):
return ['find', self.path, '-execdir', 'llvm-dis', '-o', '-', '{}', ';']
class LlvmBitCodeFile(File):
@staticmethod
def recognizes(file):
return file.magic_file_type and file.magic_file_type.startswith('LLVM IR bitcode')
FILE_TYPE_RE = re.compile(r'^LLVM IR bitcode')
def compare_details(self, other, source=None):
return [Difference.from_command(LlvmBcAnalyzer, self.path, other.path),
......
......@@ -68,7 +68,7 @@ class OtoolDisassemble(Otool):
class MachoFile(File):
RE_FILE_TYPE = re.compile(r'^Mach-O ')
FILE_TYPE_RE = re.compile(r'^Mach-O ')
RE_EXTRACT_ARCHS = re.compile(r'^(?:Architectures in the fat file: .* are|Non-fat file: .* is architecture): (.*)$')
@staticmethod
......
......@@ -34,7 +34,7 @@ class Pedump(Command):
class MonoExeFile(File):
RE_FILE_TYPE = re.compile(r'\bPE[0-9]+\b.*\bMono\b')
FILE_TYPE_RE = re.compile(r'\bPE[0-9]+\b.*\bMono\b')
def compare_details(self, other, source=None):
return [Difference.from_command(Pedump, self.path, other.path)]
......@@ -37,7 +37,7 @@ class Odt2txt(Command):
class OdtFile(File):
RE_FILE_TYPE = re.compile(r'^OpenDocument Text\b')
FILE_TYPE_RE = re.compile(r'^OpenDocument Text\b')
def compare_details(self, other, source=None):
return [Difference.from_command(
......
......@@ -33,7 +33,7 @@ class OggDump(Command):
class OggFile(File):
RE_FILE_TYPE = re.compile(r'^Ogg data')
FILE_TYPE_RE = re.compile(r'^Ogg data')
def compare_details(self, other, source=None):
return [Difference.from_command(
......
......@@ -32,7 +32,7 @@ class SSHKeyList(Command):
return ['ssh-keygen', '-l', '-f', self.path]
class PublicKeyFile(File):
RE_FILE_TYPE = re.compile(r'^OpenSSH \S+ public key')
FILE_TYPE_RE = re.compile(r'^OpenSSH \S+ public key')
def compare_details(self, other, source=None):
return [Difference.from_command(SSHKeyList, self.path, other.path)]
......
......@@ -39,7 +39,7 @@ class Tcpdump(Command):
class PcapFile(File):
RE_FILE_TYPE = re.compile(r'^tcpdump capture file\b')
FILE_TYPE_RE = re.compile(r'^tcpdump capture file\b')
def compare_details(self, other, source=None):
return [Difference.from_command(
......
......@@ -42,7 +42,7 @@ class Pdftk(Command):
class PdfFile(File):
RE_FILE_TYPE = re.compile(r'^PDF document\b')
FILE_TYPE_RE = re.compile(r'^PDF document\b')
def compare_details(self, other, source=None):
return [Difference.from_command(Pdftotext, self.path, other.path),
......
......@@ -41,7 +41,7 @@ class Pgpdump(Command):
class PgpFile(File):
RE_FILE_TYPE = re.compile(r'^PGP message\b')
FILE_TYPE_RE = re.compile(r'^PGP message\b')
def compare_details(self, other, source=None):
return [Difference.from_command(
......
......@@ -45,7 +45,7 @@ class Sng(Command):
class PngFile(File):
RE_FILE_TYPE = re.compile(r'^PNG image data\b')
FILE_TYPE_RE = re.compile(r'^PNG image data\b')
def compare_details(self, other, source=None):
sng_diff = Difference.from_command(Sng, self.path, other.path, source='sng')
......
......@@ -58,11 +58,11 @@ class Ppudump(Command):
class PpuFile(File):
RE_FILE_EXTENSION = re.compile(r'\.ppu$')
FILE_EXTENSION_SUFFIX = '.ppu'
@staticmethod
def recognizes(file):
if not PpuFile.RE_FILE_EXTENSION.search(file.name):
@classmethod
def recognizes(cls, file):
if not super().recognizes(file):
return False
with open(file.path, 'rb') as f:
magic = f.read(3)
......
......@@ -37,7 +37,7 @@ class Pstotext(Command):
class PsFile(TextFile):
RE_FILE_TYPE = re.compile(r'^PostScript document\b')
FILE_TYPE_RE = re.compile(r'^PostScript document\b')
def compare(self, other, *args, **kwargs):
differences = super().compare(other, *args, **kwargs)
......
......@@ -82,10 +82,7 @@ class RdbReader(Command):
return ['Rscript', '-e', DUMP_RDB, self.path[:-4]]
class RdbFile(File):
@staticmethod
def recognizes(file):
if file.name.endswith(".rdb"):
return True
FILE_EXTENSION_SUFFIX = '.rdb'
def compare_details(self, other, source=None):
self_path = ensure_archive_rdx(self)
......
......@@ -23,7 +23,7 @@ from .utils.file import File
class AbstractRpmFile(File):
RE_FILE_TYPE = re.compile('^RPM\s')
FILE_TYPE_RE = re.compile('^RPM\s')
class RpmFile(AbstractRpmFile):
def compare(self, other, source=None):
......
......@@ -18,7 +18,6 @@
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import re
import zlib
import os.path
import logging
......@@ -59,8 +58,8 @@ class RustObjectContainer(Archive):
class RustObjectFile(File):
CONTAINER_CLASS = RustObjectContainer
RE_FILE_TYPE_FALLBACK_HEADER = b'RUST_OBJECT\x01\x00\x00\x00'
RE_FILE_EXTENSION = re.compile(r'\.deflate$')
FILE_TYPE_HEADER_PREFIX = b'RUST_OBJECT\x01\x00\x00\x00'
FILE_EXTENSION_SUFFIX = '.deflate'
def compare_details(self, other, source=None):
return [Difference.from_text(self.magic_file_type, other.magic_file_type, self, other, source='metadata')]
......@@ -33,7 +33,7 @@ class Sqlite3Dump(Command):
class Sqlite3Database(File):
RE_FILE_TYPE = re.compile(r'^SQLite 3.x database')
FILE_TYPE_RE = re.compile(r'^SQLite 3.x database')
def compare_details(self, other, source=None):
return [Difference.from_command(Sqlite3Dump, self.path, other.path)]
......@@ -298,7 +298,7 @@ class SquashfsContainer(Archive):
class SquashfsFile(File):
CONTAINER_CLASS = SquashfsContainer
RE_FILE_TYPE = re.compile(r'^Squashfs filesystem\b')
FILE_TYPE_RE = re.compile(r'^Squashfs filesystem\b')
def compare_details(self, other, source=None):
return [
......
......@@ -30,7 +30,7 @@ class TarContainer(LibarchiveContainer):
class TarFile(File):
CONTAINER_CLASS = TarContainer
RE_FILE_TYPE = re.compile(r'\btar archive\b')
FILE_TYPE_RE = re.compile(r'\btar archive\b')
def compare_details(self, other, source=None):
return [Difference.from_text_readers(list_libarchive(self.path),
......
......@@ -36,7 +36,7 @@ def order_only_difference(unified_diff):
class TextFile(File):
RE_FILE_TYPE = re.compile(r'\btext\b')
FILE_TYPE_RE = re.compile(r'\btext\b')
@property
def encoding(self):
......
......@@ -106,10 +106,9 @@ class File(object, metaclass=abc.ABCMeta):
def __del__(self):
self.cleanup()
RE_FILE_TYPE = None
RE_FILE_EXTENSION = None
RE_FILE_TYPE_FALLBACK_HEADER = None
RE_CLASS = re.compile("").__class__
FILE_EXTENSION_SUFFIX = None
FILE_TYPE_RE = None
FILE_TYPE_HEADER_PREFIX = None
@classmethod
def recognizes(cls, file):
......@@ -118,26 +117,24 @@ class File(object, metaclass=abc.ABCMeta):
# for a class are filtered out, so that we don't get into a "vacuous
# truth" situation like a naive all([]) invocation would give.
def run_tests(tests, fold):
return fold(t(x, y) for x, t, y in tests)
def run_tests(fold, tests):
return fold(t(y, x) for x, t, y in tests)
file_type_tests = [test for test in (
(cls.RE_FILE_TYPE,
cls.RE_CLASS.search, file.magic_file_type),
(cls.RE_FILE_TYPE_FALLBACK_HEADER,
lambda m, h: h.startswith(m), file.file_header),
(cls.FILE_TYPE_RE,
lambda m, t: t.search(m), file.magic_file_type),
(cls.FILE_TYPE_HEADER_PREFIX,
bytes.startswith, file.file_header),
) if test[0]] # filter out undefined tests
all_tests = [test for test in (
(cls.RE_FILE_EXTENSION,
cls.RE_CLASS.search, file.name),
(cls.FILE_EXTENSION_SUFFIX,
str.endswith, file.name),
(file_type_tests,
run_tests, any),
) if test[0]] # filter out undefined tests, inc. file_type_tests if it's empty
if all_tests:
return run_tests(all_tests, all)
return False
return run_tests(all, all_tests) if all_tests else False
# This might be different from path and is used to do file extension matching
@property
......
......@@ -53,4 +53,4 @@ class XzContainer(Archive):
class XzFile(File):
CONTAINER_CLASS = XzContainer
RE_FILE_TYPE = re.compile(r'^XZ compressed data$')
FILE_TYPE_RE = re.compile(r'^XZ compressed data$')
......@@ -106,7 +106,7 @@ class ZipContainer(Archive):
class ZipFile(File):
CONTAINER_CLASS = ZipContainer
RE_FILE_TYPE = re.compile(r'^(Zip archive|Java archive|EPUB document|OpenDocument (Text|Spreadsheet|Presentation|Drawing|Formula|Template|Text Template))\b')
FILE_TYPE_RE = re.compile(r'^(Zip archive|Java archive|EPUB document|OpenDocument (Text|Spreadsheet|Presentation|Drawing|Formula|Template|Text Template))\b')
def compare_details(self, other, source=None):
zipinfo_difference = Difference.from_command(Zipinfo, self.path, other.path) or \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment