# -*- coding: utf-8 -*-
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2014-2015 Jérémy Bobbio <lunar@debian.org>
#           ©      2015  Helmut Grohne <helmut@subdivi.de>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.

Chris Lamb's avatar
Chris Lamb committed
21 22
import re
import sys
23 24
import magic
import os.path
25 26
import operator
import importlib
Chris Lamb's avatar
Chris Lamb committed
27

Jérémy Bobbio's avatar
Jérémy Bobbio committed
28
from diffoscope import logger, tool_required
29
from diffoscope.config import Config
Jérémy Bobbio's avatar
Jérémy Bobbio committed
30
from diffoscope.difference import Difference
31 32 33

from .binary import NonExistingFile
from .directory import FilesystemDirectory, FilesystemFile, compare_directories
Chris Lamb's avatar
Chris Lamb committed
34 35 36 37 38 39

try:
    import tlsh
except ImportError:
    tlsh = None

40 41 42 43 44 45 46
# Ordered registry of comparator classes, written as 'module.ClassName' with
# the module name relative to the diffoscope.comparators package.
#
# Each entry is a tuple of alternatives: import_comparators() tries them from
# left to right and keeps the first one whose module can be imported, so any
# later items in a tuple act as fallbacks.
#
# The order of the entries matters: specialize() walks the resulting classes
# in this order and stops at the first one that recognizes a file.
COMPARATORS = (
    ('directory.Directory',),
    ('binary.NonExistingFile',),
    ('symlink.Symlink',),
    ('device.Device',),
    ('debian.DotChangesFile', 'debian_fallback.DotChangesFile'),
    ('debian.DotDscFile', 'debian_fallback.DotDscFile'),
    ('debian.DotBuildinfoFile', 'debian_fallback.DotBuildinfoFile'),
    ('deb.Md5sumsFile',),
    ('deb.DebDataTarFile',),
    ('elf.ElfSection',),
    ('ps.PsFile',),
    ('json.JSONFile',),
    ('text.TextFile',),
    ('bzip2.Bzip2File',),
    ('cpio.CpioFile',),
    ('deb.DebFile',),
    ('dex.DexFile',),
    ('elf.ElfFile',),
    ('macho.MachoFile',),
    ('fsimage.FsImageFile',),
    ('elf.StaticLibFile',),
    ('llvm.LlvmBitCodeFile',),
    ('sqlite.Sqlite3Database',),
    ('fonts.TtfFile',),
    ('gettext.MoFile',),
    ('ipk.IpkFile',),
    ('rust.RustObjectFile',),
    ('gzip.GzipFile',),
    ('haskell.HiFile',),
    ('icc.IccFile',),
    ('iso9660.Iso9660File',),
    ('java.ClassFile',),
    ('mono.MonoExeFile',),
    ('pdf.PdfFile',),
    ('png.PngFile',),
    ('ppu.PpuFile',),
    ('rpm.RpmFile', 'rpm_fallback.RpmFile'),
    ('squashfs.SquashfsFile',),
    ('ar.ArFile',),
    ('tar.TarFile',),
    ('xz.XzFile',),
    ('apk.ApkFile',),
    ('zip.ZipFile',),
    ('zip.MozillaZipFile',),
    ('image.ImageFile',),
    ('cbfs.CbfsFile',),
    ('git.GitIndexFile',),
)


def import_comparators(comparators):
    """Resolve comparator specifications into classes.

    ``comparators`` is an iterable of tuples of ``'module.ClassName'``
    strings, where the module name is relative to the
    ``diffoscope.comparators`` package.  The strings inside one tuple are
    alternatives tried in order: the first one whose module can be imported
    supplies the class and the remaining alternatives are skipped.

    Returns a list with one class per tuple, in input order.

    Raises ImportError when none of a tuple's modules can be imported.
    """
    result = []

    for alternatives in comparators:
        for path in alternatives:
            package, klass_name = path.rsplit('.', 1)

            try:
                mod = importlib.import_module(
                    'diffoscope.comparators.{}'.format(package)
                )
            except ImportError:
                # This module is unavailable; fall through to the next
                # alternative of the same entry.
                continue

            result.append(getattr(mod, klass_name))
            break
        else:
            # The inner loop finished without a break, i.e. no alternative
            # could be imported.
            raise ImportError(
                "Could not import any of {}".format(', '.join(alternatives))
            )

    return result
113

114 115 116
def bail_if_non_existing(*paths):
    """Exit with status 2 if any of ``paths`` does not exist.

    Before exiting, one "No such file or directory" line is written to
    stderr for every missing path, prefixed with ``sys.argv[0]``.
    Existence is tested with ``os.path.lexists``, so a broken symbolic link
    counts as existing.  Returns None when every path exists.
    """
    # Check each path exactly once so the report and the exit decision are
    # based on the same observation of the filesystem.
    missing = [path for path in paths if not os.path.lexists(path)]
    if not missing:
        return
    for path in missing:
        sys.stderr.write('%s: %s: No such file or directory\n' % (sys.argv[0], path))
    sys.exit(2)
120 121

def compare_root_paths(path1, path2):
    """Compare two filesystem paths and return the comparison result.

    Unless ``Config().new_file`` is set, both paths must exist; otherwise
    ``bail_if_non_existing`` reports the missing ones and exits.

    When both paths are directories the result of ``compare_directories``
    is returned.  Otherwise each path is wrapped in a ``FilesystemFile``
    whose container is the ``FilesystemDirectory`` of its parent directory,
    specialized, and the pair is handed to ``compare_files``.
    """
    if not Config().new_file:
        bail_if_non_existing(path1, path2)
    if os.path.isdir(path1) and os.path.isdir(path2):
        return compare_directories(path1, path2)

    # Build the two file objects the same way, first path first.
    files = []
    for path in (path1, path2):
        container = FilesystemDirectory(os.path.dirname(path)).as_container
        files.append(specialize(FilesystemFile(path, container=container)))
    return compare_files(*files)
131

132
def compare_files(file1, file2, source=None):
    """Compare two file objects.

    Returns None when ``file1.has_same_content_as(file2)`` is true.
    Otherwise both files are specialized and the result of
    ``file1.compare(file2, source)`` is returned, except when neither file
    is a ``NonExistingFile`` and they ended up with differently named
    classes, in which case ``file1.compare_bytes(file2, source)`` is
    returned instead.
    """
    logger.debug('compare files %s and %s', file1, file2)
    if file1.has_same_content_as(file2):
        logger.debug('same content, skipping')
        return None
    specialize(file1)
    specialize(file2)
    if isinstance(file1, NonExistingFile):
        # Let the placeholder know which real file it stands against.
        file1.other_file = file2
    elif isinstance(file2, NonExistingFile):
        file2.other_file = file1
    elif file1.__class__.__name__ != file2.__class__.__name__:
        # specialize() builds a fresh class object per file, so the class
        # names are compared rather than the classes themselves.
        return file1.compare_bytes(file2, source)
    return file1.compare(file2, source)
146

147 148 149 150
def compare_commented_files(file1, file2, comment=None, source=None):
    """Compare two files and attach ``comment`` to the result.

    Returns whatever ``compare_files`` returns when ``comment`` is falsy.
    With a comment, a ``Difference`` is always returned: if the files did
    not differ, an otherwise empty ``Difference`` named after the two files
    is created just to carry the comment.
    """
    difference = compare_files(file1, file2, source=source)
    if not comment:
        return difference
    if difference is None:
        difference = Difference(None, file1.name, file2.name)
    difference.add_comment(comment)
    return difference

155 156
def specialize(file):
    """Give ``file`` the first comparator class that recognizes it.

    The classes in ``FILE_CLASSES`` are tried in order.  If ``file`` is
    already an instance of one of them it is returned untouched.  On the
    first class whose ``recognizes(file)`` is true, ``file.__class__`` is
    replaced in place by a new class that derives from both that comparator
    class and the file's current class.

    Returns the same ``file`` object in every case, including when no class
    recognizes it.
    """
    for cls in FILE_CLASSES:
        # Uncomment the below to see which comparisons take ages to run "identify"
        #logger.debug("testing for %s", cls)
        if isinstance(file, cls):
            logger.debug("%s is already specialized", file.name)
            return file
        if cls.recognizes(file):
            logger.debug("Using %s for %s", cls.__name__, file.name)
            # Mix the comparator into the file's existing class; the new
            # class keeps the comparator's name.
            new_cls = type(cls.__name__, (cls, type(file)), {})
            file.__class__ = new_cls
            return file
    logger.debug('Unidentified file. Magic says: %s', file.magic_file_type)
    return file
169 170


171
def perform_fuzzy_matching(members1, members2):
    """Yield pairs of members whose fuzzy hashes are close to each other.

    ``members1`` and ``members2`` are mappings from member name to file
    object; each file object must provide ``is_directory()`` and a
    ``fuzzy_hash`` attribute.  Directories and files without a fuzzy hash
    are ignored.

    For every candidate in ``members1`` the member of ``members2`` with the
    lowest ``tlsh.diff`` score is selected; if that score is below
    ``Config().fuzzy_threshold`` the tuple ``(name1, name2, score)`` is
    yielded.  A name from ``members2`` is yielded at most once.

    Nothing is yielded when the ``tlsh`` module is unavailable or the
    threshold is 0.
    """
    if tlsh is None or Config().fuzzy_threshold == 0:
        return
    already_compared = set()
    # Perform local copies because they will be modified by consumer
    members1 = dict(members1)
    members2 = dict(members2)
    for name1, file1 in members1.items():
        if file1.is_directory() or not file1.fuzzy_hash:
            continue
        comparisons = []
        for name2, file2 in members2.items():
            if name2 in already_compared or file2.is_directory() or not file2.fuzzy_hash:
                continue
            comparisons.append((tlsh.diff(file1.fuzzy_hash, file2.fuzzy_hash), name2))
        if comparisons:
            # min() keeps the first entry among equal scores, which is the
            # same pick a stable sort followed by [0] would make.
            score, name2 = min(comparisons, key=operator.itemgetter(0))
            logger.debug('fuzzy top match %s %s: %d difference score', name1, name2, score)
            if score < Config().fuzzy_threshold:
                yield name1, name2, score
                # Recorded once the consumer resumes the generator.
                already_compared.add(name2)
193 194

# Comparator classes resolved from COMPARATORS when this module is imported.
# specialize() tries them in this order and uses the first one that
# recognizes a file.
FILE_CLASSES = import_comparators(COMPARATORS)