Commits (34)
......@@ -12,6 +12,7 @@ stages:
- apt-get -q -y install --no-install-recommends aspcud apt-cudf
- apt-get -q -y --solver aspcud -o APT::Solver::Strict-Pinning=0
-o Debug::pkgProblemResolver=yes build-dep .
- apt-get -q -y install --no-install-recommends radare2 || true
- py.test-3 -vv -l -r a --cov=diffoscope --cov-report=term-missing
unstable:
......
......@@ -29,7 +29,7 @@ _diffoscope() {
'--max-page-diff-block-lines=[Maximum number of lines output per unified-diff block on the top-level (--html-dir) or sole (--html) page, before spilling it into a child page (--html-dir) or skipping the rest of the diff block. (default: %(default)s, remains in effect even with --no-default-limits)]:' \
'--new-file[Treat absent files as empty]' \
'--exclude=[Exclude files whose names (including any directory part) match %(metavar)s. Use this option to ignore files based on their names.]:' \
'--exclude-command=[Exclude commands that match %(metavar)s. For example "^readelf.*\s--debug-dump=info" can take a long time and differences here are likely secondary differences caused by something represented elsewhere. Use this option to disable commands that use a lot of resources.]:' \
'--exclude-command=[Exclude commands that match %(metavar)s. For example "^readelf.*\s--debug-dump=info" and '^radare2.*' can take a long time and differences here are likely secondary differences caused by something represented elsewhere. Use this option to disable commands that use a lot of resources.]:' \
'--exclude-directory-metadata=[Exclude directory metadata. Useful if comparing files whose filesystem-level metadata is not intended to be distributed to other systems. This is true for most distributions package builders, but not true for the output of commands such as `make install`. Metadata of archive members remain un-excluded except if "recursive" choice is set. Use this option to ignore permissions, timestamps, xattrs etc. Default: False if comparing two directories, else True. Note that "file" metadata actually a property of its containing directory, and is not relevant when distributing the file across systems.]:--exclude-directory-metadata :(auto yes no recursive)' \
'--diff-mask=[Replace/unify substrings that match regular expression %(metavar)s from output strings before applying diff. For example, to filter out a version number or changed path.]:' \
'--fuzzy-threshold=[Threshold for fuzzy-matching (0 to disable, %(default)s is default, 400 is high fuzziness)]:' \
......
......@@ -42,6 +42,7 @@ class ComparatorManager:
("debian.DotBuildinfoFile", "debian_fallback.DotBuildinfoFile"),
("deb.Md5sumsFile",),
("deb.DebDataTarFile",),
("decompile.AsmFunction",),
("elf.ElfSection",),
("binwalk.BinwalkFile",),
("ps.PsFile",),
......
# -*- coding: utf-8 -*-
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2020 Jean-Romain Garnier <salsa@jean-romain.com>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import re
import sys
import abc
import logging
from .utils.file import File
from .utils.operation import Operation
from .utils.container import Container
from diffoscope.config import Config
from diffoscope.difference import Difference
from diffoscope.excludes import operation_excluded
from diffoscope.tools import (
tool_required,
tool_check_installed,
python_module_missing,
)
# tlsh is optional: AsmFunction only defines its fuzzy_hash property when
# the module could be imported.
try:
    import tlsh
except ImportError:
    tlsh = None

# r2pipe is optional too; report the missing module and leave the name set
# to None so DecompilableContainer skips disassembly.
try:
    import r2pipe
except ImportError:
    python_module_missing("r2pipe")
    r2pipe = None

logger = logging.getLogger(__name__)

# The Python bindings are useless without the radare2 binary itself, so
# treat a missing binary exactly like a missing module.
if not tool_check_installed("radare2"):
    r2pipe = None
    logger.debug("radare2 not found, disabling decompiler")
class Decompile(Operation, metaclass=abc.ABCMeta):
    """
    Abstract operation producing a textual listing for one AsmFunction.

    Subclasses implement ``_decompile`` and store their result as a string
    in ``self._stdout``.
    """

    def __init__(self, file, *args, **kwargs):
        # The operation is registered against the path of the given file,
        # while the file object itself is kept for the subclasses
        super().__init__(file.path, *args, **kwargs)
        self.file = file

    def start(self):
        """Run the decompiler, or produce empty output for non-functions."""
        logger.debug("Executing %s", self.full_name())

        if isinstance(self.file, AsmFunction):
            self._decompile()
        else:
            # Anything that is not an AsmFunction has nothing to decompile
            self._stdout = ""

    @abc.abstractmethod
    def _decompile(self):
        """Fill ``self._stdout``; must be provided by subclasses."""
        raise NotImplementedError()

    def should_show_error(self):
        """Always answer False."""
        return False

    @property
    def output(self):
        """The collected output as a list of UTF-8 encoded lines (line endings kept)."""
        encoded = self._stdout.encode("utf-8")
        return encoded.splitlines(True)
class DecompileGhidra(Decompile):
    """Decompile a function through radare2's ``pdgj`` command."""

    # Remove addresses from warnings as they can create a lot of irrelevant noise
    _JUMPTABLE_WARNING_RE = re.compile(rb"(^\s*// WARNING:.*)(0x[0-9a-f]+)")

    def _run_r2_command(self):
        """Seek to the function and return the parsed ``pdgj`` result."""
        decompiler = self.file.decompiler
        decompiler.jump(self.file.offset)

        result = decompiler.r2.cmdj("pdgj")
        if result:
            return result

        # Output is None if the pdg command doesn't exist
        return {
            "errors": [
                'Missing r2ghidra-dec, install it with "r2pm install r2ghidra-dec"'
            ]
        }

    @tool_required("radare2")
    def _decompile(self):
        """Store the decompiled code, or the reported errors, in ``_stdout``."""
        result = self._run_r2_command()

        try:
            code = result["code"]
        except KeyError:
            # Show errors on stdout so a failed decompilation for 1 function
            # doesn't stop the diff for the whole file
            self._stdout = "\n".join(result["errors"])
            logger.debug(
                "r2ghidra decompiler error for %s: %s",
                self.file.signature,
                self._stdout,
            )
        else:
            self._stdout = code

    def name(self):
        return "r2ghidra"

    def full_name(self, *args, **kwargs):
        return "radare2 r2ghidra"

    def filter(self, line):
        """Replace the address in a ``// WARNING:`` line with ``0xX``."""
        pattern = self._JUMPTABLE_WARNING_RE
        return pattern.sub(rb"\g<1>0xX", line)
class DecompileRadare2(Decompile):
    """
    Significantly faster than the ghidra decompiler, but still outputs assembly
    code, with added comments to make it more readable
    """

    def _run_r2_command(self):
        """Seek to the function and return the text printed by ``pdc``."""
        decompiler = self.file.decompiler
        decompiler.jump(self.file.offset)
        return decompiler.r2.cmd("pdc")

    @tool_required("radare2")
    def _decompile(self):
        """Store the ``pdc`` listing in ``_stdout``."""
        listing = self._run_r2_command()
        self._stdout = listing

    def name(self):
        return "disass"

    def full_name(self, *args, **kwargs):
        return "radare2 disass"
class AsmFunction(File):
    """
    One function reported by radare2, exposed as a member file of its
    decompiler container.

    ``data_dict`` is the per-function dictionary handed over by the
    container; the ``name``, ``offset``, ``size`` and ``signature`` keys
    are read from it.
    """

    DESCRIPTION = "ASM Function"

    # Operation classes applied to every pair of functions; each entry
    # yields one detail in compare_details()
    DECOMPILE_OPERATIONS = [
        DecompileGhidra,
        DecompileRadare2,
    ]

    def __init__(self, decompiler, data_dict):
        super().__init__(container=decompiler)
        self.data_dict = data_dict
        self.decompiler = decompiler
        self._name = self.func_name

    @property
    def name(self):
        # Multiple functions can have the same name but a different signature,
        # so use the signature as name for diffoscope
        return self.signature

    @property
    def progress_name(self):
        return "{} [{}]".format(
            self.container.source.progress_name, super().progress_name
        )

    @property
    def path(self):
        # A function has no file of its own: use the path of the file the
        # container was created for
        return self.container.source.path

    def is_directory(self):
        return False

    def is_symlink(self):
        return False

    def is_device(self):
        return False

    if tlsh:

        @property
        def fuzzy_hash(self):
            """TLSH digest of the disassembly, or None when unavailable."""
            if not hasattr(self, "_fuzzy_hash"):
                try:
                    hex_digest = tlsh.hash(self.asm.encode())
                except ValueError:
                    # File must contain a certain amount of randomness
                    hex_digest = None

                # For short files, the hex_digest is an empty string, so turn
                # it into None. A failed hash is cached as None as well so it
                # is not recomputed on every access.
                self._fuzzy_hash = hex_digest or None

            return self._fuzzy_hash

    def has_same_content_as(self, other):
        """Compare the hex dumps of both functions."""
        logger.debug("has_same_content: %s %s", self, other)
        try:
            return self.hex_dump == other.hex_dump
        except AttributeError:
            # 'other' is not a function.
            logger.debug("has_same_content: Not an asm function: %s", other)
            return False

    @classmethod
    def recognizes(cls, file):
        # No file should be recognized as an asm function
        return False

    def compare(self, other, source=None):
        """
        Override file's compare method to get rid of the binary diff fallback,
        as it would be redundant with other outputs
        """
        details = self.compare_details(other, source)
        details = [x for x in details if x]
        if not details:
            return None

        difference = Difference(None, self.name, other.name, source=source)
        difference.add_details(details)
        return difference

    def compare_details(self, other, source=None):
        """Build one Difference per entry of DECOMPILE_OPERATIONS."""
        return [
            Difference.from_operation(x, self, other)
            for x in self.DECOMPILE_OPERATIONS
        ]

    @property
    def func_name(self):
        return self.data_dict["name"]

    @property
    def offset(self):
        return self.data_dict["offset"]

    @property
    def size(self):
        return self.data_dict["size"]

    @property
    def signature(self):
        return self.data_dict["signature"]

    @property
    def hex_dump(self):
        """Hex dump of the function, fetched from the container once."""
        if not hasattr(self, "_hex_dump"):
            self._hex_dump = self.decompiler.dump(self.offset, self.size)

        return self._hex_dump

    @property
    def asm(self):
        """Disassembly of the function, one instruction per line."""
        if not hasattr(self, "_asm"):
            ops = self.decompiler.disassemble(self.offset)

            # Entries without a "disasm" key are invalid instructions
            self._asm = "".join(
                instr.get("disasm", "invalid") + "\n" for instr in ops
            )

        return self._asm
def all_decompile_operations_are_excluded(file):
    """
    Tell whether every operation in AsmFunction.DECOMPILE_OPERATIONS is
    excluded, judged by passing each operation's full name to
    operation_excluded().

    Returns True only if no operation remains enabled.
    """
    for klass in AsmFunction.DECOMPILE_OPERATIONS:
        name = klass(file).full_name()

        # The operations in this module return their full name as a plain
        # string; joining a string would insert a space between every
        # character and no exclusion pattern could match it. Only join when
        # a sequence of parts is returned.
        if not isinstance(name, str):
            name = " ".join(name)

        if not operation_excluded(name):
            return False

    return True
class DecompilableContainer(Container):
    """
    Container whose members are the functions radare2 finds in the source
    file, keyed by signature.

    When r2pipe is unavailable or every decompile operation is excluded, no
    radare2 session is opened and the container has no members.
    """

    auto_diff_metadata = False

    # Don't use @tool_required here so subclassing DecompilableContainer
    # doesn't block the new subclass from doing its work if radare2
    # isn't installed
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        logger.debug("Creating DecompileContainer for %s", self.source.path)

        self._functions = {}

        # Always define the session handle, so cleanup() stays safe when
        # disassembly is skipped below
        self.r2 = None

        # Skip disassembly (and decompilation) if a dependency is missing
        # or if radare2 commands are excluded
        if r2pipe is None or all_decompile_operations_are_excluded(
            self.source
        ):
            return

        # Use "-2" flag to silence radare2 warnings
        self.r2 = r2pipe.open(self.source.path, flags=["-2"])
        # Run radare2 command which finds the functions in the executable
        self.r2.cmd("aa")  # Analyse all

        # Hide offset in asm as it serves the same purpose as line numbers,
        # which shouldn't be diffed
        self.r2.cmd("e asm.offset = false")

        # In hex dump of function, hide everything but the hex values
        self.r2.cmd(
            "e hex.offset = false;e hex.header = false;e hex.ascii = false"
        )

        # Use radare2 to get the list of functions
        # If there aren't any, cmdj returns None
        # NOTE(review): radare2's JSON function listing is usually "aflj";
        # confirm that "aj" really returns the expected list of dicts
        functions = self.r2.cmdj("aj") or []
        for f in functions:
            func = AsmFunction(self, f)
            self._functions[func.signature] = func
            logger.debug("Adding function %s", func.signature)

    def cleanup(self):
        """Close the radare2 session, if one was opened."""
        r2 = getattr(self, "r2", None)
        if r2 is None:
            return

        r2.quit()
        self.r2 = None

    def get_member_names(self):
        return self._functions.keys()

    def get_member(self, member_name):
        return self._functions[member_name]

    def jump(self, offset):
        """Seek the radare2 session to ``offset``."""
        self.r2.cmd("s {}".format(offset))

    def dump(self, offset, size):
        """Return the stripped ``px`` output for ``size`` bytes at ``offset``."""
        self.jump(offset)
        return self.r2.cmd("px {}".format(size)).strip()

    def disassemble(self, offset):
        """
        Return the ``ops`` list reported by ``pdfj`` at ``offset``, or an
        empty list when radare2 gives no (usable) answer.
        """
        self.jump(offset)
        # cmdj returns None when it has nothing to report
        result = self.r2.cmdj("pdfj") or {}
        return result.get("ops", [])
......@@ -31,6 +31,7 @@ from diffoscope.tempfiles import get_named_temporary_file
from diffoscope.difference import Difference
from .deb import DebFile, get_build_id_map
from .decompile import DecompilableContainer
from .utils.file import File
from .utils.command import Command, our_check_output
from .utils.container import Container
......@@ -425,7 +426,7 @@ def get_debug_link(path):
return m.group(1)
class ElfContainer(Container):
class ElfContainer(DecompilableContainer):
auto_diff_metadata = False
SECTION_FLAG_MAPPING = {
......@@ -613,10 +614,16 @@ class ElfContainer(Container):
logger.debug("Installed debug symbols at %s", dest_path)
def get_member_names(self):
return self._sections.keys()
decompiled_members = super().get_member_names()
return list(decompiled_members) + list(self._sections.keys())
def get_member(self, member_name):
return self._sections[member_name]
try:
return self._sections[member_name]
except KeyError:
# Raised when the member name is not one of ours, which means
# it was part of the decompiler's output (aka super)
return super().get_member(member_name)
class Strings(Command):
......
......@@ -165,6 +165,7 @@ EXTERNAL_TOOLS = {
"FreeBSD": "ghostscript9-base",
"guix": "ghostscript",
},
"radare2": {"debian": "radare2", "arch": "radare2", "guix": "radare2"},
"readelf": {
"debian": "binutils-multiarch",
"arch": "binutils",
......
......@@ -293,11 +293,10 @@ def create_parser():
action="append",
default=[],
help="Exclude commands that match %(metavar)s. For "
"example '^readelf.*\\s--debug-dump=info' can take a "
"long time and differences here are likely secondary "
"differences caused by something represented "
"elsewhere. Use this option to disable commands that "
"use a lot of resources.",
"example '^readelf.*\\s--debug-dump=info' and '^radare2.*' can take"
"a long time and differences here are likely secondary differences "
"caused by something represented elsewhere. Use this option to "
"disable commands that use a lot of resources.",
)
group3.add_argument(
"--exclude-directory-metadata",
......
......@@ -46,6 +46,13 @@ ignore_readelf_errors1 = load_fixture("test1.debug")
ignore_readelf_errors2 = load_fixture("test2.debug")
@pytest.fixture(scope="function", autouse=True)
def init_tests(request, monkeypatch):
    """Autouse fixture replacing the excluded commands for every test."""
    # Ignore radare2 commands so decompiling is skipped
    # See test_elf_decompiler.py for tests related to decompiler
    radare2_only = ["^radare2.*"]
    monkeypatch.setattr(Config(), "exclude_commands", radare2_only)
def readelf_version():
try:
out = subprocess.check_output(["readelf", "--version"])
......
# -*- coding: utf-8 -*-
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2015-2020 Chris Lamb <lamby@debian.org>
# Copyright © 2020 Jean-Romain Garnier <salsa@jean-romain.com>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import pytest
from diffoscope.config import Config
from diffoscope.comparators.missing_file import MissingFile
from ..utils.data import load_fixture, assert_diff
from ..utils.tools import (
skipif,
tools_missing,
skip_unless_tools_exist,
skip_unless_module_exists,
)
def radare2_command_is_undefined(x):
    """
    Tell whether radare2 cannot run the command ``x``.

    Returns True when the radare2 tool is missing, when r2pipe cannot be
    imported, or when radare2 answers None to the command.
    """
    if tools_missing("radare2"):
        return True

    try:
        import r2pipe
    except ImportError:
        return True

    # Open any file with radare2 and try to execute the given command
    # If it returns None, then the command doesn't exist
    r2 = r2pipe.open("/dev/null", flags=["-2"])
    try:
        return r2.cmdj(x) is None
    finally:
        # Don't leave the radare2 session open once the probe is done
        r2.quit()
def skip_unless_radare2_command_exists(command):
    """Build a skipif marker for tests which need the radare2 ``command``."""
    undefined = radare2_command_is_undefined(command)
    required = (f"{command}_radare2_command",)
    explanation = f"radare2 didn't recognize {command} command"

    return skipif(undefined, reason=explanation, tools=required)
def exclude_commands(monkeypatch, patterns):
    """
    Add ``patterns`` to the commands excluded in the configuration.

    The patterns already configured are kept, so calls accumulate within a
    test; monkeypatch restores the previous value afterwards.
    """
    # Work on a copy so the list currently held by the config isn't mutated
    excluded = list(Config().exclude_commands)
    excluded += patterns

    # Install the merged list: passing only `patterns` would silently drop
    # every exclusion configured earlier
    monkeypatch.setattr(Config(), "exclude_commands", excluded)
@pytest.fixture(scope="function", autouse=True)
def init_tests(request, monkeypatch):
    """Autouse fixture excluding the commands covered by other test modules."""
    # Ignore readelf and objdump as they are already tested by test_elf.py
    already_tested = ["^readelf.*", "^objdump.*"]
    exclude_commands(monkeypatch, already_tested)
# Fixtures for the two object files under comparison; the tests below
# request them by name (obj1, obj2).
obj1 = load_fixture("test1.o")
obj2 = load_fixture("test2.o")
@pytest.fixture
def obj_differences(obj1, obj2):
    """Details of the comparison between both object files."""
    difference = obj1.compare(obj2)
    return difference.details
@skip_unless_tools_exist("radare2")
@skip_unless_module_exists("r2pipe")
@skip_unless_radare2_command_exists("pdgj")
def test_obj_compare_non_existing(monkeypatch, obj1):
    """Comparing against a missing file still yields details."""
    monkeypatch.setattr(Config(), "new_file", True)

    missing = MissingFile("/nonexisting", obj1)
    difference = obj1.compare(missing)

    assert difference.source2 == "/nonexisting"
    assert len(difference.details) > 0
@skip_unless_tools_exist("radare2")
@skip_unless_module_exists("r2pipe")
@skip_unless_radare2_command_exists("pdgj")
def test_ghidra_diff(monkeypatch, obj1, obj2):
    """With the disass operation excluded, one detail is expected."""
    exclude_commands(monkeypatch, ["disass.*"])

    first_detail = obj1.compare(obj2).details[0]
    function_details = first_detail.details

    assert len(function_details) == 1
    assert_diff(function_details[0], "elf_obj_ghidra_expected_diff")
@skip_unless_tools_exist("radare2")
@skip_unless_module_exists("r2pipe")
def test_radare2_diff(monkeypatch, obj1, obj2):
    """With the r2ghidra operation excluded, one detail is expected."""
    exclude_commands(monkeypatch, ["r2ghidra.*"])

    first_detail = obj1.compare(obj2).details[0]
    function_details = first_detail.details

    assert len(function_details) == 1
    assert_diff(function_details[0], "elf_obj_radare2_expected_diff")
......@@ -22,6 +22,7 @@
import pytest
import subprocess
from diffoscope.config import Config
from distutils.version import LooseVersion
from diffoscope.comparators.ar import ArFile
......@@ -39,6 +40,12 @@ rlib1 = load_fixture("test1.rlib")
rlib2 = load_fixture("test2.rlib")
@pytest.fixture(scope="function", autouse=True)
def init_tests(request, monkeypatch):
    """Autouse fixture replacing the excluded commands for every test."""
    # Make sure decompilation is disabled so that tests don't break
    radare2_only = ["^radare2.*"]
    monkeypatch.setattr(Config(), "exclude_commands", radare2_only)
def llvm_version():
return (
subprocess.check_output(["llvm-config", "--version"])
......
@@ -1,6 +1,6 @@
undefined8 sym.f(void)
{
// [01] -r-x section size 11 named .text
- return 0x2a;
+ return 0xffffffff;
}
@@ -1,13 +1,13 @@
function sym.f () {
// 1 basic blocks
loc_0x8000040:
push rbp //[01] -r-x section size 11 named .text
rbp = rsp
- eax = 0x2a //'*' ; 42
+ eax = 0xffffffff //-1
//rsp ; rsp
return
(break)
}
......@@ -197,8 +197,8 @@ def module_is_not_importable(x):
def skip_unless_module_exists(name):
return skipif(
module_is_not_importable(name),
reason="requires {} Python module".format(name),
tools=("{}_module".format(name)),
reason=f"requires {name} Python module",
tools=(f"{name}_module",),
)
......