Commit 21dc4a74 authored by Clemens Lang's avatar Clemens Lang Committed by Jérémy Bobbio

Add comparator for OS X binary files

Implement a simple comparator for the Mach-O binary file format that OS X
uses for libraries, executables and loadable modules. It supports listing
differences across multiple architectures and currently reports the text
segment, data segment, linked libraries and library ID, and the Mach-O header.

Requires the otool(1) and lipo(1) utilities and is thus probably not
useful on any platform that does not have them (i.e., anything other than OS X).
parent 98aa0a14
......@@ -55,6 +55,7 @@ from diffoscope.comparators.haskell import HiFile
from diffoscope.comparators.image import ImageFile
from diffoscope.comparators.ipk import IpkFile
from diffoscope.comparators.iso9660 import Iso9660File
from diffoscope.comparators.macho import MachoFile
from diffoscope.comparators.mono import MonoExeFile
from diffoscope.comparators.pdf import PdfFile
from diffoscope.comparators.png import PngFile
......@@ -133,6 +134,7 @@ FILE_CLASSES = (
DebFile,
DexFile,
ElfFile,
MachoFile,
FsImageFile,
StaticLibFile,
Sqlite3Database,
......
# -*- coding: utf-8 -*-
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2014-2015 Jérémy Bobbio <lunar@debian.org>
# Copyright © 2015 Clemens Lang <cal@macports.org>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <http://www.gnu.org/licenses/>.
import os.path
import re
import subprocess
from diffoscope import tool_required
from diffoscope.comparators.binary import File
from diffoscope.comparators.utils import Command
from diffoscope.difference import Difference
class Otool(Command):
    """Base command that runs ``otool`` on one architecture of a file.

    Subclasses extend :meth:`otool_options` to select which part of the
    binary otool should print.
    """

    def __init__(self, path, arch, *args, **kwargs):
        """Remember the file path and the architecture to inspect.

        Remaining positional and keyword arguments are forwarded unchanged
        to the ``Command`` constructor.
        """
        self._path = path
        self._arch = arch
        super().__init__(path, *args, **kwargs)

    @tool_required('otool')
    def cmdline(self):
        """Return the argv list: ``otool``, the options, then the file path."""
        argv = ['otool']
        argv.extend(self.otool_options())
        argv.append(self.path)
        return argv

    def otool_options(self):
        """Return the otool options selecting the configured architecture."""
        return ['-arch', self._arch]

    def filter(self, line):
        """Blank out the output line that only repeats the file name.

        otool prints the file name on a line of its own, terminated by a
        colon; that line is replaced by ``b""``. Every other line, including
        lines that are not valid UTF-8, is returned untouched.
        """
        if not line:
            return line
        try:
            decoded = line.decode('utf-8')
        except UnicodeDecodeError:
            return line
        if decoded.strip() == self._path + ':':
            return b""
        return line
class OtoolHeaders(Otool):
    """otool invocation that adds the ``-h`` flag (Mach-O header output)."""

    def otool_options(self):
        """Return the inherited options followed by ``-h``."""
        inherited = super().otool_options()
        return inherited + ['-h']
class OtoolLibraries(Otool):
    """otool invocation that adds the ``-L`` flag (linked libraries output)."""

    def otool_options(self):
        """Return the inherited options followed by ``-L``."""
        inherited = super().otool_options()
        return inherited + ['-L']
class OtoolDisassemble(Otool):
    """otool invocation that adds the ``-tdvV`` flags (code/data dump)."""

    def otool_options(self):
        """Return the inherited options followed by ``-tdvV``."""
        inherited = super().otool_options()
        return inherited + ['-tdvV']
class MachoFile(File):
    """Comparator for Mach-O binaries (libraries, executables, modules).

    Uses ``lipo`` to list the architectures contained in each file and
    ``otool`` to dump headers, linked libraries and code per architecture.
    """

    # Matches the libmagic description of a Mach-O file.
    RE_FILE_TYPE = re.compile(r'^Mach-O ')
    # Captures the space-separated architecture list printed by `lipo -info`
    # for both fat and non-fat files.
    RE_EXTRACT_ARCHS = re.compile(r'^(?:Architectures in the fat file: .* are|Non-fat file: .* is architecture): (.*)$')

    @staticmethod
    def recognizes(file):
        """Return a truthy match object when *file* is described as Mach-O."""
        return MachoFile.RE_FILE_TYPE.match(file.magic_file_type)

    @staticmethod
    @tool_required('lipo')
    def get_arch_from_macho(path):
        """Return the list of architecture names contained in *path*.

        Raises:
            ValueError: if the output of ``lipo -info`` does not match the
                expected format.
            subprocess.CalledProcessError: if ``lipo`` exits with a non-zero
                status.
        """
        lipo_output = subprocess.check_output(['lipo', '-info', path]).decode('utf-8')
        lipo_match = MachoFile.RE_EXTRACT_ARCHS.match(lipo_output)
        if lipo_match is None:
            # Both values must be passed as one tuple; otherwise the `%`
            # operator only sees `path` and raises TypeError instead.
            raise ValueError('lipo -info on Mach-O file %s did not produce expected output. Output was: %s' % (path, lipo_output))
        return lipo_match.group(1).split()

    def compare_details(self, other, source=None):
        """Return the list of differences between this file and *other*.

        The first entry compares the architecture lists of both files. For
        every architecture present in both, the Mach-O headers, the linked
        libraries and the disassembled code are compared in that order.
        """
        differences = []
        # Check for fat binaries, trigger a difference if the architectures differ
        my_archs = MachoFile.get_arch_from_macho(self.path)
        other_archs = MachoFile.get_arch_from_macho(other.path)
        differences.append(Difference.from_text('\n'.join(my_archs),
                                                '\n'.join(other_archs),
                                                self.name, other.name, source='architectures'))
        # One otool view per aspect; the template receives the architecture name.
        views = (
            (OtoolHeaders, "Mach-O headers for architecture %s"),
            (OtoolLibraries, "Mach-O load commands for architecture %s"),
            (OtoolDisassemble, "Code for architecture %s"),
        )
        # Compare common architectures for differences. Sorting keeps the
        # order of the report stable between runs (set iteration order is not).
        for common_arch in sorted(set(my_archs) & set(other_archs)):
            for command, comment_template in views:
                differences.append(Difference.from_command(command, self.path, other.path, command_args=[common_arch],
                                                           comment=comment_template % common_arch))
        return differences
# -*- coding: utf-8 -*-
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2015 Jérémy Bobbio <lunar@debian.org>
# Copyright © 2015 Clemens Lang <cal@macports.org>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <http://www.gnu.org/licenses/>.
import os.path
import pytest
from diffoscope.comparators import specialize
from diffoscope.comparators.binary import FilesystemFile, NonExistingFile
from diffoscope.comparators.macho import MachoFile
from diffoscope.config import Config
from conftest import tool_missing
TEST_OBJ1_PATH = os.path.join(os.path.dirname(__file__), '../data/test1.macho')
TEST_OBJ2_PATH = os.path.join(os.path.dirname(__file__), '../data/test2.macho')
@pytest.fixture
def obj1():
    """Provide the first Mach-O test file, specialized to its comparator."""
    raw_file = FilesystemFile(TEST_OBJ1_PATH)
    return specialize(raw_file)
@pytest.fixture
def obj2():
    """Provide the second Mach-O test file, specialized to its comparator."""
    raw_file = FilesystemFile(TEST_OBJ2_PATH)
    return specialize(raw_file)
def test_obj_identification(obj1):
    """The first test file must be specialized as a MachoFile."""
    recognized = isinstance(obj1, MachoFile)
    assert recognized
def test_obj_no_differences(obj1):
    """Comparing a file against itself must yield no difference."""
    result = obj1.compare(obj1)
    assert result is None
@pytest.fixture
def obj_differences(obj1, obj2):
    """Provide the detail entries of comparing both Mach-O test files."""
    comparison = obj1.compare(obj2)
    return comparison.details
@pytest.mark.skipif(tool_missing('otool') or tool_missing('lipo'), reason='missing otool or lipo')
def test_obj_compare_non_existing(monkeypatch, obj1):
    """With ``Config.new_file`` enabled, compare against a missing file.

    The resulting difference must name the non-existing path as its
    second source.
    """
    monkeypatch.setattr(Config, 'new_file', True)
    missing = NonExistingFile('/nonexisting', obj1)
    result = obj1.compare(missing)
    assert result.source2 == '/nonexisting'
@pytest.mark.skipif(tool_missing('otool') or tool_missing('lipo'), reason='missing otool or lipo')
def test_diff(obj_differences):
    """Check the number of detail entries and the first unified diff.

    Four detail entries are expected; the first one is compared against the
    stored ``macho_expected_diff`` fixture.
    """
    assert len(obj_differences) == 4
    data_dir = os.path.join(os.path.dirname(__file__), '../data')
    dump_names = ['macho_expected_diff_arch', 'macho_expected_diff_headers', 'macho_expected_diff_loadcommands', 'macho_expected_diff_disassembly']
    # NOTE(review): this loop rewrites files in the data directory on every
    # run, which looks like a leftover from generating the fixtures. Confirm
    # whether it should instead compare each diff against its stored file.
    for dump_name, diff in zip(dump_names, obj_differences):
        with open(os.path.join(data_dir, dump_name), 'w') as f:
            print(diff.unified_diff, file=f)
    # Read the fixture through a context manager so the handle is closed.
    with open(os.path.join(os.path.dirname(__file__), '../data/macho_expected_diff')) as f:
        expected_diff = f.read()
    assert obj_differences[0].unified_diff == expected_diff
@@ -1,2 +1 @@
-i386
x86_64
@@ -1,18 +1,15 @@
(__TEXT,__text) section
_main:
-0000000100000f20 pushq %rbp
-0000000100000f21 movq %rsp, %rbp
-0000000100000f24 subq $0x10, %rsp
-0000000100000f28 leaq 0x43(%rip), %rdi ## literal pool for: "%s %s\n"
-0000000100000f2f leaq 0x43(%rip), %rsi ## literal pool for: "17:31:50"
-0000000100000f36 leaq 0x45(%rip), %rdx ## literal pool for: "Wed Dec 2 17:31:49 2015"
-0000000100000f3d movb $0x0, %al
-0000000100000f3f callq 0x100000f52 ## symbol stub for: _printf
-0000000100000f44 xorl %ecx, %ecx
-0000000100000f46 movl %eax, -0x4(%rbp)
-0000000100000f49 movl %ecx, %eax
-0000000100000f4b addq $0x10, %rsp
-0000000100000f4f popq %rbp
-0000000100000f50 retq
-(__DATA,__data) section
-0000000100001018 04 00 00 00
+0000000100000f40 pushq %rbp
+0000000100000f41 movq %rsp, %rbp
+0000000100000f44 subq $0x10, %rsp
+0000000100000f48 leaq 0x3b(%rip), %rdi ## literal pool for: "%s\n"
+0000000100000f4f leaq 0x38(%rip), %rsi ## literal pool for: "15:52:34"
+0000000100000f56 movb $0x0, %al
+0000000100000f58 callq 0x100000f6a ## symbol stub for: _printf
+0000000100000f5d xorl %ecx, %ecx
+0000000100000f5f movl %eax, -0x4(%rbp)
+0000000100000f62 movl %ecx, %eax
+0000000100000f64 addq $0x10, %rsp
+0000000100000f68 popq %rbp
+0000000100000f69 retq
@@ -1,3 +1,3 @@
Mach header
magic cputype cpusubtype caps filetype ncmds sizeofcmds flags
- 0xfeedfacf 16777223 3 0x80 2 16 1416 0x00200085
+ 0xfeedfacf 16777223 3 0x80 2 15 1280 0x00200085
@@ -1,2 +1 @@
- /usr/lib/libxml2.2.dylib (compatibility version 10.0.0, current version 10.9.0)
/usr/lib/libSystem.B.dylib (compatibility version 1.0.0, current version 1225.1.1)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment