Commit 1c268130 authored by Chris Lamb

Add optional support for binwalking to find (e.g.) concatenated CPIO archives. (Closes: #820631)

parent 23703113
......@@ -45,6 +45,7 @@ Build-Depends:
poppler-utils <!nocheck>,
python-argcomplete,
python3-all,
python3-binwalk <!nocheck>,
python3-debian <!nocheck>,
python3-distro <!nocheck>,
python3-docutils,
......
......@@ -57,6 +57,7 @@ override_dh_python3:
dh_python3 -p diffoscope \
--depends=distro \
--recommends=argcomplete \
--recommends=binwalk \
--recommends=defusedxml \
--recommends=guestfs \
--recommends=progressbar \
......
......@@ -37,6 +37,7 @@ class ComparatorManager(object):
('deb.Md5sumsFile',),
('deb.DebDataTarFile',),
('elf.ElfSection',),
('binwalk.BinwalkFile',),
('ps.PsFile',),
('javascript.JavaScriptFile',),
('json.JSONFile',),
......
# -*- coding: utf-8 -*-
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2017 Chris Lamb <lamby@debian.org>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import os
import re
import glob
import logging
from diffoscope.tempfiles import get_temporary_directory
from .utils.file import File
from .utils.archive import Archive
try:
import binwalk
except ImportError:
binwalk = None
logger = logging.getLogger(__name__)
class BinwalkFileContainer(Archive):
    """Container exposing the files carved out of a ``BinwalkFile``.

    Nothing is unpacked by this class itself: every method simply reads the
    ``_members`` mapping and ``_unpacked`` directory object stored on
    ``self.source`` (presumably attached by ``BinwalkFile.recognizes``).
    """

    def open_archive(self):
        """Return the container itself as the archive handle."""
        return self

    def close_archive(self):
        """Clean up the directory object that holds the carved files."""
        unpacked = self.source._unpacked
        unpacked.cleanup()

    def get_member_names(self):
        """Return the names of all carved members in sorted order."""
        names = list(self.source._members)
        names.sort()
        return names

    def extract(self, member_name, dest_dir):
        """Return the stored path for *member_name*.

        *dest_dir* is not used; the value recorded in ``_members`` is
        returned unchanged.
        """
        members = self.source._members
        return members[member_name]
class BinwalkFile(File):
    """File whose type matches "cpio archive" and in which binwalk finds
    at least one embedded member.

    The members found during recognition are exposed through
    ``BinwalkFileContainer``.
    """

    FILE_TYPE_RE = re.compile(r'\bcpio archive\b')
    CONTAINER_CLASS = BinwalkFileContainer

    @classmethod
    def recognizes(cls, file):
        """Return True if *file* should be handled by this comparator.

        On success two attributes are attached to *file*:

        * ``_members``: dict mapping the basename of every carved file to
          its full path;
        * ``_unpacked``: the temporary directory object holding those
          files (``BinwalkFileContainer.close_archive`` calls its
          ``cleanup()``).

        On every other outcome, including an exception raised while
        scanning, the temporary directory is cleaned up before returning
        or propagating the error.
        """
        # The binwalk module is optional; without it we can't do anything
        if binwalk is None:
            return False

        if not super().recognizes(file):
            return False

        # Don't recurse; binwalk has already found everything
        if isinstance(file.container, cls.CONTAINER_CLASS):
            return False

        unpacked = get_temporary_directory(prefix='binwalk')
        logger.debug("Extracting %s to %s", file.path, unpacked.name)

        recognized = False
        try:
            binwalk.scan(
                file.path,
                dd='cpio:cpio',
                carve=True,
                quiet=True,
                signature=True,
                directory=unpacked.name,
            )

            # Collect everything exactly one directory level below the
            # extraction directory, keyed by bare filename.
            members = {
                os.path.basename(x): x
                for x in glob.glob(os.path.join(unpacked.name, '*/*'))
            }

            logger.debug("Found %d embedded member(s)", len(members))

            if members:
                file._members = members
                file._unpacked = unpacked
                recognized = True
        finally:
            # Ownership of the directory only passes to `file` on success;
            # otherwise (nothing found, or an error) release it here.
            if not recognized:
                unpacked.cleanup()

        return recognized
# -*- coding: utf-8 -*-
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2017 Chris Lamb <lamby@debian.org>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import pytest
from diffoscope.comparators.binwalk import BinwalkFile
from ..utils.data import load_fixture, get_data
from ..utils.tools import skip_unless_tools_exist, skip_unless_module_exists
from ..utils.nonexisting import assert_non_existing
# Fixtures for the two sample files used by the tests below.
binwalk1, binwalk2 = (
    load_fixture(name) for name in ('test1.binwalk', 'test2.binwalk')
)
@skip_unless_module_exists('binwalk')
def test_identification(binwalk1, binwalk2):
    """Both sample files must be identified as ``BinwalkFile``."""
    for fixture in (binwalk1, binwalk2):
        assert isinstance(fixture, BinwalkFile)
@skip_unless_module_exists('binwalk')
def test_no_differences(binwalk1):
    """Comparing a file against itself must yield no difference."""
    assert binwalk1.compare(binwalk1) is None
@pytest.fixture
def differences(binwalk1, binwalk2):
    """Return the ``details`` of comparing the two sample files."""
    top_level = binwalk1.compare(binwalk2)
    return top_level.details
@skip_unless_tools_exist('cpio')
@skip_unless_module_exists('binwalk')
def test_listing(differences):
    """Check the reported member names and the first member's listing diff."""
    first = differences[0]
    assert first.source1 == '0.cpio'

    second = differences[1]
    assert second.source2 == '600.cpio'

    expected = get_data('binwalk_expected_diff')
    assert first.details[0].unified_diff == expected
@skip_unless_tools_exist('cpio')
@skip_unless_module_exists('binwalk')
def test_symlink(differences):
    """Check the ``dir/link`` entry reported inside the first member."""
    link = differences[0].details[1]
    assert link.source1 == 'dir/link'
    assert link.comment == 'symlink'

    expected = get_data('symlink_expected_diff')
    assert link.unified_diff == expected
@skip_unless_tools_exist('cpio')
@skip_unless_module_exists('binwalk')
def test_compare_non_existing(monkeypatch, binwalk1):
    """Run the shared ``assert_non_existing`` check on the first sample."""
    assert_non_existing(monkeypatch, binwalk1)
@@ -1,4 +1,4 @@
-drwxr-xr-x 3 1000 1000 0 2017-09-09 09:59:40.000000 .
-drwxr-xr-x 2 1000 1000 0 2017-09-09 09:59:40.000000 dir
-lrwxrwxrwx 1 1000 1000 6 2017-09-09 09:59:40.000000 dir/link -> broken
--rw-r--r-- 1 1000 1000 446 2017-09-09 09:59:40.000000 dir/text
+drwxr-xr-x 3 1000 1000 0 2017-09-09 09:59:51.000000 .
+drwxr-xr-x 2 1000 1000 0 2017-09-09 09:59:51.000000 dir
+lrwxrwxrwx 1 1000 1000 13 2017-09-09 09:59:51.000000 dir/link -> really-broken
+-rw-r--r-- 1 1000 1000 671 2017-09-09 09:59:51.000000 dir/text
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment