Commit 2d362505 authored by Jérémy Bobbio's avatar Jérémy Bobbio

Comparators now return a single Difference

The forest approach was often clumsy and ill-specified. Now
comparators are expected to return a single Difference, or None.

To make it easy for comparators who are producing details, a new decorator
`returns_details` will create a wrapping Difference object for free. This
was previously a side-effect of using the `binary_fallback` decorator.
This new decorator will filter None from the list of differences,
removing some boilerplate from the comparators.
parent a04f9b8a
......@@ -104,16 +104,16 @@ def main():
if parsed_args.debug:
logger.setLevel(logging.DEBUG)
set_locale()
differences = debbindiff.comparators.compare_files(
difference = debbindiff.comparators.compare_files(
parsed_args.file1, parsed_args.file2)
if len(differences) > 0:
if difference:
if parsed_args.html_output:
with make_printer(parsed_args.html_output) as print_func:
output_html(differences, css_url=parsed_args.css_url, print_func=print_func,
output_html(difference, css_url=parsed_args.css_url, print_func=print_func,
max_page_size=parsed_args.max_report_size)
if (parsed_args.text_output and parsed_args.text_output != parsed_args.html_output) or not parsed_args.html_output:
with make_printer(parsed_args.text_output or '-') as print_func:
output_text(differences, print_func=print_func)
output_text(difference, print_func=print_func)
return 1
return 0
......
......@@ -49,11 +49,11 @@ except ImportError as ex:
raise
def compare_rpm_files(path1, path2, source=None):
logger.info("Python rpm module not found.")
differences = compare_binary_files(path1, path2, source)
if differences:
differences[0].comment = (differences[0].comment or '') + \
difference = compare_binary_files(path1, path2, source)
if difference:
difference.comment = (difference.comment or '') + \
'\nUnable to find Python rpm module. Falling back to binary comparison.'
return differences
return difference
from debbindiff.comparators.squashfs import compare_squashfs_files
from debbindiff.comparators.text import compare_text_files
from debbindiff.comparators.tar import compare_tar_files
......@@ -137,8 +137,8 @@ def compare_files(path1, path2, source=None):
text2 = "[ No symlink ]"
if dest1 and dest2 and dest1 == dest2:
return []
return [Difference.from_unicode(text1, text2, path1, path2, source=source, comment="symlink")]
return None
return Difference.from_unicode(text1, text2, path1, path2, source=source, comment="symlink")
if os.path.isdir(path1) and os.path.isdir(path2):
return compare_directories(path1, path2, source)
......@@ -153,7 +153,7 @@ def compare_files(path1, path2, source=None):
size2 = os.path.getsize(path2)
if size1 == size2 and size1 <= SMALL_FILE_THRESHOLD:
if file(path1).read() == file(path2).read():
return []
return None
# ok, let's do the full thing
mime_type1 = guess_mime_type(path1)
mime_type2 = guess_mime_type(path2)
......
......@@ -49,15 +49,12 @@ def compare_binary_files(path1, path2, source=None):
try:
with xxd(path1) as xxd1:
with xxd(path2) as xxd2:
difference = Difference.from_file(xxd1, xxd2, path1, path2, source)
return Difference.from_file(xxd1, xxd2, path1, path2, source)
except RequiredToolNotFound:
hexdump1 = hexdump_fallback(path1)
hexdump2 = hexdump_fallback(path2)
comment = 'xxd not available in path. Falling back to Python hexlify.\n'
difference = Difference.from_unicode(hexdump1, hexdump2, path1, path2, source, comment)
if not difference:
return []
return [difference]
return Difference.from_unicode(hexdump1, hexdump2, path1, path2, source, comment)
@tool_required('cmp')
......
......@@ -21,7 +21,7 @@ from contextlib import contextmanager
import os.path
import subprocess
import debbindiff.comparators
from debbindiff.comparators.utils import binary_fallback, make_temp_directory
from debbindiff.comparators.utils import binary_fallback, returns_details, make_temp_directory
from debbindiff.difference import get_source
from debbindiff import tool_required
......@@ -42,9 +42,10 @@ def decompress_bzip2(path):
@binary_fallback
@returns_details
def compare_bzip2_files(path1, path2, source=None):
with decompress_bzip2(path1) as new_path1:
with decompress_bzip2(path2) as new_path2:
return debbindiff.comparators.compare_files(
return [debbindiff.comparators.compare_files(
new_path1, new_path2,
source=[os.path.basename(new_path1), os.path.basename(new_path2)])
source=[os.path.basename(new_path1), os.path.basename(new_path2)])]
......@@ -21,7 +21,7 @@ import subprocess
import os.path
import debbindiff.comparators
from debbindiff import logger, tool_required
from debbindiff.comparators.utils import binary_fallback, make_temp_directory, Command
from debbindiff.comparators.utils import binary_fallback, returns_details, make_temp_directory, Command
from debbindiff.difference import Difference
class CpioContent(Command):
......@@ -49,13 +49,12 @@ def extract_cpio_archive(path, destdir):
@binary_fallback
@returns_details
def compare_cpio_files(path1, path2, source=None):
differences = []
difference = Difference.from_command(
CpioContent, path1, path2, source="file list")
if difference:
differences.append(difference)
differences.append(Difference.from_command(
CpioContent, path1, path2, source="file list"))
# compare files contained in archive
content1 = get_cpio_names(path1)
......@@ -69,7 +68,7 @@ def compare_cpio_files(path1, path2, source=None):
in_path2 = os.path.join(temp_dir2, member)
if not os.path.isfile(in_path1) or not os.path.isfile(in_path2):
continue
differences.extend(debbindiff.comparators.compare_files(
differences.append(debbindiff.comparators.compare_files(
in_path1, in_path2, source=member))
return differences
......@@ -26,10 +26,11 @@ from debbindiff.difference import Difference, get_source
import debbindiff.comparators
from debbindiff.comparators.binary import are_same_binaries
from debbindiff.comparators.utils import \
binary_fallback, make_temp_directory, get_ar_content
binary_fallback, returns_details, make_temp_directory, get_ar_content
@binary_fallback
@returns_details
def compare_deb_files(path1, path2, source=None):
differences = []
# look up differences in content
......@@ -50,7 +51,7 @@ def compare_deb_files(path1, path2, source=None):
f1.write(member1.read())
with open(in_path2, 'w') as f2:
f2.write(member2.read())
differences.extend(
differences.append(
debbindiff.comparators.compare_files(
in_path1, in_path2, source=name))
os.unlink(in_path1)
......@@ -58,16 +59,14 @@ def compare_deb_files(path1, path2, source=None):
# look up differences in file list and file metadata
content1 = get_ar_content(path1)
content2 = get_ar_content(path2)
difference = Difference.from_unicode(
content1, content2, path1, path2, source="metadata")
if difference:
differences.append(difference)
differences.append(Difference.from_unicode(
content1, content2, path1, path2, source="metadata"))
return differences
def compare_md5sums_files(path1, path2, source=None):
if are_same_binaries(path1, path2):
return []
return [Difference(None, path1, path2,
source=get_source(path1, path2),
comment="Files in package differs")]
return None
return Difference(None, path1, path2,
source=get_source(path1, path2),
comment="Files in package differs")
......@@ -23,7 +23,7 @@ import subprocess
from debbindiff import logger, tool_required, RequiredToolNotFound
from debbindiff.difference import Difference
import debbindiff.comparators
from debbindiff.comparators.utils import Command
from debbindiff.comparators.utils import returns_details, Command
def ls(path):
......@@ -96,6 +96,7 @@ def compare_meta(path1, path2):
@tool_required('ls')
@returns_details
def compare_directories(path1, path2, source=None):
differences = []
logger.debug('path1 files: %s' % sorted(set(os.listdir(path1))))
......@@ -104,26 +105,20 @@ def compare_directories(path1, path2, source=None):
logger.debug('compare %s' % name)
in_path1 = os.path.join(path1, name)
in_path2 = os.path.join(path2, name)
in_differences = debbindiff.comparators.compare_files(
in_path1, in_path2, source=name)
in_difference = debbindiff.comparators.compare_files(
in_path1, in_path2, source=name)
if not os.path.isdir(in_path1):
if in_differences:
in_differences[0].add_details(compare_meta(in_path1, in_path2))
if in_difference:
in_difference.add_details(compare_meta(in_path1, in_path2))
else:
details = compare_meta(in_path1, in_path2)
if details:
d = Difference(None, path1, path2, source=name)
d.add_details(details)
in_differences = [d]
differences.extend(in_differences)
in_difference = d
differences.append(in_difference)
ls1 = ls(path1)
ls2 = ls(path2)
difference = Difference.from_unicode(ls1, ls2, path1, path2, source="ls")
if difference:
differences.append(difference)
differences.append(Difference.from_unicode(ls1, ls2, path1, path2, source="ls"))
differences.extend(compare_meta(path1, path2))
if differences:
d = Difference(None, path1, path2, source=source)
d.add_details(differences)
return [d]
return []
return differences
......@@ -21,7 +21,7 @@ import os.path
import re
import subprocess
from debbindiff import tool_required
from debbindiff.comparators.utils import binary_fallback, get_ar_content, Command
from debbindiff.comparators.utils import binary_fallback, returns_details, get_ar_content, Command
from debbindiff.difference import Difference
......@@ -62,32 +62,26 @@ class ObjdumpDisassemble(Command):
# by both compare_elf_files and compare_static_lib_files
def _compare_elf_data(path1, path2, source=None):
differences = []
difference = Difference.from_command(ReadelfAll, path1, path2)
if difference:
differences.append(difference)
difference = Difference.from_command(ReadelfDebugDump, path1, path2)
if difference:
differences.append(difference)
difference = Difference.from_command(ObjdumpDisassemble, path1, path2)
if difference:
differences.append(difference)
differences.append(Difference.from_command(ReadelfAll, path1, path2))
differences.append(Difference.from_command(ReadelfDebugDump, path1, path2))
differences.append(Difference.from_command(ObjdumpDisassemble, path1, path2))
return differences
@binary_fallback
@returns_details
def compare_elf_files(path1, path2, source=None):
return _compare_elf_data(path1, path2, source=None)
@binary_fallback
@returns_details
def compare_static_lib_files(path1, path2, source=None):
differences = []
# look up differences in metadata
content1 = get_ar_content(path1)
content2 = get_ar_content(path2)
difference = Difference.from_unicode(
content1, content2, path1, path2, source="metadata")
if difference:
differences.append(difference)
differences.append(Difference.from_unicode(
content1, content2, path1, path2, source="metadata"))
differences.extend(_compare_elf_data(path1, path2, source))
return differences
......@@ -20,7 +20,7 @@
import locale
import subprocess
from debbindiff import tool_required
from debbindiff.comparators.utils import binary_fallback, Command
from debbindiff.comparators.utils import binary_fallback, returns_details, Command
from debbindiff.difference import Difference
......@@ -33,8 +33,6 @@ class Showttf(Command):
return line.decode('latin-1').encode('utf-8')
@binary_fallback
@returns_details
def compare_ttf_files(path1, path2, source=None):
difference = Difference.from_command(Showttf, path1, path2)
if not difference:
return []
return [difference]
return [Difference.from_command(Showttf, path1, path2)]
......@@ -21,7 +21,7 @@ import re
import subprocess
from StringIO import StringIO
from debbindiff import tool_required
from debbindiff.comparators.utils import binary_fallback, Command
from debbindiff.comparators.utils import binary_fallback, returns_details, Command
from debbindiff.difference import Difference
from debbindiff import logger
......@@ -57,8 +57,6 @@ class Msgunfmt(Command):
@binary_fallback
@returns_details
def compare_mo_files(path1, path2, source=None):
difference = Difference.from_command(Msgunfmt, path1, path2)
if not difference:
return []
return [difference]
return [Difference.from_command(Msgunfmt, path1, path2)]
......@@ -22,7 +22,7 @@ import subprocess
import os.path
import debbindiff.comparators
from debbindiff import tool_required
from debbindiff.comparators.utils import binary_fallback, make_temp_directory
from debbindiff.comparators.utils import binary_fallback, returns_details, make_temp_directory
from debbindiff.difference import Difference, get_source
......@@ -47,19 +47,18 @@ def get_gzip_metadata(path):
@binary_fallback
@returns_details
def compare_gzip_files(path1, path2, source=None):
differences = []
# check metadata
metadata1 = get_gzip_metadata(path1)
metadata2 = get_gzip_metadata(path2)
difference = Difference.from_unicode(
metadata1, metadata2, path1, path2, source='metadata')
if difference:
differences.append(difference)
differences.append(Difference.from_unicode(
metadata1, metadata2, path1, path2, source='metadata'))
# check content
with decompress_gzip(path1) as new_path1:
with decompress_gzip(path2) as new_path2:
differences.extend(debbindiff.comparators.compare_files(
differences.append(debbindiff.comparators.compare_files(
new_path1, new_path2,
source=[os.path.basename(new_path1), os.path.basename(new_path2)]))
return differences
......@@ -19,7 +19,7 @@
import subprocess
from debbindiff import tool_required
from debbindiff.comparators.utils import binary_fallback, Command
from debbindiff.comparators.utils import binary_fallback, returns_details, Command
from debbindiff.difference import Difference
......@@ -30,8 +30,6 @@ class ShowIface(Command):
@binary_fallback
@returns_details
def compare_hi_files(path1, path2, source=None):
difference = Difference.from_command(ShowIface, path1, path2)
if not difference:
return []
return [difference]
return [Difference.from_command(ShowIface, path1, path2)]
......@@ -18,12 +18,13 @@
# along with debbindiff. If not, see <http://www.gnu.org/licenses/>.
import os.path
from debbindiff.comparators.utils import binary_fallback
from debbindiff.comparators.utils import binary_fallback, returns_details
from debbindiff.comparators.tar import compare_tar_files
from debbindiff.comparators.gzip import decompress_gzip, get_gzip_metadata
from debbindiff.difference import Difference
@binary_fallback
@returns_details
# ipk packages are just .tar.gz archives
def compare_ipk_files(path1, path2, source=None):
differences = []
......@@ -31,15 +32,13 @@ def compare_ipk_files(path1, path2, source=None):
# metadata
metadata1 = get_gzip_metadata(path1)
metadata2 = get_gzip_metadata(path2)
difference = Difference.from_unicode(
metadata1, metadata2, path1, path2, source='metadata')
if difference:
differences.append(difference)
differences.append(Difference.from_unicode(
metadata1, metadata2, path1, path2, source='metadata'))
# content
with decompress_gzip(path1) as tar1:
with decompress_gzip(path2) as tar2:
differences.extend(compare_tar_files(tar1, tar2,
differences.append(compare_tar_files(tar1, tar2,
source=[os.path.basename(tar1), os.path.basename(tar2)]))
return differences
......@@ -21,7 +21,7 @@ import os.path
import subprocess
import debbindiff.comparators
from debbindiff import logger, tool_required
from debbindiff.comparators.utils import binary_fallback, make_temp_directory, Command
from debbindiff.comparators.utils import binary_fallback, returns_details, make_temp_directory, Command
from debbindiff.difference import Difference
......@@ -67,17 +67,14 @@ def extract_from_iso9660(image_path, in_path, dest):
@binary_fallback
@returns_details
def compare_iso9660_files(path1, path2, source=None):
differences = []
# compare metadata
difference = Difference.from_command(ISO9660PVD, path1, path2)
if difference:
differences.append(difference)
differences.append(Difference.from_command(ISO9660PVD, path1, path2))
for extension in (None, 'joliet', 'rockridge'):
difference = Difference.from_command(ISO9660Listing, path1, path2, command_args=(extension,))
if difference:
differences.append(difference)
differences.append(Difference.from_command(ISO9660Listing, path1, path2, command_args=(extension,)))
# compare files contained in image
files1 = get_iso9660_names(path1)
......@@ -92,7 +89,7 @@ def compare_iso9660_files(path1, path2, source=None):
extract_from_iso9660(path1, name, dest)
with open(in_path2, 'w') as dest:
extract_from_iso9660(path2, name, dest)
differences.extend(debbindiff.comparators.compare_files(
differences.append(debbindiff.comparators.compare_files(
in_path1, in_path2, source=name))
return differences
......@@ -20,7 +20,7 @@
import os.path
import re
from debbindiff import tool_required
from debbindiff.comparators.utils import binary_fallback, Command
from debbindiff.comparators.utils import binary_fallback, returns_details, Command
from debbindiff.difference import Difference
......@@ -39,8 +39,6 @@ class Javap(Command):
return line
@binary_fallback
@returns_details
def compare_class_files(path1, path2, source=None):
difference = Difference.from_command(Javap, path1, path2)
if not difference:
return []
return [difference]
return [Difference.from_command(Javap, path1, path2)]
......@@ -19,7 +19,7 @@
import subprocess
from debbindiff import tool_required
from debbindiff.comparators.utils import binary_fallback, Command
from debbindiff.comparators.utils import binary_fallback, returns_details, Command
from debbindiff.difference import Difference, get_source
......@@ -39,12 +39,9 @@ class Pdftk(Command):
@binary_fallback
@returns_details
def compare_pdf_files(path1, path2, source=None):
differences = []
difference = Difference.from_command(Pdftotext, path1, path2)
if difference:
differences.append(difference)
difference = Difference.from_command(Pdftk, path1, path2)
if difference:
differences.append(difference)
differences.append(Difference.from_command(Pdftotext, path1, path2))
differences.append(Difference.from_command(Pdftk, path1, path2))
return differences
......@@ -20,7 +20,7 @@
from functools import partial
import subprocess
from debbindiff import tool_required
from debbindiff.comparators.utils import binary_fallback, Command
from debbindiff.comparators.utils import binary_fallback, returns_details, Command
from debbindiff.difference import Difference
......@@ -39,9 +39,7 @@ class Sng(Command):
@binary_fallback
@returns_details
def compare_png_files(path1, path2, source=None):
difference = Difference.from_command(Sng, path1, path2, source='sng')
if not difference:
return []
return [difference]
return [Difference.from_command(Sng, path1, path2, source='sng')]
......@@ -24,7 +24,7 @@ from contextlib import contextmanager
import rpm
import debbindiff.comparators
from debbindiff import logger, tool_required
from debbindiff.comparators.utils import binary_fallback, make_temp_directory
from debbindiff.comparators.utils import binary_fallback, returns_details, make_temp_directory
from debbindiff.difference import Difference, get_source
def get_rpm_header(path, ts):
......@@ -65,6 +65,7 @@ def extract_rpm_payload(path):
@binary_fallback
@returns_details
def compare_rpm_files(path1, path2, source=None):
differences = []
......@@ -75,15 +76,13 @@ def compare_rpm_files(path1, path2, source=None):
ts.setVSFlags(-1)
header1 = get_rpm_header(path1, ts)
header2 = get_rpm_header(path2, ts)
difference = Difference.from_unicode(
header1, header2, path1, path2, source="header")
if difference:
differences.append(difference)
differences.append(Difference.from_unicode(
header1, header2, path1, path2, source="header"))
# extract cpio archive
with extract_rpm_payload(path1) as archive1:
with extract_rpm_payload(path2) as archive2:
differences.extend(debbindiff.comparators.compare_files(
differences.append(debbindiff.comparators.compare_files(
archive1, archive2, source=get_source(archive1, archive2)))
return differences
......@@ -22,7 +22,7 @@ import subprocess
import os.path
import debbindiff.comparators
from debbindiff import logger, tool_required
from debbindiff.comparators.utils import binary_fallback, make_temp_directory, Command
from debbindiff.comparators.utils import binary_fallback, returns_details, make_temp_directory, Command
from debbindiff.difference import Difference
......@@ -61,16 +61,13 @@ def extract_squashfs(path, destdir):
@binary_fallback
@returns_details
def compare_squashfs_files(path1, path2, source=None):
differences = []
# compare metadata
difference = Difference.from_command(SquashfsSuperblock, path1, path2)
if difference:
differences.append(difference)
difference = Difference.from_command(SquashfsListing, path1, path2)
if difference:
differences.append(difference)
differences.append(Difference.from_command(SquashfsSuperblock, path1, path2))
differences.append(Difference.from_command(SquashfsListing, path1, path2))
# compare files contained in archive
files1 = get_squashfs_names(path1)
......@@ -84,7 +81,7 @@ def compare_squashfs_files(path1, path2, source=None):
in_path2 = os.path.join(temp_dir2, member)
if not os.path.isfile(in_path1) or not os.path.isfile(in_path2):
continue
differences.extend(debbindiff.comparators.compare_files(
differences.append(debbindiff.comparators.compare_files(
in_path1, in_path2, source=member))
return differences
......@@ -24,7 +24,7 @@ import tarfile
from debbindiff import logger
from debbindiff.difference import Difference
import debbindiff.comparators
from debbindiff.comparators.utils import binary_fallback, make_temp_directory
from debbindiff.comparators.utils import binary_fallback, returns_details, make_temp_directory
def get_tar_content(tar):
......@@ -39,6 +39,7 @@ def get_tar_content(tar):
@binary_fallback
@returns_details
def compare_tar_files(path1, path2, source=None):
differences = []
with tarfile.open(path1, 'r') as tar1:
......@@ -59,7 +60,7 @@ def compare_tar_files(path1, path2, source=None):
tar2.extract(name, temp_dir2)
in_path1 = os.path.join(temp_dir1, name).decode('utf-8')
in_path2 = os.path.join(temp_dir2, name).decode('utf-8')
differences.extend(
differences.append(
debbindiff.comparators.compare_files(
in_path1, in_path2,
source=name.decode('utf-8')))
......@@ -68,8 +69,6 @@ def compare_tar_files(path1, path2, source=None):
# look up differences in file list and file metadata
content1 = get_tar_content(tar1).decode('utf-8')
content2 = get_tar_content(tar2).decode('utf-8')
difference = Difference.from_unicode(
content1, content2, path1, path2, source="metadata")
if difference:
differences.append(difference)
differences.append(Difference.from_unicode(
content1, content2, path1, path2, source="metadata"))
return differences
......@@ -28,10 +28,7 @@ def compare_text_files(path1, path2, encoding=None, source=None):
try:
file1 = codecs.open(path1, 'r', encoding=encoding)
file2 = codecs.open(path2, 'r', encoding=encoding)
difference = Difference.from_file(file1, file2, path1, path2, source)
return Difference.from_file(file1, file2, path1, path2, source)
except (LookupError, UnicodeDecodeError):
# unknown or misdetected encoding
return compare_binary_files(path1, path2, source)
if not difference:
return []
return [difference]
......@@ -39,35 +39,44 @@ from debbindiff import logger, RequiredToolNotFound
def binary_fallback(original_function):
def with_fallback(path1, path2, source=None):
if are_same_binaries(path1, path2):
return []
return None
try:
inside_differences = original_function(path1, path2, source)
difference = original_function(path1, path2, source)
# no differences detected inside? let's at least do a binary diff
if len(inside_differences) == 0:
difference = compare_binary_files(path1, path2, source=source)[0]
if difference is None:
difference = compare_binary_files(path1, path2, source=source)
difference.comment = (difference.comment or '') + \
"No differences found inside, yet data differs"
else:
difference = Difference(None, path1, path2, source=source)
difference.add_details(inside_differences)
except subprocess.CalledProcessError as e:
difference = compare_binary_files(path1, path2, source=source)[0]
difference = compare_binary_files(path1, path2, source=source)
output = re.sub(r'^', ' ', e.output, flags=re.MULTILINE)
cmd = ' '.join(e.cmd)
difference.comment = (difference.comment or '') + \
"Command `%s` exited with %d. Output:\n%s" \
% (cmd, e.returncode, output)
except RequiredToolNotFound as e:
difference = compare_binary_files(path1, path2, source=source)[0]
difference = compare_binary_files(path1, path2, source=source)
difference.comment = (difference.comment or '') + \
"'%s' not available in path. Falling back to binary comparison." % e.command
package = e.get_package()
if package:
difference.comment += "\nInstall '%s' to get a better output." % package
return [difference]
return difference
return with_fallback
# decorator that will group multiple differences as details of an empty Difference
# are detected or if an external tool fails
def returns_details(original_function):
def wrap_details(path1, path2, source=None):
details = [d for d in original_function(path1, path2, source) if d is not None]
if len(details) == 0:
return None
difference = Difference(None, path1, path2, source=source)
difference.add_details(details)
return difference
return wrap_details
@contextmanager
def make_temp_directory():
temp_dir = tempfile.mkdtemp(suffix='debbindiff')
......
......@@ -22,7 +22,7 @@ import os.path
import subprocess
import debbindiff.comparators
from debbindiff import tool_required
from debbindiff.comparators.utils import binary_fallback, make_temp_directory
from debbindiff.comparators.utils import binary_fallback, returns_details, make_temp_directory
from debbindiff.difference import get_source
......@@ -42,9 +42,10 @@ def decompress_xz(path):
@binary_fallback