Commit e9d72ec4 authored by Jérémy Bobbio

Refactor Difference constructor

Difference() now takes a unified diff directly. Computing the diff is
moved to a new static method, from_content(), which returns None when there
are no differences.

This paves the way for passing file descriptors to from_content() to avoid
loading entire outputs in memory.
parent da8af70e
......@@ -45,6 +45,7 @@ def compare_binary_files(path1, path2, source=None):
hexdump1 = hexdump_fallback(path1)
hexdump2 = hexdump_fallback(path2)
comment = 'xxd not available in path. Falling back to Python hexlify.\n'
if hexdump1 == hexdump2:
difference = Difference.from_content(hexdump1, hexdump2, path1, path2, source, comment)
if not difference:
return []
return [Difference(hexdump1, hexdump2, path1, path2, source, comment)]
return [difference]
......@@ -47,21 +47,21 @@ def compare_changes_files(path1, path2, source=None):
if dot_changes1[field] != dot_changes2[field]:
content1 = "%s: %s" % (field, dot_changes1[field])
content2 = "%s: %s" % (field, dot_changes2[field])
differences.append(Difference(
content1, content2,
dot_changes1.get_changes_file(),
dot_changes2.get_changes_file(),
source=source))
difference = Difference.from_content(
content1, content2,
dot_changes1.get_changes_file(),
dot_changes2.get_changes_file(),
source=source)
if difference:
differences.append(difference)
# This will handle differences in the list of files, checksums, priority
# and section
files1 = dot_changes1.get('Files')
files2 = dot_changes2.get('Files')
logger.debug(dot_changes1.get_as_string('Files'))
if files1 == files2:
return differences
files_difference = Difference(
files_difference = Difference.from_content(
dot_changes1.get_as_string('Files'),
dot_changes2.get_as_string('Files'),
dot_changes1.get_changes_file(),
......@@ -69,6 +69,9 @@ def compare_changes_files(path1, path2, source=None):
source=source,
comment="List of files does not match")
if not files_difference:
return differences
files1 = dict([(d['name'], d) for d in files1])
files2 = dict([(d['name'], d) for d in files2])
......
......@@ -52,9 +52,10 @@ def compare_cpio_files(path1, path2, source=None):
# compare metadata
content1 = get_cpio_content(path1, verbose=True)
content2 = get_cpio_content(path2, verbose=True)
if content1 != content2:
differences.append(Difference(
content1, content2, path1, path2, source="metadata"))
difference = Difference.from_content(
content1, content2, path1, path2, source="metadata")
if difference:
differences.append(difference)
# compare files contained in archive
content1 = get_cpio_content(path1, verbose=False)
......
......@@ -55,15 +55,16 @@ def compare_deb_files(path1, path2, source=None):
# look up differences in file list and file metadata
content1 = get_ar_content(path1)
content2 = get_ar_content(path2)
if content1 != content2:
differences.append(Difference(
content1, content2, path1, path2, source="metadata"))
difference = Difference.from_content(
content1, content2, path1, path2, source="metadata")
if difference:
differences.append(difference)
return differences
def compare_md5sums_files(path1, path2, source=None):
if are_same_binaries(path1, path2):
return []
return [Difference(None, None, path1, path2,
return [Difference(None, path1, path2,
source=get_source(path1, path2),
comment="Files in package differs")]
......@@ -60,27 +60,30 @@ def compare_meta(path1, path2):
try:
stat1 = stat(path1)
stat2 = stat(path2)
if stat1 != stat2:
differences.append(Difference(
stat1, stat2, path1, path2, source="stat"))
difference = Difference.from_content(
stat1, stat2, path1, path2, source="stat")
if difference:
differences.append(difference)
except RequiredToolNotFound:
logger.warn("'stat' not found! Is PATH wrong?")
try:
lsattr1 = lsattr(path1)
lsattr2 = lsattr(path2)
if lsattr1 != lsattr2:
differences.append(Difference(
lsattr1, lsattr2, path1, path2, source="lattr"))
difference = Difference.from_content(
lsattr1, lsattr2, path1, path2, source="lattr")
if difference:
differences.append(difference)
except RequiredToolNotFound:
logger.info("Unable to find 'lsattr'.")
try:
acl1 = getfacl(path1)
acl2 = getfacl(path2)
if acl1 != acl2:
differences.append(Difference(
acl1, acl2, path1, path2, source="getfacl"))
difference = Difference.from_content(
acl1, acl2, path1, path2, source="getfacl")
if difference:
differences.append(difference)
except RequiredToolNotFound:
logger.info("Unable to find 'getfacl'.")
return differences
......@@ -101,17 +104,18 @@ def compare_directories(path1, path2, source=None):
if in_differences:
in_differences[0].add_details(compare_meta(in_path1, in_path2))
else:
d = Difference(None, None, path1, path2, source=name)
d = Difference(None, path1, path2, source=name)
d.add_details(compare_meta(in_path1, in_path2))
in_differences = [d]
differences.extend(in_differences)
ls1 = sorted(ls(path1))
ls2 = sorted(ls(path2))
if ls1 != ls2:
differences.append(Difference(ls1, ls2, path1, path2, source="ls"))
difference = Difference.from_content(ls1, ls2, path1, path2, source="ls")
if difference:
differences.append(difference)
differences.extend(compare_meta(path1, path2))
if differences:
d = Difference(None, None, path1, path2, source=source)
d = Difference(None, path1, path2, source=source)
d.add_details(differences)
return [d]
return []
......@@ -58,21 +58,24 @@ def _compare_elf_data(path1, path2, source=None):
differences = []
all1 = readelf_all(path1)
all2 = readelf_all(path2)
if all1 != all2:
differences.append(Difference(
all1, all2, path1, path2, source='readelf --all'))
difference = Difference.from_content(
all1, all2, path1, path2, source='readelf --all')
if difference:
differences.append(difference)
debug_dump1 = readelf_debug_dump(path1)
debug_dump2 = readelf_debug_dump(path2)
if debug_dump1 != debug_dump2:
differences.append(Difference(
debug_dump1, debug_dump2,
path1, path2, source='readelf --debug-dump'))
difference = Difference.from_content(
debug_dump1, debug_dump2,
path1, path2, source='readelf --debug-dump')
if difference:
differences.append(difference)
objdump1 = objdump_disassemble(path1)
objdump2 = objdump_disassemble(path2)
if objdump1 != objdump2:
differences.append(Difference(
objdump1, objdump2,
path1, path2, source='objdump --disassemble --full-contents'))
difference = Difference.from_content(
objdump1, objdump2,
path1, path2, source='objdump --disassemble --full-contents')
if difference:
differences.append(difference)
return differences
......@@ -87,8 +90,9 @@ def compare_static_lib_files(path1, path2, source=None):
# look up differences in metadata
content1 = get_ar_content(path1)
content2 = get_ar_content(path2)
if content1 != content2:
differences.append(Difference(
content1, content2, path1, path2, source="metadata"))
difference = Difference.from_content(
content1, content2, path1, path2, source="metadata")
if difference:
differences.append(difference)
differences.extend(_compare_elf_data(path1, path2, source))
return differences
......@@ -34,6 +34,7 @@ def show_ttf(path):
def compare_ttf_files(path1, path2, source=None):
ttf1 = show_ttf(path1)
ttf2 = show_ttf(path2)
if ttf1 != ttf2:
return [Difference(ttf1, ttf2, path1, path2, source='showttf')]
return []
difference = Difference.from_content(ttf1, ttf2, path1, path2, source='showttf')
if not difference:
return []
return [difference]
......@@ -41,6 +41,7 @@ def msgunfmt(path):
def compare_mo_files(path1, path2, source=None):
mo1 = msgunfmt(path1)
mo2 = msgunfmt(path2)
if mo1 != mo2:
return [Difference(mo1, mo2, path1, path2, source='msgunfmt')]
return []
difference = Difference.from_content(mo1, mo2, path1, path2, source='msgunfmt')
if not difference:
return []
return [difference]
......@@ -52,9 +52,10 @@ def compare_gzip_files(path1, path2, source=None):
# check metadata
metadata1 = get_gzip_metadata(path1)
metadata2 = get_gzip_metadata(path2)
if metadata1 != metadata2:
differences.append(Difference(
metadata1, metadata2, path1, path2, source='metadata'))
difference = Difference.from_content(
metadata1, metadata2, path1, path2, source='metadata')
if difference:
differences.append(difference)
# check content
with decompress_gzip(path1) as new_path1:
with decompress_gzip(path2) as new_path2:
......
......@@ -32,7 +32,8 @@ def show_iface(path):
def compare_hi_files(path1, path2, source=None):
iface1 = show_iface(path1)
iface2 = show_iface(path2)
if iface1 != iface2:
return [Difference(iface1, iface2,
path1, path2, source='ghc --show-iface')]
return []
difference = Difference.from_content(
iface1, iface2, path1, path2, source='ghc --show-iface')
if not difference:
return []
return [difference]
......@@ -44,14 +44,14 @@ def compare_pdf_files(path1, path2, source=None):
src = get_source(path1, path2) or 'FILE'
text1 = pdftotext(path1)
text2 = pdftotext(path2)
if text1 != text2:
differences.append(
Difference(text1, text2, path1, path2,
source="pdftotext %s" % src))
difference = Difference.from_content(text1, text2, path1, path2,
source="pdftotext %s" % src)
if difference:
differences.append(difference)
uncompressed1 = uncompress(path1)
uncompressed2 = uncompress(path2)
if uncompressed1 != uncompressed2:
differences.append(
Difference(uncompressed1, uncompressed2, path1, path2,
source="pdftk %s output - uncompress" % src))
difference = Difference.from_content(uncompressed1, uncompressed2, path1, path2,
source="pdftk %s output - uncompress" % src)
if difference:
differences.append(difference)
return differences
......@@ -38,7 +38,8 @@ def sng(path):
def compare_png_files(path1, path2, source=None):
sng1 = sng(path1)
sng2 = sng(path2)
if sng1 != sng2:
return [Difference(sng1, sng2, path1, path2, source='sng')]
return []
difference = Difference.from_content(sng1, sng2, path1, path2, source='sng')
if not difference:
return []
return [difference]
......@@ -80,9 +80,10 @@ def compare_rpm_files(path1, path2, source=None):
ts.setVSFlags(-1)
header1 = get_rpm_header(path1, ts)
header2 = get_rpm_header(path2, ts)
if header1 != header2:
differences.append(Difference(
header1, header2, path1, path2, source="header"))
difference = Difference.from_content(
header1, header2, path1, path2, source="header")
if difference:
differences.append(difference)
# extract cpio archive
with extract_rpm_payload(path1) as archive1:
......
......@@ -56,9 +56,10 @@ def compare_squashfs_files(path1, path2, source=None):
# compare metadata
content1 = get_squashfs_content(path1)
content2 = get_squashfs_content(path2)
if content1 != content2:
differences.append(Difference(
content1, content2, path1, path2, source="metadata"))
difference = Difference.from_content(
content1, content2, path1, path2, source="metadata")
if difference:
differences.append(difference)
# compare files contained in archive
content1 = get_squashfs_content(path1, verbose=False)
......
......@@ -68,7 +68,8 @@ def compare_tar_files(path1, path2, source=None):
# look up differences in file list and file metadata
content1 = get_tar_content(tar1).decode('utf-8')
content2 = get_tar_content(tar2).decode('utf-8')
if content1 != content2:
differences.append(Difference(
content1, content2, path1, path2, source="metadata"))
difference = Difference.from_content(
content1, content2, path1, path2, source="metadata")
if difference:
differences.append(difference)
return differences
......@@ -31,6 +31,7 @@ def compare_text_files(path1, path2, encoding, source=None):
except (LookupError, UnicodeDecodeError):
# unknown or misdetected encoding
return compare_binary_files(path1, path2, source)
if content1 == content2:
difference = Difference.from_content(content1, content2, path1, path2, source)
if not difference:
return []
return [Difference(content1, content2, path1, path2, source)]
return [difference]
......@@ -62,7 +62,7 @@ def binary_fallback(original_function):
difference.comment = (difference.comment or '') + \
"No differences found inside, yet data differs"
else:
difference = Difference(None, None, path1, path2, source=source)
difference = Difference(None, path1, path2, source=source)
difference.add_details(inside_differences)
except subprocess.CalledProcessError as e:
difference = compare_binary_files(path1, path2, source=source)[0]
......
......@@ -70,7 +70,8 @@ def compare_zip_files(path1, path2, source=None):
# search harder
zipinfo1 = get_zipinfo(path1, verbose=True)
zipinfo2 = get_zipinfo(path2, verbose=True)
if zipinfo1 != zipinfo2:
differences.append(Difference(
zipinfo1, zipinfo2, path1, path2, source="metadata"))
difference = Difference.from_content(
zipinfo1, zipinfo2, path1, path2, source="metadata")
if difference:
differences.append(difference)
return differences
......@@ -169,25 +169,15 @@ def diff(content1, content2):
p.wait()
if not parser.success and p.returncode not in (0, 1):
raise subprocess.CalledProcessError(cmd, p.returncode, output=diff)
if p.returncode == 0:
return None
return parser.diff
class Difference(object):
def __init__(self, content1, content2, path1, path2, source=None,
comment=None):
def __init__(self, unified_diff, path1, path2, source=None, comment=None):
self._comment = comment
if content1 and type(content1) is not unicode:
raise UnicodeError('content1 has not been decoded')
if content2 and type(content2) is not unicode:
raise UnicodeError('content2 has not been decoded')
self._unified_diff = None
if content1 is not None and content2 is not None:
try:
self._unified_diff = diff(content1, content2)
except RequiredToolNotFound:
self._comment = 'diff is not available!'
if comment:
self._comment += '\n\n' + comment
self._unified_diff = unified_diff
# allow to override declared file paths, useful when comparing
# tempfiles
if source:
......@@ -201,6 +191,25 @@ class Difference(object):
self._source2 = path2
self._details = []
@staticmethod
def from_content(content1, content2, path1, path2, source=None,
                 comment=None):
    """Build a Difference by diffing two already-decoded contents.

    content1 and content2 are passed to diff(); path1, path2, source and
    comment are forwarded unchanged to the Difference constructor.

    Returns None when diff() produces an empty result, i.e. there is
    nothing to report. When diff() raises RequiredToolNotFound, returns a
    Difference without a unified diff whose comment starts with
    'diff is not available!' (followed by the caller's comment, if any),
    so that a missing tool is not mistaken for identical contents.

    Raises UnicodeError when a non-empty content is not of type unicode.
    """
    if content1 and type(content1) is not unicode:
        raise UnicodeError('content1 has not been decoded')
    if content2 and type(content2) is not unicode:
        raise UnicodeError('content2 has not been decoded')
    try:
        unified_diff = diff(content1, content2)
    except RequiredToolNotFound:
        # The comparison could not be performed at all. Report that
        # explicitly instead of returning None, which callers read as
        # "no differences".
        actual_comment = 'diff is not available!'
        if comment:
            # The original referenced an undefined name (orig_comment)
            # here, which raised NameError on exactly this path.
            actual_comment += '\n\n' + comment
        return Difference(None, path1, path2, source, actual_comment)
    if not unified_diff:
        return None
    return Difference(unified_diff, path1, path2, source, comment)
@property
def comment(self):
return self._comment
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment