Commit 0c6710bd authored by Maria Glukhova, committed by Chris Lamb

Add detection of order-only difference in plain text format. (Closes: #848049)

Detect if the text files' contents differ only in line ordering, and give an appropriate comment.
Signed-off-by: Chris Lamb <lamby@debian.org>
parent 043a0d30
......@@ -24,6 +24,17 @@ from diffoscope.difference import Difference
from diffoscope.comparators.binary import File
def order_only_difference(unified_diff):
    """Tell whether a unified diff merely reorders lines.

    Lines starting with '+' are treated as added and lines starting with
    '-' as removed; the marker character is dropped before comparison.
    The diff is "order only" when both sides hold the same multiset of
    lines but in a different sequence.

    Args:
        unified_diff: Text of the diff. ``None`` or an empty string is
            accepted and yields ``False``.

    Returns:
        True if the added and removed lines are a genuine permutation of
        each other, False otherwise.
    """
    # Nothing to inspect means nothing was reordered.
    if not unified_diff:
        return False

    diff_lines = unified_diff.splitlines()
    added_lines = [line[1:] for line in diff_lines if line.startswith('+')]
    removed_lines = [line[1:] for line in diff_lines if line.startswith('-')]

    # Faster check: does number of lines match?
    if len(added_lines) != len(removed_lines):
        return False

    # Identical sequences are not a reordering. This happens when the real
    # difference was swallowed by splitlines() (e.g. '\r\n' versus '\n'
    # line endings) or when the diff contains no changed lines at all.
    if added_lines == removed_lines:
        return False

    # Same lines with the same multiplicities, only arranged differently.
    return sorted(added_lines) == sorted(removed_lines)
class TextFile(File):
RE_FILE_TYPE = re.compile(r'\btext\b')
......@@ -44,6 +55,9 @@ class TextFile(File):
with codecs.open(self.path, 'r', encoding=my_encoding) as my_content, \
codecs.open(other.path, 'r', encoding=other_encoding) as other_content:
difference = Difference.from_text_readers(my_content, other_content, self.name, other.name, source)
# Check if difference is only in line order.
if difference and order_only_difference(difference.unified_diff):
difference.add_comment("ordering differences only")
if my_encoding != other_encoding:
if difference is None:
difference = Difference(None, self.path, other.path, source)
......
......@@ -65,3 +65,11 @@ def test_difference_between_iso88591_and_unicode_only(iso8859, tmpdir):
def test_compare_non_existing(monkeypatch, ascii1):
    """Run the shared non-existing-file assertion against the ascii1 fixture.

    Both ``has_null_source`` and ``has_details`` are passed as False.
    """
    expectations = {'has_null_source': False, 'has_details': False}
    assert_non_existing(monkeypatch, ascii1, **expectations)
# Inputs for test_ordering_differences, built from the bundled data files
# 'text_order1' and 'text_order2'. The names match that test's parameters
# (presumably pytest fixtures produced by load_fixture -- confirm against
# its definition).
text_order1 = load_fixture(data('text_order1'))
text_order2 = load_fixture(data('text_order2'))
def test_ordering_differences(text_order1, text_order2):
    """Check that a pure line reordering is reported as such.

    Compares the two text-order inputs and asserts that the resulting
    difference carries exactly the 'ordering differences only' comment
    and that its unified diff equals the stored expected diff.
    """
    difference = text_order1.compare(text_order2)
    assert difference.comments == ['ordering differences only']
    # Read the expected diff inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(data('text_order_expected_diff')) as expected_file:
        expected_diff = expected_file.read()
    assert difference.unified_diff == expected_diff
These
lines
follow
in
some
order
.
These
some
order
follow
in
lines
.
@@ -1,7 +1,7 @@
These
-lines
-follow
-in
some
order
+follow
+in
+lines
.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment