Commit f19e1ed1 authored by Ximin Luo's avatar Ximin Luo

Add various traverse_* methods to Difference

parent d5b71fab
......@@ -17,6 +17,8 @@
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <>.
import hashlib
import heapq
import re
import logging
......@@ -102,13 +104,16 @@ class Difference(object):
def size(self):
if self._size_cache is None:
self._size_cache = (len(self.unified_diff) +
len(self.source1) +
len(self.source2) +
self._size_cache = sum(d.size_self() for d in self.traverse_depth())
return self._size_cache
def size_self(self):
"""Size, excluding children."""
return ((len(self.unified_diff) if self.unified_diff else 0) +
(len(self.source1) if self.source1 else 0) +
(len(self.source2) if self.source2 else 0) +
sum(map(len, self.comments)) +
sum(d.size() for d in self._details) +
sum(v.size() for v in self._visuals))
return self._size_cache
def has_children(self):
......@@ -119,6 +124,37 @@ class Difference(object):
return self._unified_diff is not None or self._details or self._visuals
def traverse_depth(self, depth=-1):
yield self
if depth != 0:
for d in self._details:
yield from d.traverse_depth(depth-1)
def traverse_breadth(self, queue=None):
queue = queue if queue is not None else [self]
if queue:
top = queue.pop(0)
yield top
yield from self.traverse_breadth(queue)
def traverse_heapq(self, scorer, queue=None):
"""Traverse the difference tree using a priority queue, where each node
is scored according to a user-supplied function, and nodes with smaller
scores are traversed first (after they have been added to the queue).
The function `scorer` takes two arguments, a node to score and the
score of its parent node (or None if there is no parent). It should
return the score of the input node.
queue = queue if queue is not None else [(scorer(self, None), self)]
if queue:
val, top = heapq.heappop(queue)
yield top
for d in top._details:
heapq.heappush(queue, (scorer(d, val), d))
yield from self.traverse_heapq(scorer, queue)
def from_feeder(feeder1, feeder2, path1, path2, source=None, comment=None, **kwargs):
......@@ -18,12 +18,19 @@
# along with diffoscope. If not, see <>.
import io
import itertools
import pytest
from diffoscope.config import Config
from diffoscope.difference import Difference
def assert_size(diff, size):
assert size == diff.size()
assert size == sum(d.size_self() for d in diff.traverse_breadth())
g = itertools.count()
assert size == sum(d.size_self() for d in diff.traverse_heapq(lambda x, _: next(g)))
def assert_algebraic_properties(d, size):
assert d.equals(d.get_reverse().get_reverse())
assert d.get_reverse().size() == d.size() == size
......@@ -47,11 +54,38 @@ def test_too_long_diff_block_lines(monkeypatch):
def test_size_updates():
d = Difference("0123456789", "path1", "path2")
assert d.size() == 20
assert_size(d, 20)
d.add_details([Difference("0123456789", "path1/a", "path2/a")])
assert d.size() == 44
assert_size(d, 44)
assert d.size() == 48
assert_size(d, 48)
def test_traverse_heapq():
d0 = Difference("0", "path1/a", "path2/a")
d1 = Difference("012", "path1/b", "path2/b")
d2 = Difference("01", "path1/c", "path2/c")
Difference("012345678", "path1/a/1", "path2/a/1"),
Difference("0123", "path1/a/2", "path2/a/2"),
Difference("012", "path1/a/3", "path2/a/3")])
Difference("01234567", "path1/b/1", "path2/b/1"),
Difference("01234", "path1/b/2", "path2/b/2"),
Difference("012345", "path1/b/3", "path2/b/3")])
Difference("01", "path1/c/1", "path2/c/1"),
Difference("0123456789", "path1/c/2", "path2/c/2"),
Difference("0123456", "path1/c/3", "path2/c/3")])
diff = Difference("0123456789", "path1", "path2")
diff.add_details([d0, d1, d2])
# traverse nodes in depth order, but at a given depth traverse the nodes
# there from smallest diff (counted non-recursively) to largest
def f(node, parscore):
depth = parscore[0] + 1 if parscore else 0
return depth, node.size_self()
assert_size(diff, 284)
results = [d.source1[6:] for d in diff.traverse_heapq(f)]
assert results == ['', 'a', 'c', 'b', 'c/1', 'a/3', 'a/2', 'b/2', 'b/3', 'c/3', 'b/1', 'a/1', 'c/2']
def test_non_str_arguments_to_source1_source2():
for x in (
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment