difference.py 11.9 KB
Newer Older
1 2
# -*- coding: utf-8 -*-
#
Jérémy Bobbio's avatar
Jérémy Bobbio committed
3
# diffoscope: in-depth comparison of files, archives, and directories
4
#
Jérémy Bobbio's avatar
Jérémy Bobbio committed
5
# Copyright © 2014-2015 Jérémy Bobbio <lunar@debian.org>
6
#
Jérémy Bobbio's avatar
Jérémy Bobbio committed
7
# diffoscope is free software: you can redistribute it and/or modify
8 9 10 11
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
Jérémy Bobbio's avatar
Jérémy Bobbio committed
12
# diffoscope is distributed in the hope that it will be useful,
13 14 15 16 17
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
18
# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
19

20
import heapq
21
import logging
Chris Lamb's avatar
Chris Lamb committed
22

23
from . import feeders
24
from .exc import RequiredToolNotFound
25
from .diff import diff, reverse_unified_diff, diff_split_lines
26
from .excludes import command_excluded
27

28
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
29 30


31
class Difference(object):
    """A node in a tree of differences between two sources.

    Each node carries an optional unified diff, the two source labels it was
    computed from, free-form comment lines, child ``Difference`` nodes
    (``details``) and attached ``VisualDifference`` objects (``visuals``).
    """

    def __init__(self, unified_diff, path1, path2, source=None, comment=None,
                 has_internal_linenos=False, details=None, visuals=None):
        """Create a difference node.

        :param unified_diff: unified diff text, or None when there is none.
        :param path1: label of the first side, used when `source` is falsy.
        :param path2: label of the second side, used when `source` is falsy.
        :param source: optional override of both labels; either a single
            string used for both sides or a two-item list ``[src1, src2]``.
        :param comment: optional comment; a string, or a list of strings.
        :param has_internal_linenos: whether the unified diff already
            contains line numbers inside itself.
        :param details: optional list of child differences.
        :param visuals: optional list of visual differences.
        :raises TypeError: if a resulting source label is not a ``str``.
        """
        self._unified_diff = unified_diff

        self._comments = []
        if comment:
            if isinstance(comment, list):
                self._comments.extend(comment)
            else:
                self._comments.append(comment)

        # Allow to override declared file paths, useful when comparing
        # tempfiles
        if source:
            if isinstance(source, list):
                self._source1, self._source2 = source
            else:
                self._source1 = source
                self._source2 = source
        else:
            self._source1 = path1
            self._source2 = path2

        # Ensure renderable types
        if not isinstance(self._source1, str):
            raise TypeError("path1/source[0] is not a string")
        if not isinstance(self._source2, str):
            raise TypeError("path2/source[1] is not a string")

        # Whether the unified_diff already contains line numbers inside itself
        self._has_internal_linenos = has_internal_linenos
        self._details = details or []
        self._visuals = visuals or []
        # Memoised result of size(); reset to None by the add_* mutators.
        self._size_cache = None

    def __repr__(self):
        return "<Difference %s -- %s %s>" % (
            self._source1,
            self._source2,
            self._details,
        )

    def map_lines(self, f_diff, f_comment):
        """Return a copy with every diff line and comment line transformed.

        `f_diff` is applied to each line of the unified diff and `f_comment`
        to each line of every comment (lines as produced by
        ``diff_split_lines``); the results are joined back together. The
        details and visuals lists are shallow-copied.
        """
        unified_diff = self.unified_diff
        if unified_diff is not None:
            mapped_diff = "".join(map(f_diff, diff_split_lines(unified_diff)))
        else:
            mapped_diff = None
        return self.__class__(
            mapped_diff,
            self.source1,
            self.source2,
            comment=["".join(map(f_comment, diff_split_lines(comment)))
                     for comment in self._comments],
            has_internal_linenos=self.has_internal_linenos,
            details=self._details[:],
            visuals=self._visuals[:],
        )

    def fmap(self, f):
        """Apply `f` bottom-up over the tree and return the result.

        Children are mapped first; `f` is then called with a fresh copy of
        this node whose details are the already-mapped children.
        """
        return f(self.__class__(
            self.unified_diff,
            self.source1,
            self.source2,
            comment=self._comments[:],
            has_internal_linenos=self.has_internal_linenos,
            details=[d.fmap(f) for d in self._details],
            visuals=self._visuals[:],
        ))

    def _reverse_self(self):
        """Return this node with both sides swapped (children untouched).

        :raises NotImplementedError: if the node carries visuals.
        """
        # assumes we're being called from get_reverse()
        if self._visuals:
            raise NotImplementedError(
                "_reverse_self on VisualDifference is not yet implemented",
            )
        return self.__class__(
            reverse_unified_diff(
                self.unified_diff) if self.unified_diff is not None else None,
            self.source2,
            self.source1,
            comment=self._comments,  # already copied by fmap in get_reverse
            has_internal_linenos=self.has_internal_linenos,
            details=self._details,  # already reversed by fmap in get_reverse, no need to copy
        )

    def get_reverse(self):
        """Return a new tree in which every node has its two sides swapped."""
        logger.debug("Reverse orig %s %s", self.source1, self.source2)
        return self.fmap(Difference._reverse_self)

    def equals(self, other):
        """Structurally compare this tree with `other`.

        Two nodes are equal when their diff, sources, comments and
        line-number flag match and their details and visuals are pairwise
        equal. The child lists must also have the same length: ``zip`` alone
        would silently ignore extra trailing children on either side.
        """
        return self == other or (
            self.unified_diff == other.unified_diff and
            self.source1 == other.source1 and
            self.source2 == other.source2 and
            self._comments == other._comments and
            self.has_internal_linenos == other.has_internal_linenos and
            len(self._details) == len(other._details) and
            len(self._visuals) == len(other._visuals) and
            all(x.equals(y) for x, y in zip(self._details, other._details)) and
            all(x.equals(y) for x, y in zip(self._visuals, other._visuals)))

    def size(self):
        """Size of this node plus all of its descendants (cached)."""
        if self._size_cache is None:
            self._size_cache = sum(d.size_self()
                                   for d in self.traverse_depth())
        return self._size_cache

    def size_self(self):
        """Size, excluding children."""
        return ((len(self.unified_diff) if self.unified_diff else 0) +
                (len(self.source1) if self.source1 else 0) +
                (len(self.source2) if self.source2 else 0) +
                sum(map(len, self.comments)) +
                sum(v.size() for v in self._visuals))

    def has_visible_children(self):
        """
        Whether there are visible children.

        Useful for e.g. choosing whether to display [+]/[-] controls.
        """
        # bool() so callers get a real boolean instead of one of the lists.
        return bool(self._unified_diff is not None or
                    self._comments or self._details or self._visuals)

    def traverse_depth(self, depth=-1):
        """Yield this node, then its descendants, depth first.

        `depth` limits how many levels below this node are visited; 0 yields
        only this node and a negative value means no limit.
        """
        yield self
        if depth != 0:
            for d in self._details:
                yield from d.traverse_depth(depth - 1)

    def traverse_breadth(self, queue=None):
        """Yield nodes breadth first, starting from `queue` (default: self).

        Implemented as a loop: a recursive ``yield from`` per node would nest
        one generator frame for every node visited and hit the interpreter's
        recursion limit on large trees. A caller-supplied `queue` is consumed
        in place.
        """
        queue = queue if queue is not None else [self]
        while queue:
            top = queue.pop(0)
            yield top
            queue.extend(top._details)

    def traverse_heapq(self, scorer, yield_score=False, queue=None):
        """Traverse the difference tree using a priority queue, where each node
        is scored according to a user-supplied function, and nodes with smaller
        scores are traversed first (after they have been added to the queue).

        The function `scorer` takes two arguments, a node to score and the
        score of its parent node (or None if there is no parent). It should
        return the score of the input node.

        If `yield_score` is true, ``(node, score)`` pairs are yielded instead
        of bare nodes. Sending a true value into the generator after a node
        has been yielded prunes that node's descendants.

        `queue`, if given, is an iterable of ``(score, node)`` pairs to start
        from; it is copied rather than modified. Nodes with equal scores are
        visited in the order they were queued.
        """
        if queue is None:
            queue = [(scorer(self, None), self)]
        # Heap entries are (score, sequence, node). The sequence number breaks
        # ties so that nodes, which define no ordering, are never compared.
        heap = []
        seq = 0
        for val, node in queue:
            heap.append((val, seq, node))
            seq += 1
        heapq.heapify(heap)
        while heap:
            val, _, top = heapq.heappop(heap)
            prune_descendants = yield ((top, val) if yield_score else top)
            if not prune_descendants:
                for d in top._details:
                    heapq.heappush(heap, (scorer(d, val), seq, d))
                    seq += 1

    @staticmethod
    def from_feeder(feeder1, feeder2, path1, path2, source=None, comment=None, **kwargs):
        """Build a Difference by running ``diff`` on two feeders.

        Returns None when ``diff`` produces nothing. If ``diff`` raises
        RequiredToolNotFound, returns a Difference without a unified diff
        that carries a "diff is not available" comment (followed by
        `comment`, if any).
        """
        try:
            unified_diff = diff(feeder1, feeder2)
            if not unified_diff:
                return None
            return Difference(
                unified_diff,
                path1,
                path2,
                source,
                comment,
                **kwargs
            )
        except RequiredToolNotFound:
            difference = Difference(None, path1, path2, source)
            difference.add_comment("diff is not available")
            if comment:
                difference.add_comment(comment)
            return difference

    @staticmethod
    def from_text(content1, content2, *args, **kwargs):
        """Like from_feeder(), feeding from ``feeders.from_text``."""
        return Difference.from_feeder(
            feeders.from_text(content1),
            feeders.from_text(content2),
            *args,
            **kwargs
        )

    @staticmethod
    def from_raw_readers(file1, file2, *args, **kwargs):
        """Like from_feeder(), feeding from ``feeders.from_raw_reader``."""
        return Difference.from_feeder(
            feeders.from_raw_reader(file1),
            feeders.from_raw_reader(file2),
            *args,
            **kwargs
        )

    @staticmethod
    def from_text_readers(file1, file2, *args, **kwargs):
        """Like from_feeder(), feeding from ``feeders.from_text_reader``."""
        return Difference.from_feeder(
            feeders.from_text_reader(file1),
            feeders.from_text_reader(file2),
            *args,
            **kwargs
        )

    @staticmethod
    def from_command(klass, path1, path2, *args, **kwargs):
        """Like from_command_exc(), but return only the Difference (or None)."""
        return Difference.from_command_exc(klass, path1, path2, *args, **kwargs)[0]

    @staticmethod
    def from_command_exc(klass, path1, path2, *args, **kwargs):
        """Compare the output of a command run on each of two paths.

        `klass` is instantiated as ``klass(path, *command_args)`` for each
        path other than ``/dev/null`` (which gets an empty feeder). The
        optional ``command_args`` keyword is consumed here; the remaining
        arguments are forwarded to from_feeder(). Unless ``source`` is given,
        the shell command line of the first available command is used as the
        source label.

        Returns a ``(difference, excluded)`` pair: ``(None, True)`` when a
        command is excluded, ``(None, False)`` when from_feeder() returns
        nothing, otherwise ``(difference, False)`` with any stderr output of
        the commands appended as comments.
        """
        command_args = kwargs.pop('command_args', [])

        def command_and_feeder(path):
            # Returns (feeder, command, excluded) for one side.
            command = None
            if path == '/dev/null':
                feeder = feeders.empty()
            else:
                command = klass(path, *command_args)
                feeder = feeders.from_command(command)
                if command_excluded(command.shell_cmdline()):
                    return None, None, True
                command.start()
            return feeder, command, False

        feeder1, command1, excluded1 = command_and_feeder(path1)
        feeder2, command2, excluded2 = command_and_feeder(path2)
        if not feeder1 or not feeder2:
            # NOTE(review): if only the second side is excluded, command1 has
            # already been started and is not cleaned up here - confirm
            # whether that needs handling.
            assert excluded1 or excluded2
            return None, True

        if 'source' not in kwargs:
            source_cmd = command1 or command2
            # Both sides may be /dev/null, in which case there is no command
            # to name; fall back to the paths instead of failing.
            if source_cmd is not None:
                kwargs['source'] = source_cmd.shell_cmdline()

        difference = Difference.from_feeder(
            feeder1,
            feeder2,
            path1,
            path2,
            *args,
            **kwargs
        )
        if not difference:
            return None, False

        for command in (command1, command2):
            if command and command.stderr:
                difference.add_comment("stderr from `{}`:".format(
                    ' '.join(command.cmdline()),
                ))
                difference.add_comment(command.stderr)

        return difference, False

    @property
    def comment(self):
        """All comment lines joined with newlines."""
        return '\n'.join(self._comments)

    @property
    def comments(self):
        """The list of comment entries (the internal list, not a copy)."""
        return self._comments

    def add_comment(self, comment):
        """Append each line of `comment` as a separate comment entry."""
        self._comments.extend(comment.splitlines())
        self._size_cache = None

    @property
    def source1(self):
        return self._source1

    @property
    def source2(self):
        return self._source2

    @property
    def unified_diff(self):
        return self._unified_diff

    @property
    def has_internal_linenos(self):
        return self._has_internal_linenos

    @property
    def details(self):
        return self._details

    @property
    def visuals(self):
        return self._visuals

    def add_details(self, differences):
        """Append child differences.

        :raises TypeError: if any item is not a Difference instance.
        """
        if any(not isinstance(d, Difference) for d in differences):
            raise TypeError("'differences' must contains Difference objects'")
        self._details.extend(differences)
        self._size_cache = None

    def add_visuals(self, visuals):
        """Append visual differences.

        :raises TypeError: if any item is not a VisualDifference instance.
        """
        if any(not isinstance(v, VisualDifference) for v in visuals):
            raise TypeError("'visuals' must contain VisualDifference objects'")
        self._visuals.extend(visuals)
        self._size_cache = None
336

337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357

class VisualDifference(object):
    """A piece of visual content attached to a Difference.

    Holds three values: a data type, the content itself and a source label.
    """

    def __init__(self, data_type, content, source):
        self._data_type, self._content, self._source = (
            data_type, content, source)

    @property
    def data_type(self):
        """The data type given at construction."""
        return self._data_type

    @property
    def content(self):
        """The content given at construction."""
        return self._content

    @property
    def source(self):
        """The source label given at construction."""
        return self._source

    def size(self):
        """Combined length of the data type, content and source."""
        parts = (self.data_type, self.content, self.source)
        return sum(len(part) for part in parts)

    def equals(self, other):
        """Whether `other` is this object or carries the same three values."""
        if self == other:
            return True
        # Compare field by field, stopping at the first mismatch.
        if not (self._data_type == other._data_type):
            return False
        if not (self._content == other._content):
            return False
        return self._source == other._source