# feeders.py
# -*- coding: utf-8 -*-
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2014-2015 Jérémy Bobbio <lunar@debian.org>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.

import signal
import hashlib
import logging
import subprocess

from .config import Config
from .profiling import profile

logger = logging.getLogger(__name__)

DIFF_CHUNK = 4096


def from_raw_reader(in_file, filter=None):
    """Build a feeder that copies the lines of ``in_file`` to an output file.

    ``in_file`` is iterated line by line.  Each line is passed through
    ``filter`` when one is given; the result is what gets hashed and written,
    so it is expected to be bytes (it is fed to ``hashlib`` and written next
    to an UTF-8 encoded notice).

    The returned ``feeder(out_file)`` honours ``Config().max_diff_input_lines``:
    lines are only written while the running line count is below that limit.
    When the limit is finite and was reached, the whole (filtered) input is
    still hashed and a "Too much input" notice carrying the SHA1 digest is
    appended instead of the remaining lines.

    The feeder returns True when the last line read ended with a newline, or
    when the notice (which ends with a newline) was written; False otherwise.
    """
    def feeder(out_file):
        max_lines = Config().max_diff_input_lines
        end_nl = False
        line_count = 0

        # If we have a maximum size, hash the content as we go along so we can
        # display a nicer message.
        h = None
        if max_lines < float('inf'):
            h = hashlib.sha1()

        for buf in in_file:
            line_count += 1
            out = buf if filter is None else filter(buf)

            if h is not None:
                h.update(out)

            if line_count < max_lines:
                out_file.write(out)
                # very long lines can sometimes interact negatively with
                # python buffering; force a flush here to avoid this,
                # see https://bugs.debian.org/870049
                out_file.flush()
            if buf:
                # Indexing bytes yields an int in Python 3, so comparing
                # buf[-1] with '\n' is never true for binary input.  Compare
                # against a newline of the same type as the line instead.
                newline = '\n' if isinstance(buf, str) else b'\n'
                end_nl = buf.endswith(newline)

        if h is not None and line_count >= max_lines:
            out_file.write("[ Too much input for diff (SHA1: {}) ]\n".format(
                h.hexdigest(),
            ).encode('utf-8'))
            end_nl = True

        return end_nl
    return feeder


def from_text_reader(in_file, filter=None):
    """Build a feeder for a reader that yields text lines.

    Every line is optionally transformed by ``filter`` and then encoded as
    UTF-8 before being handed to the feeder built by ``from_raw_reader``.
    """
    def encoding_filter(text_buf):
        # Apply the caller's filter first (if any), then encode the result.
        filtered = text_buf if filter is None else filter(text_buf)
        return filtered.encode('utf-8')

    return from_raw_reader(in_file, encoding_filter)


def from_command(command):
    """Build a feeder that forwards the output of ``command``.

    The returned ``feeder(out_file)`` streams ``command.stdout`` through
    ``command.filter`` into ``out_file`` (via ``from_raw_reader``) inside a
    ``profile('command', ...)`` context keyed on the first element of
    ``command.cmdline()``.

    Raises ``subprocess.CalledProcessError`` (with ``command.stderr`` as its
    output) when ``command.returncode`` is neither 0 nor ``-signal.SIGTERM``
    (presumably the latter means the command was terminated on purpose;
    confirm against the command implementation).

    Otherwise the feeder returns the newline flag reported by the raw feeder.
    """
    def feeder(out_file):
        program = command.cmdline()[0]
        with profile('command', program):
            raw_feeder = from_raw_reader(command.stdout, command.filter)
            end_nl = raw_feeder(out_file)
            returncode = command.returncode

        accepted_codes = (0, -signal.SIGTERM)
        if returncode in accepted_codes:
            return end_nl

        raise subprocess.CalledProcessError(
            returncode,
            command.cmdline(),
            output=command.stderr,
        )
    return feeder


def from_text(content):
    """Build a feeder that writes the string ``content`` as UTF-8.

    The returned ``feeder(f)`` writes ``content`` to ``f`` in slices of
    ``DIFF_CHUNK`` characters, each encoded as UTF-8, and returns True when
    ``content`` ends with a newline, False otherwise (including when it is
    empty).
    """
    def feeder(f):
        for offset in range(0, len(content), DIFF_CHUNK):
            f.write(content[offset:offset + DIFF_CHUNK].encode('utf-8'))
        # endswith() always yields a real bool; ``content and ...`` would
        # hand back the empty string itself for empty content.
        return content.endswith('\n')
    return feeder


def empty():
    """Build a feeder that writes nothing and reports no trailing newline."""
    def feeder(_out_file):
        # Nothing is written to the output file; the flag is always False.
        return False

    return feeder