-
Chris Lamb authored
Ensure that all of our artificially-created subprocess.CalledProcessError instances have `output` instances that are bytes objects, not str.
Chris Lamb authored: Ensure that all of our artificially-created subprocess.CalledProcessError instances have `output` instances that are bytes objects, not str.
feeders.py 3.67 KiB
# -*- coding: utf-8 -*-
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2014-2015 Jérémy Bobbio <lunar@debian.org>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import signal
import hashlib
import logging
import subprocess
from .config import Config
from .profiling import profile
logger = logging.getLogger(__name__)
DIFF_CHUNK = 4096
def from_raw_reader(in_file, filter=None):
    """Build a feeder that copies buffers from ``in_file`` to an output file.

    ``in_file`` is iterated to obtain buffers. Each buffer is passed through
    ``filter`` when one is given; the resulting value is hashed and written
    unchanged, so it has to be a bytes-like object.

    The returned ``feeder(out_file)`` callable reads the limit from
    ``Config().max_diff_input_lines``. Buffers are written (and flushed) only
    while the running count stays below that limit; the rest of the input is
    still consumed so that its SHA1 can be reported in a
    "[ Too much input for diff ... ]" marker written in place of the
    truncated data.

    The feeder returns True when the last non-empty input buffer ends with a
    newline, or when the truncation marker (which ends with a newline) was
    written; otherwise it returns False.
    """
    # Indexing ``bytes`` yields an int while indexing ``str`` yields a
    # one-character string, so accept either representation of a newline.
    newline_markers = ('\n', ord('\n'))

    def feeder(out_file):
        max_lines = Config().max_diff_input_lines
        end_nl = False
        line_count = 0

        # If we have a maximum size, hash the content as we go along so we can
        # display a nicer message.
        h = hashlib.sha1() if max_lines < float('inf') else None

        for buf in in_file:
            line_count += 1
            out = buf if filter is None else filter(buf)

            if h is not None:
                h.update(out)

            if line_count < max_lines:
                out_file.write(out)
                # very long lines can sometimes interact negatively with
                # python buffering; force a flush here to avoid this,
                # see https://bugs.debian.org/870049
                out_file.flush()

            if buf:
                # Previously ``buf[-1] == '\n'``, which is always False for
                # bytes input because ``buf[-1]`` is then an integer.
                end_nl = buf[-1] in newline_markers

        if h is not None and line_count >= max_lines:
            out_file.write(
                "[ Too much input for diff (SHA1: {}) ]\n".format(
                    h.hexdigest()
                ).encode('utf-8')
            )
            end_nl = True

        return end_nl

    return feeder
def from_text_reader(in_file, filter=None):
    """Build a feeder for an iterable of text buffers.

    Each buffer read from ``in_file`` is passed through ``filter`` when one
    is supplied, then encoded as UTF-8. The actual feeding is delegated to
    ``from_raw_reader``, whose feeder is returned.
    """

    def encoding_filter(text_buf):
        # Apply the optional caller-supplied filter before encoding.
        filtered = text_buf if filter is None else filter(text_buf)
        return filtered.encode('utf-8')

    return from_raw_reader(in_file, encoding_filter)
def from_command(command):
    """Build a feeder that forwards a command's standard output.

    The returned ``feeder(out_file)`` callable passes ``command.stdout``
    through ``command.filter`` using ``from_raw_reader`` and returns that
    feeder's result.

    Raises ``subprocess.CalledProcessError`` when ``command.returncode`` is
    neither 0 nor ``-signal.SIGTERM``. The exception's ``output`` is the
    UTF-8 encoding of ``command.stderr`` or, when that is empty and there is
    standard output, of the first line of standard output (followed by
    "\\n[…]" if more lines exist).
    """

    def feeder(out_file):
        with profile('command', command.cmdline()[0]):
            stdout_feeder = from_raw_reader(command.stdout, command.filter)
            ends_with_newline = stdout_feeder(out_file)

        exit_status = command.returncode
        if exit_status in (0, -signal.SIGTERM):
            return ends_with_newline

        # On error, default to reporting the standard error contents.
        message = command.stderr
        if not message and command.stdout:
            # ... but if there is none, fall back to the first line of the
            # standard output, marking that more lines were omitted.
            first_line = command.stdout[0].decode('utf-8', 'ignore').strip()
            continuation = '\n[…]' if len(command.stdout) > 1 else ''
            message = '{}{}'.format(first_line, continuation)

        raise subprocess.CalledProcessError(
            exit_status, command.cmdline(), output=message.encode('utf-8')
        )

    return feeder
def from_text(content):
    """Build a feeder that writes the string ``content`` as UTF-8.

    The returned ``feeder(f)`` callable writes ``content`` to ``f`` in slices
    of ``DIFF_CHUNK`` characters, each encoded as UTF-8, and returns True
    when ``content`` ends with a newline and False otherwise (including for
    empty content).
    """

    def feeder(f):
        for offset in range(0, len(content), DIFF_CHUNK):
            f.write(content[offset : offset + DIFF_CHUNK].encode('utf-8'))
        # ``str.endswith`` always yields a bool; the former
        # ``content and content[-1] == '\n'`` returned '' for empty content.
        return content.endswith('\n')

    return feeder
def empty():
    """Return a feeder that writes nothing and reports no trailing newline."""

    def feed_nothing(f):
        # ``f`` is deliberately left untouched.
        return False

    return feed_nothing