-
Chris Lamb authored
This adds a "Missing-Python-Modules" entry to the output of --list-missing-tools, for example: ``` Missing-Python-Modules: PyPDF2, argcomplete, defusedxml, guestfs, jsondiff, progressbar, rpm ``` Signed-off-by:
Chris Lamb <lamby@debian.org>
Chris Lamb authored: This adds a "Missing-Python-Modules" entry to the output of --list-missing-tools, for example: ``` Missing-Python-Modules: PyPDF2, argcomplete, defusedxml, guestfs, jsondiff, progressbar, rpm ``` Signed-off-by:
Chris Lamb <lamby@debian.org>
main.py 26.71 KiB
#!/usr/bin/env python3
# PYTHON_ARGCOMPLETE_OK
# -*- coding: utf-8 -*-
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2014-2015 Jérémy Bobbio <lunar@debian.org>
# Copyright © 2017 Chris Lamb <lamby@debian.org>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import os
import sys
import json
import errno
import signal
import logging
import textwrap
import argparse
import traceback
from . import VERSION
from .path import set_path
from .tools import (
tool_check_installed,
tool_prepend_prefix,
python_module_missing,
tool_required,
OS_NAMES,
get_current_os,
)
from .config import Config
from .locale import set_locale
from .logging import line_eraser, setup_logging
from .progress import ProgressManager, Progress
from .profiling import ProfileManager, profile
from .tempfiles import clean_all_temp_files
from .difference import Difference
from .comparators import ComparatorManager
from .external_tools import EXTERNAL_TOOLS
from .presenters.html import JQUERY_SYSTEM_LOCATIONS
from .presenters.formats import PresenterManager
from .comparators.utils.compare import compare_root_paths
from .readers import load_diff, load_diff_from_path
logger = logging.getLogger(__name__)

# "tlsh" is optional: when the import fails, leave a None placeholder so the
# rest of the module can test for availability, and register it as missing.
try:
    import tlsh
except ImportError:
    tlsh = None
    python_module_missing('tlsh')

# "argcomplete" is optional as well and gets the same treatment.
try:
    import argcomplete
except ImportError:
    argcomplete = None
    python_module_missing('argcomplete')
class BooleanAction(argparse.Action):
    """Argparse action for paired ``--flag`` / ``--no-flag`` switches.

    The option consumes no value. The stored result is ``False`` when the
    spelling used on the command line starts with ``--no`` and ``True``
    otherwise.
    """

    def __init__(self, option_strings, dest, nargs=None, **kwargs):
        # The switch never takes a value, so an explicit nargs is rejected
        # instead of being silently overridden.
        if nargs is not None:
            raise ValueError("nargs not allowed for BooleanAction")
        super().__init__(option_strings, dest, nargs=0, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        negated = option_string.startswith("--no")
        setattr(namespace, self.dest, not negated)
def create_parser():
    """
    Build the command-line argument parser.

    Returns a ``(parser, post_parse)`` tuple. ``parser`` is the configured
    ``argparse.ArgumentParser``; ``post_parse`` is a callable taking the
    parsed arguments which logs a warning when no second path was given but
    options from the "diff calculation" group differ from their defaults.

    If the "argcomplete" module was imported, completion is hooked up here;
    if it was not but the ``_ARGCOMPLETE`` environment variable is set, an
    error is logged and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description='Calculate differences between two files or directories',
        add_help=False,
        formatter_class=HelpFormatter,
    )
    parser.add_argument(
        'path1',
        help='First file or directory to compare. Specify "-" to read a '
        'diffoscope diff from stdin.',
    )
    parser.add_argument(
        'path2',
        nargs='?',
        help='Second file or directory to '
        'compare. If omitted, no comparison is done but instead we read a '
        'diffoscope diff from path1 and will output this in the formats '
        'specified by the rest of the command line.',
    )
    parser.add_argument(
        '--debug',
        action='store_true',
        default=False,
        help='Display debug messages',
    )
    parser.add_argument(
        '--debugger',
        action='store_true',
        help='Open the Python debugger in case of crashes',
    )
    parser.add_argument(
        '--status-fd',
        metavar='FD',
        type=int,
        help='Send machine-readable status to file descriptor FD',
    )
    # Both spellings share one destination; BooleanAction stores False for
    # the "--no" spelling and True otherwise. None means "not specified".
    parser.add_argument(
        '--progress',
        '--no-progress',
        action=BooleanAction,
        default=None,
        help='Show an approximate progress bar. Default: yes if '
        'stdin is a tty, otherwise no.',
    )
    parser.add_argument(
        '--no-default-limits',
        action='store_true',
        default=False,
        help='Disable most default output limits and diff calculation limits.',
    )

    group1 = parser.add_argument_group('output types')
    group1.add_argument(
        '--text',
        metavar='OUTPUT_FILE',
        dest='text_output',
        help='Write plain text output to given file (use - for stdout)',
    )
    group1.add_argument(
        '--text-color',
        metavar='WHEN',
        default='auto',
        choices=['never', 'auto', 'always'],
        help='When to output color diff. WHEN is one of {%(choices)s}. '
        'Default: auto, meaning yes if the output is a terminal, otherwise no.',
    )
    group1.add_argument(
        '--output-empty',
        action='store_true',
        help='If there was no difference, then output an empty '
        'diff for each output type that was specified. In '
        '--text output, an empty file is written.',
    )
    group1.add_argument(
        '--html',
        metavar='OUTPUT_FILE',
        dest='html_output',
        help='Write HTML report to given file (use - for stdout)',
    )
    group1.add_argument(
        '--html-dir',
        metavar='OUTPUT_DIR',
        dest='html_output_directory',
        help='Write multi-file HTML report to given directory',
    )
    group1.add_argument(
        '--css',
        metavar='URL',
        dest='css_url',
        help='Link to an extra CSS for the HTML report',
    )
    group1.add_argument(
        '--jquery',
        metavar='URL',
        dest='jquery_url',
        help='URL link to jQuery, for --html and --html-dir output. '
        'If this is a non-existent relative URL, diffoscope will create a '
        'symlink to a system installation. (Paths searched: %s.) '
        'If not given, --html output will not use JS but --html-dir will '
        'if it can be found; give "disable" to disable JS on all outputs.'
        % ', '.join(JQUERY_SYSTEM_LOCATIONS),
    )
    group1.add_argument(
        '--json',
        metavar='OUTPUT_FILE',
        dest='json_output',
        help='Write JSON text output to given file (use - for stdout)',
    )
    group1.add_argument(
        '--markdown',
        metavar='OUTPUT_FILE',
        dest='markdown_output',
        help='Write Markdown text output to given file (use - for stdout)',
    )
    group1.add_argument(
        '--restructured-text',
        metavar='OUTPUT_FILE',
        dest='restructuredtext_output',
        help='Write RsT text output to given file (use - for stdout)',
    )
    group1.add_argument(
        '--profile',
        metavar='OUTPUT_FILE',
        dest='profile_output',
        help='Write profiling info to given file (use - for stdout)',
    )

    group2 = parser.add_argument_group('output limits')
    # everything marked with default=None below is affected by no-default-limits
    group2.add_argument(
        '--max-text-report-size',
        metavar='BYTES',
        type=int,
        help='Maximum bytes written in --text report. (0 to '
        'disable, default: %d)' % Config().max_text_report_size,
        default=None,
    )
    # The ".completer" attribute set on the returned action is presumably
    # what argcomplete consults for value suggestions -- TODO confirm.
    group2.add_argument(
        '--max-report-size',
        metavar='BYTES',
        type=int,
        help='Maximum bytes of a report in a given format, '
        'across all of its pages. Note that some formats, such '
        'as --html, may be restricted by even smaller limits '
        'such as --max-page-size. (0 to disable, default: %d)'
        % Config().max_report_size,
        default=None,
    ).completer = RangeCompleter(Config().max_report_size)
    group2.add_argument(
        '--max-diff-block-lines',
        metavar='LINES',
        type=int,
        help='Maximum number of lines output per unified-diff '
        'block, across all pages. (0 to disable, default: %d)'
        % Config().max_diff_block_lines,
        default=None,
    ).completer = RangeCompleter(Config().max_diff_block_lines)
    group2.add_argument(
        '--max-page-size',
        metavar='BYTES',
        type=int,
        help='Maximum bytes of the top-level (--html-dir) or sole '
        '(--html) page. (default: %(default)s, remains in effect '
        'even with --no-default-limits)',
        default=Config().max_page_size,
    ).completer = RangeCompleter(Config().max_page_size)
    # NOTE(review): unlike its siblings this default is passed as a string;
    # argparse applies "type" to string defaults, so it should still end up
    # as an int -- confirm this is intentional.
    group2.add_argument(
        '--max-page-size-child',
        metavar='BYTES',
        type=int,
        help='In --html-dir output, this is the maximum bytes of '
        'each child page (default: %(default)s, remains in '
        'effect even with --no-default-limits)',
        default=str(Config().max_page_size_child),
    ).completer = RangeCompleter(Config().max_page_size_child)
    # TODO: old flag kept for backwards-compat, drop 6 months after v84
    group2.add_argument(
        '--max-report-size-child',
        metavar='BYTES',
        type=int,
        help=argparse.SUPPRESS,
        default=None,
    )
    group2.add_argument(
        '--max-page-diff-block-lines',
        metavar='LINES',
        type=int,
        help='Maximum number of lines output per unified-diff block '
        'on the top-level (--html-dir) or sole (--html) page, before '
        'spilling it into child pages (--html-dir) or skipping the '
        'rest of the diff block. Child pages are limited instead by '
        '--max-page-size-child. (default: %(default)s, remains in '
        'effect even with --no-default-limits)',
        default=Config().max_page_diff_block_lines,
    ).completer = RangeCompleter(Config().max_page_diff_block_lines)
    # TODO: old flag kept for backwards-compat, drop 6 months after v84
    group2.add_argument(
        "--max-diff-block-lines-parent",
        metavar='LINES',
        type=int,
        help=argparse.SUPPRESS,
        default=None,
    )

    # The actions registered on group3 are the ones post_parse() below
    # inspects when only a single path was supplied.
    group3 = parser.add_argument_group('diff calculation')
    group3.add_argument(
        '--new-file', action='store_true', help='Treat absent files as empty'
    )
    group3.add_argument(
        '--exclude',
        dest='excludes',
        metavar='GLOB_PATTERN',
        action='append',
        default=[],
        help='Exclude files that match %(metavar)s. Use this '
        'option to ignore files based on their names.',
    )
    group3.add_argument(
        '--exclude-command',
        dest='exclude_commands',
        metavar='REGEX_PATTERN',
        action='append',
        default=[],
        help='Exclude commands that match %(metavar)s. For '
        "example '^readelf.*\\s--debug-dump=info' can take a "
        'long time and differences here are likely secondary '
        'differences caused by something represented '
        'elsewhere. Use this option to disable commands that '
        'use a lot of resources.',
    )
    group3.add_argument(
        '--exclude-directory-metadata',
        choices=('auto', 'yes', 'no', 'recursive'),
        help='Exclude directory metadata. Useful if comparing '
        'files whose filesystem-level metadata is not intended '
        'to be distributed to other systems. This is true for '
        'most distributions package builders, but not true '
        'for the output of commands such as `make install`. '
        'Metadata of archive members remain un-excluded '
        'except if "recursive" choice is set. '
        'Use this option to ignore permissions, timestamps, '
        'xattrs etc. Default: False if comparing two '
        'directories, else True. Note that "file" metadata '
        'actually a property of its containing directory, '
        'and is not relevant when distributing the file across '
        'systems.',
    )
    group3.add_argument(
        '--fuzzy-threshold',
        type=int,
        help='Threshold for fuzzy-matching '
        '(0 to disable, %(default)s is default, 400 is high fuzziness)',
        default=Config().fuzzy_threshold,
    ).completer = RangeCompleter(400)
    group3.add_argument(
        '--tool-prefix-binutils',
        metavar='PREFIX',
        help='Prefix for binutils program names, e.g. '
        '"aarch64-linux-gnu-" for a foreign-arch binary or "g" '
        'if you\'re on a non-GNU system.',
    )
    group3.add_argument(
        '--max-diff-input-lines',
        metavar='LINES',
        type=int,
        help='Maximum number of lines fed to diff(1) '
        '(0 to disable, default: %d)' % Config().max_diff_input_lines,
        default=None,
    ).completer = RangeCompleter(Config().max_diff_input_lines)
    group3.add_argument(
        '--max-container-depth',
        metavar='DEPTH',
        type=int,
        help='Maximum depth to recurse into containers. '
        '(Cannot be disabled for security reasons, default: '
        '%(default)s)',
        default=Config().max_container_depth,
    )
    group3.add_argument(
        '--max-diff-block-lines-saved',
        metavar='LINES',
        type=int,
        help='Maximum number of lines saved per diff block. '
        'Most users should not need this, unless you run out '
        'of memory. This truncates diff(1) output before emitting '
        'it in a report, and affects all types of output, '
        'including --text and --json. (0 to disable, default: '
        '%(default)s)',
        default=0,
    )
    group3.add_argument(
        '--use-dbgsym',
        metavar='WHEN',
        default='auto',
        choices=('no', 'auto', 'yes'),
        help='When to automatically use corresponding -dbgsym packages when '
        'comparing .deb files. WHEN is one of {%(choices)s}. Default: auto, '
        'meaning yes if two .changes or .buildinfo files are specified, '
        'otherwise no.',
    )
    group3.add_argument(
        '--force-details',
        default=False,
        action='store_true',
        help='Force recursing into the depths of file formats '
        'even if files have the same content, only really '
        'useful for debugging diffoscope. Default: %(default)s',
    )

    # add_help=False was passed above, so --help/-h is registered by hand
    # here, inside the "information commands" group.
    group4 = parser.add_argument_group('information commands')
    group4.add_argument(
        '--help', '-h', action='help', help="Show this help and exit"
    )
    group4.add_argument(
        '--version',
        action='version',
        version='diffoscope %s' % VERSION,
        help="Show program's version number and exit",
    )
    group4.add_argument(
        '--list-tools',
        nargs='?',
        type=str,
        action=ListToolsAction,
        metavar='DISTRO',
        choices=OS_NAMES,
        help='Show external tools required and exit. '
        'DISTRO can be one of {%(choices)s}. '
        'If specified, the output will list packages in that '
        'distribution that satisfy these dependencies.',
    )
    group4.add_argument(
        '--list-debian-substvars',
        action=ListDebianSubstvarsAction,
        help="List packages needed for Debian in 'substvar' format.",
    )
    group4.add_argument(
        '--list-missing-tools',
        nargs='?',
        type=str,
        action=ListMissingToolsAction,
        metavar='DISTRO',
        choices=OS_NAMES,
        help='Show missing external tools and exit. '
        'DISTRO can be one of {%(choices)s}. '
        'If specified, the output will list packages in that '
        'distribution that satisfy these dependencies.',
    )

    # Tell the user how to enable fuzzy matching when "tlsh" is unavailable.
    if not tlsh:
        parser.epilog = (
            'File renaming detection based on fuzzy-matching is currently '
            'disabled. It can be enabled by installing the "tlsh" module '
            'available from https://github.com/trendmicro/tlsh or in the '
            'python3-tlsh package.'
        )

    if argcomplete:
        argcomplete.autocomplete(parser)
    elif '_ARGCOMPLETE' in os.environ:
        # Completion was requested through the environment but cannot be
        # served without the module: report it and stop.
        logger.error(
            'Argument completion requested but the "argcomplete" module is '
            'not installed. It can be obtained from '
            'https://pypi.python.org/pypi/argcomplete or in the '
            'python3-argcomplete package.'
        )
        sys.exit(1)

    def post_parse(parsed_args):
        """Warn about diff-calculation flags given without a second path."""
        if parsed_args.path2 is None:
            # warn about unusual flags in this mode
            # Collect every option string of each group3 action whose parsed
            # value differs from that action's default.
            ineffective_flags = [
                f
                for x in group3._group_actions
                if getattr(parsed_args, x.dest) != x.default
                for f in x.option_strings
            ]
            if ineffective_flags:
                logger.warning(
                    'Loading diff instead of calculating it, but '
                    'diff-calculation flags were given; they will be ignored:'
                )
                logger.warning(ineffective_flags)

    return parser, post_parse
class HelpFormatter(argparse.HelpFormatter):
    """Help formatter that appends extra sections to the full help text.

    The extra sections (supported file formats, homepage and bug tracker)
    are only added when ``--help`` or ``-h`` appears in ``sys.argv``.
    """

    def format_help(self, *args, **kwargs):
        base = super().format_help(*args, **kwargs)

        # Only append the file formats if --help is passed.
        help_requested = any(flag in sys.argv for flag in ('--help', '-h'))
        if not help_requested:
            return base

        def section(title, content, indent=24, max_width=78):
            # Wrap the content to the space left after indenting, then
            # indent every wrapped line below the section title.
            body = textwrap.fill(content, max_width - indent)
            return '\n{}:\n{}\n'.format(
                title, textwrap.indent(body, ' ' * indent)
            )

        descriptions = sorted(ComparatorManager().get_descriptions())
        formats = '{} and {}.\n'.format(
            ', '.join(descriptions[:-1]), descriptions[-1]
        )

        extras = [
            section('file formats supported', formats),
            section('diffoscope homepage', '<https://diffoscope.org/>'),
            section(
                'bugs/issues',
                '<https://salsa.debian.org/reproducible-builds/diffoscope/issues>',
                max_width=sys.maxsize,
            ),
        ]
        return base + ''.join(extras)
class RangeCompleter:
    """Completer that suggests evenly spaced integers between two bounds.

    ``start`` and ``end`` may be given in either order; they are swapped so
    that the smaller one comes first. The inclusive span is cut into roughly
    ``divisions`` steps and the resulting values are kept in ``choices``.

    Calling the instance with a ``prefix`` string yields, as strings, the
    choices whose decimal representation starts with that prefix.
    """

    def __init__(self, start, end=0, divisions=16):
        if end < start:
            start, end = end, start

        # A span narrower than ``divisions`` would give a step of zero, which
        # range() rejects with ValueError; fall back to stepping by one.
        step = max(1, (end - start + 1) // divisions)
        self.choices = range(start, end + 1, step)

    def __call__(self, prefix, **kwargs):
        return (str(i) for i in self.choices if str(i).startswith(prefix))
class ListToolsAction(argparse.Action):
    """Argparse action that prints the external tools and exits.

    When ``only_missing`` is true, the listing is restricted to tools for
    which ``tool_check_installed`` returns a false value. After the tool and
    package lines, the missing Python modules are printed and the process
    exits with status 0.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.only_missing = False

    def __call__(self, parser, namespace, os_override, option_string=None):
        # Ensure all comparators are imported so tool_required.all is
        # populated.
        ComparatorManager().reload()

        names = sorted(tool_required.all)
        if self.only_missing:
            names = [name for name in names if not tool_check_installed(name)]

        print("External-Tools-Required: ", end='')
        print(', '.join(names))

        # An explicit override wins; otherwise use the detected OS when it
        # is a known one, and fall back to every known OS.
        detected = get_current_os()
        if os_override:
            targets = [os_override]
        elif detected in OS_NAMES:
            targets = [detected]
        else:
            targets = iter(OS_NAMES)

        for target in targets:
            print(
                "Available-in-{}-packages: ".format(OS_NAMES[target]), end=''
            )
            packages = set()
            for name in names:
                try:
                    packages.add(EXTERNAL_TOOLS[name][target])
                except KeyError:
                    # No package recorded for this tool on this OS.
                    continue
            print(', '.join(sorted(packages)))

        print("Missing-Python-Modules: ", end='')
        print(', '.join(sorted(python_module_missing.modules)))
        sys.exit(0)
class ListMissingToolsAction(ListToolsAction):
    """Variant of ListToolsAction with ``only_missing`` switched on."""

    def __init__(self, *action_args, **action_kwargs):
        super().__init__(*action_args, **action_kwargs)
        # Set after the parent constructor, which resets the flag to False.
        self.only_missing = True
class ListDebianSubstvarsAction(argparse._StoreTrueAction):
    """Argparse action printing a ``diffoscope:Recommends=`` line, then exiting.

    The line lists the Debian packages recorded in ``EXTERNAL_TOOLS`` for the
    collected tools, sorted, minus a fixed set of "Required" packages.
    """

    def __call__(self, *args, **kwargs):
        # Attempt to import all comparators so tool_required.all is as
        # populated as possible...
        ComparatorManager().reload()

        # ... however for the generated substvar to be effective/deterministic
        # regardless of the currently installed packages we special-case some
        # tools (NB. not package names) as their modules may not have been
        # imported by the `ComparatorManager().reload()` call above. (#908072)
        tools = {
            'gpg',  # comparators/debian.py
            'rpm2cpio',  # comparators/rpm.py
        }
        tools.update(tool_required.all)

        packages = set()
        for tool in tools:
            try:
                packages.add(EXTERNAL_TOOLS[tool]['debian'])
            except KeyError:
                # Tool unknown, or no Debian package recorded for it.
                continue

        # Exclude "Required" packages
        packages.difference_update(
            ('gzip', 'tar', 'coreutils', 'diffutils', 'findutils')
        )

        recommends = ', '.join(sorted(packages))
        print('diffoscope:Recommends={}'.format(recommends))
        sys.exit(0)
def maybe_set_limit(config, parsed_args, key):
    """
    Apply one of the limits affected by "no-default-limits" to ``config``.

    If ``parsed_args`` carries a value for ``key``, it is copied onto
    ``config``, with 0 translated to infinity. If no value was given, the
    limit becomes infinity when ``parsed_args.no_default_limits`` is set and
    is left untouched otherwise.
    """
    unlimited = float("inf")
    requested = getattr(parsed_args, key)

    if requested is None:
        if parsed_args.no_default_limits:
            setattr(config, key, unlimited)
        return

    setattr(config, key, unlimited if requested == 0 else requested)
def run_diffoscope(parsed_args):
    """
    Configure diffoscope from ``parsed_args``, then either load an existing
    diff (when ``path2`` is omitted) or compare ``path1`` against ``path2``,
    and hand the result to the presenters.

    Returns 1 if differences were found, or if a diff could not be parsed
    from ``path1``; returns 0 otherwise.

    (This should not be considered a stable API suitable for external
    consumption, and the lack of configuration of globals may result in
    unpredictable behaviour.)
    """
    ProfileManager().setup(parsed_args)
    PresenterManager().configure(parsed_args)
    logger.debug("Starting diffoscope %s", VERSION)

    if not tlsh:
        logger.warning(
            'Fuzzy-matching is currently disabled as the "tlsh" module is unavailable.'
        )

    maybe_set_limit(Config(), parsed_args, "max_report_size")
    maybe_set_limit(Config(), parsed_args, "max_text_report_size")
    maybe_set_limit(Config(), parsed_args, "max_diff_block_lines")
    Config().max_page_size = parsed_args.max_page_size

    Config().max_page_size_child = parsed_args.max_page_size_child
    # TODO: old flag kept for backwards-compat, drop 6 months after v84
    # Applied after the new flag: the new flag always carries a value (its
    # default), so assigning it last would make the deprecated one a no-op.
    if parsed_args.max_report_size_child is not None:
        logger.warning(
            "Detected deprecated flag --max-report-size-child; use --max-page-size-child instead."
        )
        Config().max_page_size_child = parsed_args.max_report_size_child

    Config().max_page_diff_block_lines = parsed_args.max_page_diff_block_lines
    # TODO: old flag kept for backwards-compat, drop 6 months after v84
    # Same ordering concern as for --max-report-size-child above.
    if parsed_args.max_diff_block_lines_parent is not None:
        logger.warning(
            "Detected deprecated flag --max-diff-block-lines-parent; use --max-page-diff-block-lines instead."
        )
        logger.warning(
            "Note that the new flag --max-page-diff-block-lines also applies to --html output."
        )
        Config().max_page_diff_block_lines = (
            parsed_args.max_diff_block_lines_parent
        )

    maybe_set_limit(Config(), parsed_args, "max_diff_block_lines_saved")
    maybe_set_limit(Config(), parsed_args, "max_diff_input_lines")
    Config().max_container_depth = parsed_args.max_container_depth
    Config().use_dbgsym = parsed_args.use_dbgsym
    Config().force_details = parsed_args.force_details
    Config().fuzzy_threshold = parsed_args.fuzzy_threshold
    Config().new_file = parsed_args.new_file
    Config().excludes = parsed_args.excludes
    Config().exclude_commands = parsed_args.exclude_commands
    Config().exclude_directory_metadata = (
        parsed_args.exclude_directory_metadata
    )
    Config().compute_visual_diffs = PresenterManager().compute_visual_diffs()
    Config().check_constraints()
    tool_prepend_prefix(
        parsed_args.tool_prefix_binutils,
        *"ar as ld ld.bfd nm objcopy objdump ranlib readelf strip".split(),
    )
    set_path()
    set_locale()

    path1, path2 = parsed_args.path1, parsed_args.path2
    if path2 is None:
        # No second path: read a previously generated diff instead of
        # computing one.
        if path1 == '-':
            logger.debug("Loading diff from stdin")
            difference = load_diff(sys.stdin, "stdin")
        else:
            logger.debug("Loading diff from %s", path1)
            try:
                difference = load_diff_from_path(path1)
            except json.JSONDecodeError:
                traceback.print_exc()
                print(
                    "E: Could not parse diff from '{}'. (Are you sure you "
                    "only meant to specify a single file?)".format(path1),
                    file=sys.stderr,
                )
                return 1
    else:
        if Config().exclude_directory_metadata in ('auto', None):
            # Default to ignoring metadata directory...
            Config().exclude_directory_metadata = 'yes'
            if os.path.isdir(path1) and os.path.isdir(path2):
                # ... except if we passed two directories.
                Config().exclude_directory_metadata = 'no'

        logger.debug('Starting comparison')
        with Progress():
            with profile('main', 'outputs'):
                difference = compare_root_paths(path1, path2)
        ProgressManager().finish()

    # Generate an empty, dummy diff to write, saving the exit code first.
    has_differences = difference is not None
    if difference is None and parsed_args.output_empty:
        difference = Difference(None, path1, path2)

    with profile('main', 'outputs'):
        PresenterManager().output(difference, parsed_args, has_differences)
    return 1 if has_differences else 0
def sigterm_handler(signo, stack_frame):
    """
    Signal handler: remove all temporary files, then leave with status 2.

    ``signo`` and ``stack_frame`` are the arguments the ``signal`` module
    passes to handlers; neither is used here.
    """
    clean_all_temp_files()
    # os._exit() ends the process immediately, without raising SystemExit.
    os._exit(2)
def main(args=None):
    """
    Command-line entry point.

    ``args`` is the list of command-line arguments, defaulting to
    ``sys.argv[1:]``. The function installs the SIGTERM handler, checks that
    "libarchive" can be imported, parses the arguments and runs diffoscope.

    It does not return normally: it exits through ``sys.exit`` with the
    status returned by ``run_diffoscope``, or with status 2 on failure. An
    ``OSError`` other than a broken pipe or "no space left on device" is
    re-raised. Configuration, temporary files and profiling are always
    cleaned up or finalised on the way out.
    """
    if args is None:
        args = sys.argv[1:]

    signal.signal(signal.SIGTERM, sigterm_handler)

    try:
        import libarchive  # noqa: F401 -- imported only to check availability
    except (ImportError, AttributeError):
        traceback.print_exc()
        python_module_missing('libarchive')
        print(
            "\nMissing or incomplete libarchive module. Try installing your "
            "system's 'libarchive' package.",
            file=sys.stderr,
        )
        sys.exit(2)

    # Rewrite/support some legacy argument styles
    for val, repl in (
        ('--exclude-directory-metadata', '--exclude-directory-metadata=yes'),
        ('--no-exclude-directory-metadata', '--exclude-directory-metadata=no'),
    ):
        args = [repl if x == val else x for x in args]

    parsed_args = None
    # Bound before the try block so the handlers below can always log, even
    # when the failure happens before setup_logging() has been entered. A
    # name other than "logger" is used so the module-level logger is not
    # shadowed by a possibly-unbound local.
    log = logging.getLogger(__name__)

    try:
        with profile('main', 'parse_args'):
            parser, post_parse = create_parser()
            parsed_args = parser.parse_args(args)

        log_handler = ProgressManager().setup(parsed_args)

        with setup_logging(parsed_args.debug, log_handler) as log:
            post_parse(parsed_args)
            sys.exit(run_diffoscope(parsed_args))
    except BrokenPipeError:
        # Must precede the OSError clause: BrokenPipeError is a subclass of
        # OSError and would otherwise be caught there and re-raised.
        sys.exit(2)
    except OSError as e:
        if e.errno != errno.ENOSPC:
            raise
        log.error('No space left on device. Diffoscope exiting.')
        sys.exit(2)
    except KeyboardInterrupt:
        log.error('Keyboard Interrupt')
        sys.exit(2)
    except Exception:
        sys.stderr.buffer.write(line_eraser())
        traceback.print_exc()
        if parsed_args and parsed_args.debugger:
            import pdb

            pdb.post_mortem()
        sys.exit(2)
    finally:
        # Helps our tests run more predictably - some of them call main()
        # which sets Config() values.
        Config().reset()

        with profile('main', 'cleanup'):
            clean_all_temp_files()

        # Print profiling output at the very end
        if parsed_args is not None:
            ProfileManager().finish(parsed_args)
# Run the command-line entry point when this file is executed as a script.
if __name__ == '__main__':
    main()