Commit 9d804217 authored by Ximin Luo

html-dir: In html-dir output, split large diffs across several child pages

Also add some flags so html-dir behaviour is easier to reason about.

1. Previously, max-report-size would apply to both parent and child pages,
which didn't work well if you disabled this limit.

Instead, we now add an extra --max-report-child-size that remains in effect
even with --no-default-limits.

2. Previously, separate-file-diff-size would affect the parent page in a
discontinuous way: if a diff block had N lines, the parent page would contain
all N lines, but if the diff block had N+1 lines, then 0 lines would be shown
on the parent page. This gives a visually counterintuitive result where a
larger diff would have less presence on the parent overview page.

Instead, we now change this to --max-diff-block-lines-parent, which has
smoother behaviour. If the diff block has N+1 lines, then N lines are shown on
the parent page and the remaining 1 line is shown on the child page. This could
be smoothed out further, but we'll leave this complexity for the future.
parent a12ded4d
......@@ -27,10 +27,11 @@ class classproperty(property):
class Config(object):
def __init__(self):
self._max_diff_block_lines = 50
self._max_diff_block_lines = 1024
self._max_diff_block_lines_parent = 50
self._max_diff_input_lines = 2 ** 20 # GNU diff cannot process arbitrary large files :(
self._max_report_size = 2000 * 2 ** 10 # 2000 kB
self._separate_file_diff_size = 200 * 2 ** 10 # 200kB
self._max_report_child_size = 500 * 2 ** 10
self._fuzzy_threshold = 60
self._new_file = False
......@@ -40,6 +41,11 @@ class Config(object):
cls._general_config = Config()
return cls._general_config
def _check_constraints(self):
if self._max_diff_block_lines < self._max_diff_block_lines_parent:
raise ValueError("max_diff_block_lines (%s) cannot be smaller than max_diff_block_lines_parent (%s)" %
(self._max_diff_block_lines, self._max_diff_block_lines_parent))
def max_diff_block_lines(self):
return self._max_diff_block_lines
......@@ -47,6 +53,16 @@ class Config(object):
def max_diff_block_lines(self, value):
self._max_diff_block_lines = value
def max_diff_block_lines_parent(self):
return self._max_diff_block_lines_parent
def max_diff_block_lines_parent(self, value):
self._max_diff_block_lines_parent = value
def max_diff_input_lines(self):
......@@ -65,12 +81,12 @@ class Config(object):
self._max_report_size = value
def separate_file_diff_size(self):
return self._separate_file_diff_size
def max_report_child_size(self):
return self._max_report_child_size
def separate_file_diff_size(self, value):
self._separate_file_diff_size = value
def max_report_child_size(self, value):
self._max_report_child_size = value
def fuzzy_threshold(self):
......@@ -67,32 +67,49 @@ def create_parser():
parser.add_argument('--text', metavar='output', dest='text_output',
help='write plain text output to given file (use - for stdout)')
parser.add_argument('--no-default-limits', action='store_true', default=False,
help='Disable all default limits.')
help='Disable most default limits. Note that text '
'output already ignores most of these.')
parser.add_argument('--max-report-size', metavar='BYTES',
dest='max_report_size', type=int,
help='maximum bytes written in report (default: %d, 0 to disable)' %
help='Maximum bytes written in report. In html-dir '
'output, this is the max bytes of the parent page. '
'(0 to disable, default: %d)' %
Config.general.max_report_size, 200000)
parser.add_argument('--separate-file-diff-size', metavar='BYTES',
dest='separate_file_diff_size', type=int,
help='diff size to load diff on demand, with --html-dir (default: %(default)s)',
Config.general.separate_file_diff_size, 20000)
parser.add_argument('--max-diff-block-lines', dest='max_diff_block_lines', type=int,
help='maximum number of lines per diff block (default: %d, 0 to disable)' %
parser.add_argument('--max-report-child-size', metavar='BYTES',
dest='max_report_child_size', type=int,
help='In html-dir output, this is the max bytes of '
'each child page. (0 to disable, default: %(default)s, '
'remaining in effect even with --no-default-limits)',
Config.general.max_report_child_size, 50000)
parser.add_argument('--max-diff-block-lines', dest='max_diff_block_lines',
metavar='LINES', type=int,
help='Maximum number of lines output per diff block, '
'across the whole report. (0 to disable, default: %d)' %
Config.general.max_diff_block_lines, 5)
parser.add_argument('--max-diff-input-lines', dest='max_diff_input_lines', type=int,
help='maximum number of lines fed to diff (default: %d, 0 to disable)' %
parser.add_argument('--max-diff-block-lines-parent', dest='max_diff_block_lines_parent',
metavar='LINES', type=int,
help='In --html-dir output, this is maximum number of '
'lines output per diff block on the parent page, '
'before spilling it into child pages. (0 to disable, '
'default: %(default)s, remaining in effect even with '
Config.general.max_diff_block_lines_parent, 200)
parser.add_argument('--max-diff-input-lines', dest='max_diff_input_lines',
metavar='LINES', type=int,
help='Maximum number of lines fed to diff(1). '
'(0 to disable, default: %d)' %
Config.general.max_diff_input_lines, 5000)
parser.add_argument('--fuzzy-threshold', dest='fuzzy_threshold', type=int,
help='threshold for fuzzy-matching '
'(0 to disable, %d is default, 400 is high fuzziness)' %
'(0 to disable, %(default)s is default, 400 is high fuzziness)',
400, 20)
parser.add_argument('--new-file', dest='new_file', action='store_true',
......@@ -174,7 +191,9 @@ def run_diffoscope(parsed_args):
if not tlsh and Config.general.fuzzy_threshold != parsed_args.fuzzy_threshold:
logger.warning('Fuzzy-matching is currently disabled as the “tlsh” module is unavailable.')
maybe_set_limit(Config.general, parsed_args, "max_report_size")
Config.general.separate_file_diff_size = parsed_args.separate_file_diff_size
maybe_set_limit(Config.general, parsed_args, "max_report_child_size")
# need to set them in this order due to Config._check_constraints
maybe_set_limit(Config.general, parsed_args, "max_diff_block_lines_parent")
maybe_set_limit(Config.general, parsed_args, "max_diff_block_lines")
maybe_set_limit(Config.general, parsed_args, "max_diff_input_lines")
Config.general.fuzzy_threshold = parsed_args.fuzzy_threshold
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment