Commit 99c6c515 authored by Joachim Breitner, committed by Jérémy Bobbio

Multi-file HTML output

In order to make it feasible to create HTML output with large diffs,
this adds a new presenter mode (--html-dir) which spreads the output
over multiple files in a directory.

In particular, the table presenting a diff that is larger than the
number of bytes specified via --separate-file-diff-size (200kB by
default) is written to a separate file and loaded on demand using jQuery.

By default, jQuery is symlinked into the output directory from
/usr/share/javascript/jquery/jquery.js (install libjs-jquery!), but an
alternative location (e.g. /javascript/jquery/jquery.js) can be
specified using the --jquery command line parameter.

Closes: #806891
parent 21dc4a74
......@@ -31,7 +31,8 @@ Depends: python3-pkg-resources,
Recommends: ${diffoscope:Recommends},
Suggests: libjs-jquery
Breaks: debbindiff (<< 29)
Replaces: debbindiff (<< 29)
Description: in-depth comparison of files, archives, and directories
......@@ -34,6 +34,7 @@ from diffoscope import logger, VERSION, set_locale, clean_all_temp_files
import diffoscope.comparators
from diffoscope.config import Config
from diffoscope.presenters.html import output_html
from diffoscope.presenters.html import output_html_directory
from diffoscope.presenters.text import output_text
......@@ -51,6 +52,8 @@ def create_parser():
help='Open the python debugger in case of crashes.')
parser.add_argument('--html', metavar='output', dest='html_output',
help='write HTML report to given file (use - for stdout)')
parser.add_argument('--html-dir', metavar='output', dest='html_output_directory',
help='write multi-file HTML report to given directory')
parser.add_argument('--text', metavar='output', dest='text_output',
help='write plain text output to given file (use - for stdout)')
parser.add_argument('--max-report-size', metavar='BYTES',
......@@ -58,6 +61,11 @@ def create_parser():
help='maximum bytes written in report (default: %d)' %
parser.add_argument('--separate-file-diff-size', metavar='BYTES',
dest='separate_file_diff_size', type=int,
help='diff size to load diff on demand, with --html-dir (default: %d)' %
parser.add_argument('--max-diff-block-lines', dest='max_diff_block_lines', type=int,
help='maximum number of lines per diff block (default: %d)' %
......@@ -75,6 +83,8 @@ def create_parser():
help='treat absent files as empty')
parser.add_argument('--css', metavar='url', dest='css_url',
help='link to an extra CSS for the HTML report')
parser.add_argument('--jquery', metavar='url', dest='jquery_url',
help='link to the jquery url, with --html-dir. By default, a symlink to /usr/share/javascript/jquery/jquery.js is created')
parser.add_argument('file1', help='first file to compare')
parser.add_argument('file2', help='second file to compare')
if not tlsh:
......@@ -128,6 +138,7 @@ def run_diffoscope(parsed_args):
Config.general.max_diff_block_lines = parsed_args.max_diff_block_lines
Config.general.max_diff_input_lines = parsed_args.max_diff_input_lines
Config.general.max_report_size = parsed_args.max_report_size
Config.general.separate_file_diff_size = parsed_args.separate_file_diff_size
Config.general.fuzzy_threshold = parsed_args.fuzzy_threshold
Config.general.new_file = parsed_args.new_file
if parsed_args.debug:
......@@ -136,10 +147,15 @@ def run_diffoscope(parsed_args):
difference = diffoscope.comparators.compare_root_paths(
parsed_args.file1, parsed_args.file2)
if difference:
# no output desired? print text
if not any((parsed_args.text_output, parsed_args.html_output, parsed_args.html_output_directory)):
parsed_args.text_output = "-"
if parsed_args.html_output:
with make_printer(parsed_args.html_output) as print_func:
output_html(difference, css_url=parsed_args.css_url, print_func=print_func)
if (parsed_args.text_output and parsed_args.text_output != parsed_args.html_output) or not parsed_args.html_output:
if parsed_args.html_output_directory:
output_html_directory(parsed_args.html_output_directory, difference, css_url=parsed_args.css_url, jquery_url=parsed_args.jquery_url)
if parsed_args.text_output:
with make_printer(parsed_args.text_output or '-') as print_func:
output_text(difference, print_func=print_func)
return 1
......@@ -30,6 +30,7 @@ class Config(object):
self._max_diff_block_lines = 50
self._max_diff_input_lines = 100000 # GNU diff cannot process arbitrary large files :(
self._max_report_size = 2000 * 2 ** 10 # 2000 kB
self._separate_file_diff_size = 200 * 2 ** 10 # 200kB
self._fuzzy_threshold = 60
self._new_file = False
......@@ -63,6 +64,14 @@ class Config(object):
def max_report_size(self, value):
self._max_report_size = value
def separate_file_diff_size(self):
return self._separate_file_diff_size
def separate_file_diff_size(self, value):
self._separate_file_diff_size = value
def fuzzy_threshold(self):
return self._fuzzy_threshold
......@@ -34,6 +34,11 @@
import cgi
import re
import sys
import os
import os.path
import codecs
import hashlib
from contextlib import contextmanager
from xml.sax.saxutils import escape
from diffoscope import logger, VERSION
from diffoscope.config import Config
......@@ -55,8 +60,8 @@ DIFFOFF = "\x02"
HEADER = """<!DOCTYPE html>
<meta charset="utf-8">
<meta name="generator" content="diffoscope">
<meta charset="utf-8" />
<meta name="generator" content="diffoscope" />
<link rel="icon" type="image/png" href="data:image/png;base64,%(favicon)s" />
......@@ -138,6 +143,9 @@ HEADER = """<!DOCTYPE html>
.diffheader:hover .anchor {
display: inline;
.ondemand {
text-align: center;
......@@ -150,6 +158,23 @@ FOOTER = """
<script src="%(jquery_url)s"></script>
<script type="text/javascript">
$(function() {
$("div.ondemand a").on('click', function (){
var filename = $(this).attr('href');
var div = $(this).parent();
div.text('... loading ...');
div.load(filename + " table", function() {
return false;
class PrintLimitReached(Exception):
......@@ -381,7 +406,7 @@ def empty_buffer(print_func):
buf = []
def output_unified_diff(print_func, unified_diff):
def output_unified_diff_table(print_func, unified_diff):
global add_cpt, del_cpt
global line1, line2
global hunk_off1, hunk_size1, hunk_off2, hunk_size2
......@@ -464,8 +489,24 @@ def output_unified_diff(print_func, unified_diff):
print_func(u"</table>", force=True)
def output_unified_diff(print_func, css_url, directory, unified_diff):
if directory and len(unified_diff) > Config.general.separate_file_diff_size:
# open a new file for this table
filename="%s.html" % hashlib.md5(unified_diff.encode('utf-8')).hexdigest()
logger.debug('separate html output for diff of size %d', len(unified_diff))
with file_printer(directory, filename) as new_print_func:
output_header(css_url, new_print_func)
output_unified_diff_table(new_print_func, unified_diff)
print_func("<div class='ondemand'>\n")
print_func("... <a href='%s'>load diff</a> ...\n" % escape(filename))
output_unified_diff_table(print_func, unified_diff)
def output_difference(difference, print_func, parents):
def output_difference(difference, print_func, css_url, directory, parents):
logger.debug('html output for %s', difference.source1)
sources = parents + [difference.source1]
print_func(u"<div class='difference'>")
......@@ -487,9 +528,9 @@ def output_difference(difference, print_func, parents):
% u'<br />'.join(map(escape, difference.comments)))
if difference.unified_diff:
output_unified_diff(print_func, difference.unified_diff)
output_unified_diff(print_func, css_url, directory, difference.unified_diff)
for detail in difference.details:
output_difference(detail, print_func, sources)
output_difference(detail, print_func, css_url, directory, sources)
except PrintLimitReached:
logger.debug('print limit reached')
......@@ -507,16 +548,59 @@ def output_header(css_url, print_func):
'css_link': css_link,
def output_footer(print_func):
print_func(FOOTER % {'version': VERSION}, force=True)
def output_html(difference, css_url=None, print_func=None):
Default presenter, all in one HTML file
if print_func is None:
print_func = print
print_func = create_limited_print_func(print_func, Config.general.max_report_size)
output_header(css_url, print_func)
output_difference(difference, print_func, [])
output_difference(difference, print_func, css_url, None, [])
except PrintLimitReached:
logger.debug('print limit reached')
print_func(u"<div class='error'>Max output size reached.</div>",
print_func(FOOTER % {'version': VERSION}, force=True)
def file_printer(directory, filename):
with,filename), 'w', encoding='utf-8') as f:
print_func = f.write
print_func = create_limited_print_func(print_func, Config.general.max_report_size)
yield print_func
def output_html_directory(directory, difference, css_url=None, jquery_url=None):
Multi-file presenter. Writes to a directory, and puts large diff tables
into files of their own.
This uses jQuery. By default it uses /usr/share/javascript/jquery/jquery.js
(symlinked, so that you can still share the result over HTTP).
You can also pass --jquery URL to diffoscope to use a central jQuery copy.
if not os.path.exists(directory):
if not jquery_url:
jquery_symlink = os.path.join(directory, "jquery.js")
if not os.path.exists(jquery_symlink):
os.symlink("/usr/share/javascript/jquery/jquery.js", jquery_symlink)
jquery_url = "./jquery.js"
with file_printer(directory, "index.html") as print_func:
print_func = create_limited_print_func(print_func, Config.general.max_report_size)
output_header(css_url, print_func)
output_difference(difference, print_func, css_url, directory, [])
except PrintLimitReached:
logger.debug('print limit reached')
print_func(u"<div class='error'>Max output size reached.</div>",
print_func(SCRIPTS % {'jquery_url': escape(jquery_url)}, force=True)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment