Skip to content
Commits on Source (2)
diffoscope (149) UNRELEASED; urgency=medium
diffoscope (149) unstable; urgency=medium
* WIP (generated upon release).
[ Chris Lamb ]
* Update tests for file 5.39. (Closes: reproducible-builds/diffoscope#179)
* Downgrade the tlsh warning message to an "info" level warning.
(Closes: #888237, reproducible-builds/diffoscope#29)
* Use the CSS "word-break" property over manually adding U+200B zero-width
spaces that make copy-pasting cumbersome.
(Closes: reproducible-builds/diffoscope!53)
* Codebase improvements:
- Drop some unused imports from the previous commit.
- Prevent an unnecessary .format() when rendering difference comments.
- Use a semantic "AbstractMissingType" type instead of remembering to check
for both "missing" files and missing containers.
[ Jean-Romain Garnier ]
* Allow user to mask/filter reader output via --diff-mask=REGEX.
(MR: reproducible-builds/diffoscope!51)
* Make --html-dir child pages open in new window to accommodate new web
browser content security policies.
* Fix the --new-file option when comparing directories by merging
DirectoryContainer.compare and Container.compare.
(Closes: reproducible-builds/diffoscope#180)
* Fix zsh completion for --max-page-diff-block-lines.
[ Mattia Rizzolo ]
* Do not warn about missing tlsh during tests.
-- Chris Lamb <lamby@debian.org> Fri, 19 Jun 2020 11:39:15 +0100
-- Chris Lamb <lamby@debian.org> Fri, 26 Jun 2020 15:57:41 +0100
diffoscope (148) unstable; urgency=medium
......
......@@ -31,6 +31,7 @@ _diffoscope() {
'--exclude=[Exclude files whose names (including any directory part) match %(metavar)s. Use this option to ignore files based on their names.]:' \
'--exclude-command=[Exclude commands that match %(metavar)s. For example "^readelf.*\s--debug-dump=info" can take a long time and differences here are likely secondary differences caused by something represented elsewhere. Use this option to disable commands that use a lot of resources.]:' \
'--exclude-directory-metadata=[Exclude directory metadata. Useful if comparing files whose filesystem-level metadata is not intended to be distributed to other systems. This is true for most distributions package builders, but not true for the output of commands such as `make install`. Metadata of archive members remain un-excluded except if "recursive" choice is set. Use this option to ignore permissions, timestamps, xattrs etc. Default: False if comparing two directories, else True. Note that "file" metadata actually a property of its containing directory, and is not relevant when distributing the file across systems.]:--exclude-directory-metadata :(auto yes no recursive)' \
'--diff-mask=[Replace/unify substrings that match regular expression %(metavar)s from output strings before applying diff. For example, to filter out a version number or changed path.]:' \
'--fuzzy-threshold=[Threshold for fuzzy-matching (0 to disable, %(default)s is default, 400 is high fuzziness)]:' \
'--tool-prefix-binutils=[Prefix for binutils program names, e.g. "aarch64-linux-gnu-" for a foreign-arch binary or "g" if you"re on a non-GNU system.]:' \
'--max-diff-input-lines=[Maximum number of lines fed to diff(1) (0 to disable, default: 4194304)]:' \
......
......@@ -18,4 +18,4 @@
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
VERSION = "148"
VERSION = "149"
......@@ -55,6 +55,7 @@ class Config:
self.max_text_report_size = 0
self.difftool = None
self.diff_masks = ()
self.new_file = False
self.fuzzy_threshold = 60
self.enforce_constraints = True
......
......@@ -18,10 +18,12 @@
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import re
import signal
import hashlib
import logging
import subprocess
import functools
from .config import Config
from .profiling import profile
......@@ -31,6 +33,39 @@ logger = logging.getLogger(__name__)
DIFF_CHUNK = 4096
@functools.lru_cache(maxsize=128)
def compile_string_regex(regex_str):
    """Return the compiled ``str`` pattern for *regex_str*.

    Results are memoised (up to 128 distinct patterns), so asking for the
    same pattern text again hands back the cached pattern object.
    """
    pattern = re.compile(regex_str)
    return pattern
@functools.lru_cache(maxsize=128)
def compile_bytes_regex(regex_str):
    """Return the compiled ``bytes`` pattern for the text *regex_str*.

    The pattern text is encoded as UTF-8 before compilation so that the
    result can be applied to ``bytes`` buffers. Results are memoised (up to
    128 distinct patterns).
    """
    encoded = regex_str.encode("utf-8")
    return re.compile(encoded)
def filter_reader(buf, additional_filter=None):
    """Run *buf* through *additional_filter*, then through every diff mask.

    :param buf: a ``str`` or ``bytes`` buffer.
    :param additional_filter: optional callable applied to *buf* before any
        mask (for example ``Command.filter``).
    :returns: the buffer with each match of each pattern in
        ``Config().diff_masks`` replaced by ``[filtered]``; the result has the
        same type (``str`` or ``bytes``) as the buffer that was masked.
    """
    # The caller-supplied filter always goes first
    if additional_filter:
        buf = additional_filter(buf)

    # Nothing to mask in an empty buffer
    if not buf:
        return buf

    # Choose the pattern compiler and placeholder matching the buffer type
    is_text = isinstance(buf, str)
    compile_mask = compile_string_regex if is_text else compile_bytes_regex
    placeholder = "[filtered]" if is_text else b"[filtered]"

    for mask in Config().diff_masks:
        buf = compile_mask(mask).sub(placeholder, buf)

    return buf
def from_raw_reader(in_file, filter=None):
def feeder(out_file):
max_lines = Config().max_diff_input_lines
......@@ -45,7 +80,7 @@ def from_raw_reader(in_file, filter=None):
for buf in in_file:
line_count += 1
out = buf if filter is None else filter(buf)
out = filter_reader(buf, filter)
if h is not None:
h.update(out)
......@@ -113,7 +148,9 @@ def from_command(command):
def _iter_text_lines(text):
    """Yield successive lines of *text*, keeping the line feed where present."""
    start = 0
    while start < len(text):
        newline = text.find("\n", start)
        end = len(text) if newline == -1 else newline + 1
        yield text[start:end]
        start = end


def from_text(content):
    """Return a feeder that writes *content*, masked and UTF-8 encoded.

    The returned ``feeder(f)`` passes every line of *content* through
    ``filter_reader`` and writes the result to *f* exactly once. Masking is
    done line by line rather than per fixed-size slice, so a match can never
    be cut in half by a slice boundary and ``^``/``$`` anchor at line
    boundaries instead of at arbitrary ``DIFF_CHUNK`` offsets.

    :param content: the text (``str``) to feed.
    :returns: a callable taking a binary-writable object; it returns a truthy
        value only when *content* is non-empty and ends with a line feed.
    """

    def feeder(f):
        # Batch masked lines so writes stay at roughly DIFF_CHUNK characters
        # instead of issuing one small write per line.
        batch = []
        batch_len = 0

        for line in _iter_text_lines(content):
            masked = filter_reader(line)
            batch.append(masked)
            batch_len += len(masked)

            if batch_len >= DIFF_CHUNK:
                f.write("".join(batch).encode("utf-8"))
                batch = []
                batch_len = 0

        if batch:
            f.write("".join(batch).encode("utf-8"))

        # Based on the unmasked input, as before
        return content and content[-1] == "\n"

    return feeder
......
......@@ -312,6 +312,16 @@ def create_parser():
"and is not relevant when distributing the file across "
"systems.",
)
group3.add_argument(
"--diff-mask",
metavar="REGEX_PATTERN",
dest="diff_masks",
action="append",
default=[],
help="Replace/unify substrings that match regular expression "
"%(metavar)s from output strings before applying diff. For example, to "
"filter out a version number or changed path.",
)
group3.add_argument(
"--fuzzy-threshold",
type=int,
......@@ -612,6 +622,7 @@ def configure(parsed_args):
Config().exclude_directory_metadata = (
parsed_args.exclude_directory_metadata
)
Config().diff_masks = parsed_args.diff_masks
Config().compute_visual_diffs = PresenterManager().compute_visual_diffs()
......
# -*- coding: utf-8 -*-
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2020 Chris Lamb <lamby@debian.org>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import os
import re
import pytest
from diffoscope.main import main
def run(capsys, *args):
    """Run diffoscope's ``main`` on the two bundled test tarballs.

    *args* are passed as leading command-line arguments, followed by the
    paths of ``data/test1.tar`` and ``data/test2.tar`` next to this file.
    ``main`` is expected to finish by raising ``SystemExit``, and nothing may
    have been written to stderr.

    :returns: ``(exit_code, captured_stdout)``.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    archives = tuple(
        os.path.join(data_dir, name) for name in ("test1.tar", "test2.tar")
    )

    with pytest.raises(SystemExit) as exc:
        main(args + archives)

    out, err = capsys.readouterr()

    assert err == ""

    return exc.value.code, out
def test_none(capsys):
    """Without any --diff-mask the output must not contain a placeholder."""
    exit_code, output = run(capsys)

    # No mask was requested, so "[filtered]" must not appear anywhere
    placeholder = re.search(r"\[filtered\]", output)
    assert placeholder is None

    assert exit_code == 1
def test_all(capsys):
    """A catch-all ``.*`` mask must replace the content of every line."""
    exit_code, output = run(capsys, "--diff-mask=.*")

    # These section names must have been masked away
    for section in ("file list", "dir/link"):
        assert section not in output

    # Content lines start with "│ " followed by a "+", a "-" or a space,
    # depending on the type of change. The negative lookahead reports any such
    # prefix that is not directly followed by at least one "[filtered]".
    unmasked = re.search(r"│\s[\s\+\-](?!(\[filtered\])+)", output)
    assert unmasked is None

    assert exit_code == 1
def test_specific(capsys):
    """An anchored mask must only replace "Lorem" at the start of a line."""
    exit_code, output = run(capsys, "--diff-mask=^Lorem")

    # The leading "Lorem" was replaced ...
    assert "[filtered] ipsum dolor sit amet" in output
    # ... while the quoted occurrence was left untouched
    assert '"Lorem ipsum"' in output

    assert exit_code == 1
# -*- coding: utf-8 -*-
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2020 Chris Lamb <lamby@debian.org>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import os
import re
import pytest
from diffoscope.main import main
def run(capsys, *args):
    """Run diffoscope's ``main`` on the two bundled test tarballs.

    *args* are passed as leading command-line arguments, followed by the
    paths of ``data/test1.tar`` and ``data/test2.tar`` next to this file.
    ``main`` is expected to finish by raising ``SystemExit``, and nothing may
    have been written to stderr.

    :returns: ``(exit_code, captured_stdout)``.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    archives = tuple(
        os.path.join(data_dir, name) for name in ("test1.tar", "test2.tar")
    )

    with pytest.raises(SystemExit) as exc:
        main(args + archives)

    out, err = capsys.readouterr()

    assert err == ""

    return exc.value.code, out
def test_none(capsys):
    """Without any --diff-mask the output must not contain a placeholder."""
    exit_code, output = run(capsys)

    # No mask was requested, so "[filtered]" must not appear anywhere
    placeholder = re.search(r"\[filtered\]", output)
    assert placeholder is None

    assert exit_code == 1
def test_all(capsys):
    """A catch-all ``.*`` mask must replace the content of every line."""
    exit_code, output = run(capsys, "--diff-mask=.*")

    # These section names must have been masked away
    for section in ("file list", "dir/link"):
        assert section not in output

    # Content lines start with "│ " followed by a "+", a "-" or a space,
    # depending on the type of change. The negative lookahead reports any such
    # prefix that is not directly followed by at least one "[filtered]".
    unmasked = re.search(r"│\s[\s\+\-](?!(\[filtered\])+)", output)
    assert unmasked is None

    assert exit_code == 1
def test_specific(capsys):
    """An anchored mask must only replace "Lorem" at the start of a line."""
    exit_code, output = run(capsys, "--diff-mask=^Lorem")

    # The leading "Lorem" was replaced ...
    assert "[filtered] ipsum dolor sit amet" in output
    # ... while the quoted occurrence was left untouched
    assert '"Lorem ipsum"' in output

    assert exit_code == 1