Commits (3)
......@@ -38,6 +38,7 @@ Build-Depends:
gnupg-utils <!nocheck>,
hdf5-tools <!nocheck>,
help2man,
html2text <!nocheck>,
imagemagick <!nocheck>,
jsbeautifier <!nocheck>,
libarchive-tools <!nocheck>,
......
......@@ -7,7 +7,7 @@
# $ mv debian/tests/control.tmp debian/tests/control
Tests: pytest-with-recommends
Depends: python3-all, diffoscope, black, python3-pytest, python3-h5py, file, linux-image-amd64 [amd64] | linux-image-generic [amd64], abootimg, acl, apksigner, apktool [!ppc64el !s390x], binutils-multiarch, bzip2, caca-utils, colord, coreboot-utils, db-util, default-jdk-headless | default-jdk | java-sdk, device-tree-compiler, docx2txt, e2fsprogs, enjarify, ffmpeg, fontforge-extras, fonttools, fp-utils [!ppc64el !s390x], genisoimage, gettext, ghc, ghostscript, giflib-tools, gnumeric, gnupg, gnupg-utils, hdf5-tools, imagemagick, jsbeautifier, libarchive-tools, libxmlb-dev, llvm, lz4 | liblz4-tool, mono-utils, ocaml-nox, odt2txt, oggvideotools [!s390x], openssh-client, openssl, pgpdump, poppler-utils, procyon-decompiler, python3-pdfminer, r-base-core, rpm2cpio, sng, sqlite3, squashfs-tools, tcpdump, u-boot-tools, unzip, wabt, xmlbeans, xxd, xz-utils, zip, zstd, androguard, python3-argcomplete, python3-binwalk, python3-defusedxml, python3-distro, python3-guestfs, python3-jsondiff, python3-progressbar, python3-pypdf2, python3-debian, python3-pyxattr, python3-rpm, python3-tlsh
Depends: python3-all, diffoscope, black, python3-pytest, python3-h5py, file, linux-image-amd64 [amd64] | linux-image-generic [amd64], abootimg, acl, apksigner, apktool [!ppc64el !s390x], binutils-multiarch, bzip2, caca-utils, colord, coreboot-utils, db-util, default-jdk-headless | default-jdk | java-sdk, device-tree-compiler, docx2txt, e2fsprogs, enjarify, ffmpeg, fontforge-extras, fonttools, fp-utils [!ppc64el !s390x], genisoimage, gettext, ghc, ghostscript, giflib-tools, gnumeric, gnupg, gnupg-utils, hdf5-tools, html2text, imagemagick, jsbeautifier, libarchive-tools, libxmlb-dev, llvm, lz4 | liblz4-tool, mono-utils, ocaml-nox, odt2txt, oggvideotools [!s390x], openssh-client, openssl, pgpdump, poppler-utils, procyon-decompiler, python3-pdfminer, r-base-core, rpm2cpio, sng, sqlite3, squashfs-tools, tcpdump, u-boot-tools, unzip, wabt, xmlbeans, xxd, xz-utils, zip, zstd,
Tests: pytest
Depends: python3-all, diffoscope, python3-pytest, python3-h5py, file, python3-tlsh
......
......@@ -54,6 +54,7 @@ class ComparatorManager:
("xmlb.XMLBFile",),
("openssl.Pkcs7File",),
("openssl.MobileProvisionFile",),
("html.HtmlFile",),
("text.TextFile",),
("bzip2.Bzip2File",),
("cpio.CpioFile",),
......
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2022 Chris Lamb <lamby@debian.org>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import re
from diffoscope.difference import Difference
from diffoscope.exc import RequiredToolNotFound
from diffoscope.tools import tool_required
from .text import TextFile
from .utils.command import Command
class Htmltotext(Command):
    """Command wrapper that invokes ``html2text`` on a single file."""

    @tool_required("html2text")
    def cmdline(self):
        """Return the argument list: the tool name followed by the file path."""
        argv = ["html2text"]
        argv.append(self.path)
        return argv
class HtmlFile(TextFile):
    """
    Comparator for HTML documents.

    The files are first compared by the parent ``TextFile`` comparator; the
    output of ``html2text`` for both files is then attached as a nested
    difference so that text-only changes can be read on their own.
    """

    DESCRIPTION = "HTML files (.html)"
    # NOTE(review): presumably matched against the detected file type
    # description by the comparator framework -- confirm.
    FILE_TYPE_RE = re.compile(r"^HTML document")

    def compare(self, other, source=None):
        """
        Compare this file with ``other``.

        Returns the result of the parent comparison, which is ``None`` when
        it finds nothing to report. Otherwise the returned difference also
        carries the ``html2text`` sub-diff, or a comment from the
        ``RequiredToolNotFound`` exception if that tool is unavailable.
        """
        difference = super().compare(other, source)

        # The parent comparison returns None when there is no difference.
        # There is then nothing to attach details or comments to, and calling
        # add_comment() on None below would raise AttributeError whenever
        # html2text is not installed.
        if difference is None:
            return None

        # Show text-only differences as a sub-diff.
        try:
            text = Difference.from_operation(Htmltotext, self.path, other.path)
        except RequiredToolNotFound as exc:  # noqa
            difference.add_comment(exc.get_comment())
        else:
            if text is not None:
                difference.add_details([text])

        return difference
......@@ -82,6 +82,7 @@ EXTERNAL_TOOLS = {
},
"gzip": {"debian": "gzip", "arch": "gzip", "guix": "gzip"},
"h5dump": {"debian": "hdf5-tools", "arch": "hdf5", "guix": "hdf5"},
"html2text": {"debian": "html2text"},
"identify": {
"debian": "imagemagick",
"arch": "imagemagick",
......
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2022 Chris Lamb <lamby@debian.org>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import pytest
from diffoscope.comparators.html import HtmlFile
from ..utils.data import assert_diff, load_fixture
# Module-level fixtures for the two sample HTML files; the test functions
# below request them by these names (``html1`` / ``html2``) as arguments.
# NOTE(review): load_fixture presumably resolves the file name inside the
# tests' data directory -- confirm against ..utils.data.
html1 = load_fixture("test1.html")
html2 = load_fixture("test2.html")
def test_identification(html1, html2):
    """Both sample files must be recognised as ``HtmlFile`` instances."""
    for candidate in (html1, html2):
        assert isinstance(candidate, HtmlFile)
def test_no_differences(html1):
    """Comparing a file with itself must not report any difference."""
    result = html1.compare(html1)
    assert result is None
@pytest.fixture
def differences(html1, html2):
    """Fixture providing the result of comparing the two sample HTML files."""
    comparison = html1.compare(html2)
    return comparison
def test_diff(differences):
    """Check the raw HTML diff and its first nested detail against expected output."""
    # Top-level difference: the HTML sources themselves.
    assert_diff(differences, "html_expected_diff")

    # First nested detail: expected to be the text-only sub-diff.
    # NOTE(review): this presumably requires html2text to be installed;
    # without it ``details`` may be empty -- confirm whether a skip marker
    # is wanted here.
    text_only = differences.details[0]
    assert_diff(text_only, "html_text_expected_diff")
@@ -1,17 +1,17 @@
<html>
<head>
-<title>foo</title>
+<title>bar</title>
<style>
<!--
BODY {
-BACKGROUND: #000000;
-COLOR: #FFFFFF;
+BACKGROUND: #FFFFFF;
+COLOR: #000000;
-->
</style>
</head>
<body>
<p>
-foo
+bar
</p>
</body>
</html>
<html>
<head>
<title>foo</title>
<style>
<!--
BODY {
BACKGROUND: #000000;
COLOR: #FFFFFF;
-->
</style>
</head>
<body>
<p>
foo
</p>
</body>
</html>
<html>
<head>
<title>bar</title>
<style>
<!--
BODY {
BACKGROUND: #FFFFFF;
COLOR: #000000;
-->
</style>
</head>
<body>
<p>
bar
</p>
</body>
</html>
......@@ -16,241 +16,16 @@
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import os
import glob
import subprocess
from .utils.tools import skip_unless_tool_is_at_least
ALLOWED_TEST_FILES = {
# Data files we would prefer to generate dynamically
"android1.img",
"android2.img",
"archive1.tar",
"archive2.tar",
"base-files_157-r45695_ar71xx.ipk",
"base-files_157-r45918_ar71xx.ipk",
"binary1",
"binary2",
"bug881937_1.deb",
"bug881937_2.deb",
"bug903391_1.deb",
"bug903391_2.deb",
"bug903401_1.deb",
"bug903401_2.deb",
"bug903565_1.deb",
"bug903565_2.deb",
"containers",
"containers/a.tar.bz2",
"containers/a.tar.gz",
"containers/a.tar.xz",
"containers/b.tar.bz2",
"containers/b.tar.gz",
"containers/b.tar.xz",
"containers/magic_bzip2",
"containers/magic_gzip",
"containers/magic_xz",
"dbgsym/add/test-dbgsym-dbgsym_1_amd64.deb",
"dbgsym/add/test-dbgsym_1_amd64.deb",
"dbgsym/mult/test-dbgsym-dbgsym_1_amd64.deb",
"dbgsym/mult/test-dbgsym_1_amd64.deb",
"dbgsym/test-dbgsym_1.dsc",
"dbgsym/test-dbgsym_1.tar.gz",
"debian-bug-876316-control.tar.gz",
# Outputs
"devicetree1.dtb",
"devicetree2.dtb",
"elfmix1.not_a",
"elfmix2.a",
"encrypted1.zip",
"encrypted2.zip",
"fuzzy-tar-in-tar1.tar",
"fuzzy-tar-in-tar2.tar",
"fuzzy1.tar",
"fuzzy2.tar",
"fuzzy3.tar",
"hello1.wasm",
"hello2.wasm",
"no-perms.tar",
"quine.gz",
"quine.zip",
"quine_a.deb",
"quine_b.deb",
"Samyak-Malayalam1.ttf",
"Samyak-Malayalam2.ttf",
"test1-le64.cache-4",
"test1.a",
"test1.apk",
"test1.asc",
"test1.binwalk",
"test1.buildinfo",
"test1.bz2",
"test1.changes",
"Test1.class",
"Test2.class",
"test1.cpio",
"test1.db",
"test1.deb",
"test1.debsrc.tar.gz",
"test1.debug",
"test1.dex",
"test1.docx",
"test1.dsc",
"test1.epub",
"test1.exe",
"test1.ext4",
"test1.fat12",
"test1.fat16",
"test1.fat32",
"test1.gif",
"test1.git-index",
"test1.gnumeric",
"test1.gz",
"test1.hi",
"test1.icc",
"test1.ico",
"test1.inv",
"test1.iso",
"test1.jmod",
"test1.jpg",
"test1.js",
"test1.json",
"test1.kbx",
"test1.lz4",
"test1.macho",
"test1.mo",
"test1.mozzip",
"test1.mp3",
"test1.o",
"test1.odt",
"test1.ogg",
"test1.pcap",
"test1.pdf",
"test1.pgp",
"test1_signed.pgp",
"test1.png",
"test1.ppu",
"test1.ps",
"test1.pyc-renamed",
"test1.rdb",
"test1.rdx",
"test1.rlib",
"test1.rpm",
"test1.sqlite3",
"test1.squashfs",
"test1_root.squashfs",
"test1.tar",
"test1.xml",
"test1.xsb",
"test1.xz",
"test1.zip",
"test1_meta.ico",
"test1_meta.jpg",
"test2-le64.cache-4",
"test2.a",
"test2.apk",
"test2.asc",
"test2.binwalk",
"test2.buildinfo",
"test2.bz2",
"test2.changes",
"test2.cpio",
"test2.db",
"test2.deb",
"test2.debsrc.tar.gz",
"test2.debug",
"test2.dex",
"test2.docx",
"test2.dsc",
"test2.epub",
"test2.exe",
"test2.ext4",
"test2.fat12",
"test2.gif",
"test2.git-index",
"test2.gnumeric",
"test2.gz",
"test2.hi",
"test2.icc",
"test2.ico",
"test2.inv",
"test2.iso",
"test2.jmod",
"test2.jpg",
"test2.js",
"test2.json",
"test2.kbx",
"test2.lz4",
"test2.macho",
"test2.mo",
"test2.mozzip",
"test2.mp3",
"test2.o",
"test2.odt",
"test2.ogg",
"test2.pcap",
"test2.pdf",
"test2.pgp",
"test2_signed.pgp",
"test2.png",
"test2.ppu",
"test2.ps",
"test2.pyc-renamed",
"test2.rdb",
"test2.rdx",
"test2.rlib",
"test2.rpm",
"test2.sqlite3",
"test2.squashfs",
"test2_root.squashfs",
"test2.tar",
"test2.xml",
"test2.xsb",
"test2.xz",
"test2.zip",
"test2_meta.ico",
"test2_meta.jpg",
"test3.apk",
"test3.changes",
"test3.gif",
"test3.pdf",
"test3.xml",
"test3.zip",
"test4.changes",
"test4.gif",
"test4.pdf",
"test4.xml",
"test5.changes",
"test_comment1.zip",
"test_comment2.zip",
"test_invalid.json",
"test_invalid.xml",
"test_iso8859-1.mo",
"test_no_charset.mo",
"test_openssh_pub_key1.pub",
"test_openssh_pub_key2.pub",
"test_weird_non_unicode_chars1.pdf",
"test_weird_non_unicode_chars2.pdf",
"text_ascii1", # used by multiple tests
"text_ascii2", # used by multiple tests
"text_iso8859",
"text_order1",
"text_order2",
"text_unicode1",
"text_unicode2",
"text_unicode_binary_fallback",
# Outputs
"debian-bug-875281.collapsed-diff.json",
"order1a.json",
"order1b.json",
}
def black_version():
try:
out = subprocess.check_output(("black", "--version"))
except subprocess.CalledProcessError as e:
out = e.output
except subprocess.CalledProcessError as exc:
out = exc.output
# black --version format changed starting in 21.11b0. Returning the first
# token starting with a decimal digit, since its ordinal position may vary.
......@@ -270,41 +45,3 @@ def test_code_is_black_clean():
print(output)
assert not output, output
def test_does_not_add_new_test_files():
    """
    Guard against committing new static test data files.

    We now prefer to generate test data dynamically (via pytest fixtures,
    etc.) rather than committing and shipping such files. Exceptions may be
    appropriate (or even required), but this test makes sure that files
    which could be generated dynamically are not added "automatically", for
    example by following the pattern of previous/older commits.
    """
    data_root = os.path.join(os.path.dirname(__file__), "data")
    # Length of the common prefix (directory plus separator) to strip.
    prefix_len = len(data_root) + 1
    diff_suffixes = ("_diff", "_diffs", ".diff")

    def is_expected_diff(name):
        # Known filename patterns used for expected diff outputs.
        return (
            name.endswith(diff_suffixes)
            or "_diff_" in name
            or "diff." in name
            or name.startswith("output")
        )

    # Every non-directory entry below the data directory, as a path relative
    # to that directory.
    relative_names = (
        path[prefix_len:]
        for path in glob.iglob(os.path.join(data_root, "**"), recursive=True)
        if not os.path.isdir(path)
    )
    seen = {name for name in relative_names if not is_expected_diff(name)}

    assert seen - ALLOWED_TEST_FILES - {""} == set()