Skip to content
Snippets Groups Projects
Commit f2652c49 authored by Simon McVittie
Browse files

Add patches to make the tests pass with tidy-html5 providing libtidy

Closes: #829409
parent 447aa2e6
No related branches found
No related tags found
No related merge requests found
...@@ -16,6 +16,14 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium
upstream.
* d/patches: put all patches in the git style allowed by DEP-3,
so they can be manipulated with gbp-pq
+* d/p/lib-ignore-tidy-html5-s-differently-formatted-summar.patch,
+d/p/t-allow-tidy-html5-as-generator.patch,
+d/p/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch,
+d/p/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch,
+d/p/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch,
+d/p/t-venus.t-adjust-expected-result-for-tidy-html5.patch:
+add patches to make the tests pass with tidy-html5 providing libtidy
+(Closes: #829409)
-- gregor herrmann <gregoa@debian.org> Thu, 27 Feb 2014 22:36:29 +0100
......
...@@ -7,7 +7,7 @@ Section: perl
Priority: optional
Build-Depends: debhelper (>= 9.20120312),
help2man,
-libtidy-dev,
+libtidy-dev (>= 1:5.2.0),
libtest-pod-perl,
libtest-pod-coverage-perl,
perl (>= 5.13.11) | libtest-simple-perl (>= 0.98),
......
From: Simon McVittie <smcv@debian.org>
Date: Fri, 22 Jul 2016 09:19:08 +0100
Subject: lib: ignore tidy-html5's differently-formatted summary line
Signed-off-by: Simon McVittie <smcv@debian.org>
---
lib/HTML/Tidy.pm | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm
index a172ea2..aa16c79 100644
--- a/lib/HTML/Tidy.pm
+++ b/lib/HTML/Tidy.pm
@@ -265,6 +265,10 @@ sub _parse_errors {
# Summary line we don't want
}
+ elsif ( $line =~ /^Tidy found \d+ warnings? and \d+ errors?!/ ) {
+ # Summary line we don't want
+
+ }
elsif ( $line eq 'No warnings or errors were found.' ) {
# Summary line we don't want
remove-tidy_version.patch
tidy-not-tidyp.patch
+lib-ignore-tidy-html5-s-differently-formatted-summar.patch
+t-allow-tidy-html5-as-generator.patch
+t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch
+t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch
+t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch
+t-venus.t-adjust-expected-result-for-tidy-html5.patch
fix-error-message-in-webtidy
From: Simon McVittie <smcv@debian.org>
Date: Fri, 22 Jul 2016 09:21:39 +0100
Subject: t: allow tidy-html5 as generator
Signed-off-by: Simon McVittie <smcv@debian.org>
---
t/roundtrip.t | 2 +-
t/unicode.t | 6 ++----
2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/t/roundtrip.t b/t/roundtrip.t
index e37cb88..2590795 100644
--- a/t/roundtrip.t
+++ b/t/roundtrip.t
@@ -25,7 +25,7 @@ my @messages = $tidy->messages( $clean );
is_deeply( \@messages, [], q{The cleaned stuff shouldn't have any errors} );
-$clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
+$clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]+)"/"Tidy"/;
my $expected = do { local $/ = undef; <DATA> };
is( $clean, $expected, 'Cleaned up properly' );
diff --git a/t/unicode.t b/t/unicode.t
index 2f45384..9ca6370 100644
--- a/t/unicode.t
+++ b/t/unicode.t
@@ -30,8 +30,7 @@ ok(utf8::is_utf8($reference), 'reference is utf8');
my $clean = $tidy->clean( $html );
ok(utf8::is_utf8($clean), 'cleaned output is also unicode');
-$clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/;
-$clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
+$clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]*)"/"Tidy"/;
is($clean, $reference, q{Cleanup didn't break anything});
my @messages = $tidy->messages;
@@ -49,8 +48,7 @@ subtest 'Try send bytes to clean method.' => sub {
ok(!utf8::is_utf8($html), 'html is row bytes');
my $clean = $tidy->clean( $html );
ok(utf8::is_utf8($clean), 'but cleaned output is string');
- $clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/;
- $clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
+ $clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]*)"/"Tidy"/;
is($clean, $reference, q{Cleanup didn't break anything});
};
From: Simon McVittie <smcv@debian.org>
Date: Fri, 22 Jul 2016 09:32:09 +0100
Subject: t: don't assert that tidy will add a HTML 3.2 doctype
tidy-html5 adds the HTML5 doctype, <!DOCTYPE html>.
Signed-off-by: Simon McVittie <smcv@debian.org>
---
t/roundtrip.t | 3 ++-
t/wordwrap.t | 3 ++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/t/roundtrip.t b/t/roundtrip.t
index 2590795..68be07f 100644
--- a/t/roundtrip.t
+++ b/t/roundtrip.t
@@ -26,12 +26,13 @@ my @messages = $tidy->messages( $clean );
is_deeply( \@messages, [], q{The cleaned stuff shouldn't have any errors} );
$clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]+)"/"Tidy"/;
+$clean =~ s/<!DOCTYPE html[^>]*>/<!DOCTYPE html>/;
my $expected = do { local $/ = undef; <DATA> };
is( $clean, $expected, 'Cleaned up properly' );
__DATA__
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
+<!DOCTYPE html>
<html>
<head>
<meta name="generator" content="Tidy">
diff --git a/t/wordwrap.t b/t/wordwrap.t
index 717d895..3f4daaf 100644
--- a/t/wordwrap.t
+++ b/t/wordwrap.t
@@ -11,7 +11,7 @@ my $input=q{Here's some <B>ed and <BR/>eakfest MarkUp};
my $expected=<<'EOD';
<!DOCTYPE
-html PUBLIC "-//W3C//DTD HTML 3.2//EN">
+html>
<html>
<head>
<title>
@@ -32,6 +32,7 @@ my $cfg = 't/wordwrap.cfg';
my $tidy = HTML::Tidy->new( {config_file => $cfg} );
my $result = $tidy->clean( $input );
+$result =~ s/<!DOCTYPE ?\nhtml[^>]*>/<!DOCTYPE \nhtml>/;
my @result = split(/\n/, $result);
is_deeply( \@result, \@expected, 'Cleaned stuff looks like what we expected');
From: Simon McVittie <smcv@debian.org>
Date: Fri, 22 Jul 2016 09:29:39 +0100
Subject: t/unicode.t: don't assert that the DOCTYPE is preserved
tidy-html5 currently doesn't preserve user-supplied DOCTYPEs
in output: <https://github.com/htacg/tidy-html5/issues/435>
Signed-off-by: Simon McVittie <smcv@debian.org>
---
t/unicode.html | 2 +-
t/unicode.t | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/t/unicode.html b/t/unicode.html
index a90f83f..c8d1804 100644
--- a/t/unicode.html
+++ b/t/unicode.html
@@ -1,4 +1,4 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
+<!DOCTYPE html>
<html>
<head>
<title>日本語のホムページ</title>
diff --git a/t/unicode.t b/t/unicode.t
index 9ca6370..679b48a 100644
--- a/t/unicode.t
+++ b/t/unicode.t
@@ -53,7 +53,7 @@ subtest 'Try send bytes to clean method.' => sub {
};
__DATA__
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
+<!DOCTYPE html>
<html>
<head>
<meta name="generator" content="Tidy">
From: Simon McVittie <smcv@debian.org>
Date: Fri, 22 Jul 2016 09:27:26 +0100
Subject: t: use a pre-HTML5 DOCTYPE to get warnings about unescaped ampersand
HTML5 defines an ampersand followed by whitespace to be unambiguously
an ampersand, matching what browsers have always done in practice.
As a result, tidy-html5 does not warn about them when the doctype
is either HTML5 or missing (lack of a DOCTYPE is treated as HTML5,
on the basis that HTML5 is a closer match for what browsers actually
do than any previous standard). Discussion here:
<https://github.com/htacg/tidy-html5/issues/207>
Adding the DOCTYPE throws off some of the line numbering, which needs
adjusting.
t/ignore-text.t also seems to rely on the missing DOCTYPE provoking a
warning, which is obviously not going to happen now that we've
added one, to be able to verify that case-insensitive ignoring
can work. Add a new error so we can ignore that instead.
Signed-off-by: Simon McVittie <smcv@debian.org>
---
t/ignore-text.t | 8 +++++---
t/ignore.t | 10 +++++-----
t/levels.t | 10 +++++-----
3 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/t/ignore-text.t b/t/ignore-text.t
index 9695a5a..a95e0e0 100644
--- a/t/ignore-text.t
+++ b/t/ignore-text.t
@@ -10,8 +10,8 @@ use HTML::Tidy;
my $html = do { local $/; <DATA> };
my @expected_messages = split /\n/, q{
-DATA (24:XX) Warning: unescaped & which should be written as &amp;
-DATA (24:XX) Warning: unescaped & which should be written as &amp;
+DATA (26:XX) Warning: unescaped & which should be written as &amp;
+DATA (26:XX) Warning: unescaped & which should be written as &amp;
};
chomp @expected_messages;
@@ -22,7 +22,7 @@ IGNORE_BOGOTAG: {
isa_ok( $tidy, 'HTML::Tidy' );
$tidy->ignore( text => qr/bogotag/ );
- $tidy->ignore( text => [ qr/UNESCAPED/, qr/doctype/i ] );
+ $tidy->ignore( text => [ qr/UNESCAPED/, qr/case-insensitive/i ] );
# The qr/UNESCAPED/ should not ignore anything because there's no /i
my $rc = $tidy->parse( 'DATA', $html );
ok( $rc, 'Parsed OK' );
@@ -44,6 +44,7 @@ sub munge_returned {
}
}
__DATA__
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
<HTML>
<HEAD>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
@@ -67,6 +68,7 @@ DIV.TOC P {
</HEAD>
<BODY BGCOLOR="white">
<BOGOTAG>
+<CASE-INSENSITIVE>
<IMG SRC="/pix/petdance-logo-400x312.gif" HEIGHT=312 WIDTH=400 ALT="Andy & Amy's Pet Supplies & Dance Instruction" ALIGN=RIGHT>
<DIV CLASS="TOC">
<h2>Perl, Programming &amp; Writing</h2>
diff --git a/t/ignore.t b/t/ignore.t
index 3991733..c0a1317 100644
--- a/t/ignore.t
+++ b/t/ignore.t
@@ -10,16 +10,15 @@ use HTML::Tidy;
my $html = do { local $/ = undef; <DATA> };
my @expected_warnings = split /\n/, q{
-- (1:1) Warning: missing <!DOCTYPE> declaration
-- (23:1) Warning: discarding unexpected <bogotag>
-- (24:XX) Warning: unescaped & which should be written as &amp;
-- (24:XX) Warning: unescaped & which should be written as &amp;
+- (24:1) Warning: discarding unexpected <bogotag>
+- (25:XX) Warning: unescaped & which should be written as &amp;
+- (25:XX) Warning: unescaped & which should be written as &amp;
};
chomp @expected_warnings;
shift @expected_warnings; # First one's blank
my @expected_errors = split /\n/, q{
-- (23:1) Error: <bogotag> is not recognized!
+- (24:1) Error: <bogotag> is not recognized!
};
chomp @expected_errors;
shift @expected_errors; # First one's blank
@@ -71,6 +70,7 @@ sub munge_returned {
}
}
__DATA__
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
<HTML>
<HEAD>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
diff --git a/t/levels.t b/t/levels.t
index 01aeb3b..2ee3162 100644
--- a/t/levels.t
+++ b/t/levels.t
@@ -13,11 +13,10 @@ my $rc = $tidy->parse( '-', <DATA> );
ok( $rc, 'Parsed OK' );
my @expected = split /\n/, q{
-- (1:1) Warning: missing <!DOCTYPE> declaration
-- (23:1) Error: <bogotag> is not recognized!
-- (23:1) Warning: discarding unexpected <bogotag>
-- (24:XX) Warning: unescaped & which should be written as &amp;
-- (24:XX) Warning: unescaped & which should be written as &amp;
+- (24:1) Error: <bogotag> is not recognized!
+- (24:1) Warning: discarding unexpected <bogotag>
+- (25:XX) Warning: unescaped & which should be written as &amp;
+- (25:XX) Warning: unescaped & which should be written as &amp;
};
chomp @expected;
shift @expected; # First one's blank
@@ -41,6 +40,7 @@ sub munge_returned {
}
__DATA__
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
<HTML>
<HEAD>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
From: Simon McVittie <smcv@debian.org>
Date: Fri, 22 Jul 2016 09:34:53 +0100
Subject: t/venus.t: adjust expected result for tidy-html5
tidy-html5 allows arbitrary block content inside <address>, whereas
traditional tidy only allowed inline content.
Reference: <https://github.com/htacg/tidy-html5/issues/55>
This change will break with traditional tidy, and it isn't clear
to me how to remain compatible with both.
Signed-off-by: Simon McVittie <smcv@debian.org>
---
t/venus.t | 32 +++++++++++++++++++-------------
1 file changed, 19 insertions(+), 13 deletions(-)
diff --git a/t/venus.t b/t/venus.t
index 41ee597..bd94d4b 100755
--- a/t/venus.t
+++ b/t/venus.t
@@ -72,18 +72,24 @@ __DATA__
<a href="../../General/Credits.html">Credits</a> |
<a href="../../General/Feedback.html">Feedback</a> |</h4>
</center>
- <center>
- <p>
- <img src="../../WetlandGraphics/GoldbarThread.gif" width="648" height="4" align="bottom" />
- </p>
- </center>
- <div align="center"></div>
- <center>
- <address>Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.</address>
- <address> </address>
- <address>Authors: Twin Groves Museums in the Classroom Team,</address>
- <address>School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089</address>
- </center>
- <center>Created: 27 June 1998- Updated: 6 October 2003</center>
+ <address>
+ <center>
+ <p>
+ <img src="../../WetlandGraphics/GoldbarThread.gif" width="648" height="4" align="bottom" />
+ </p>
+ </center>
+ <div align="center"></div>
+ <address>
+ <center>
+ <address>Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.</address>
+ <address> </address>
+ <address>Authors: Twin Groves Museums in the Classroom Team,</address>
+ <address>School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089</address>
+ </center>
+ </address>
+ <address>
+ <center>Created: 27 June 1998- Updated: 6 October 2003</center>
+ </address>
+ </address>
</body>
</html>
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment