diff --git a/debian/changelog b/debian/changelog index 2c0a999c27133353b3e2b44330f2544987219410..21011804bd83ecc21f75f0fd6a11ab9af3a18a30 100644 --- a/debian/changelog +++ b/debian/changelog @@ -16,6 +16,14 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium upstream. * d/patches: put all patches in the git style allowed by DEP-3, so they can be manipulated with gbp-pq + * d/p/lib-ignore-tidy-html5-s-differently-formatted-summar.patch, + d/p/t-allow-tidy-html5-as-generator.patch, + d/p/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch, + d/p/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch, + d/p/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch, + d/p/t-venus.t-adjust-expected-result-for-tidy-html5.patch: + add patches to make the tests pass with tidy-html5 providing libtidy + (Closes: #829409) -- gregor herrmann <gregoa@debian.org> Thu, 27 Feb 2014 22:36:29 +0100 diff --git a/debian/control b/debian/control index 0ca02f823be4430724b5142ce2acab2abb370762..072fde54d8487bf5575228cd87dd529ead077ceb 100644 --- a/debian/control +++ b/debian/control @@ -7,7 +7,7 @@ Section: perl Priority: optional Build-Depends: debhelper (>= 9.20120312), help2man, - libtidy-dev, + libtidy-dev (>= 1:5.2.0), libtest-pod-perl, libtest-pod-coverage-perl, perl (>= 5.13.11) | libtest-simple-perl (>= 0.98), diff --git a/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch new file mode 100644 index 0000000000000000000000000000000000000000..04801b24da58880baeeaa5b1d0bf20eba4b49b21 --- /dev/null +++ b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch @@ -0,0 +1,24 @@ +From: Simon McVittie <smcv@debian.org> +Date: Fri, 22 Jul 2016 09:19:08 +0100 +Subject: lib: ignore tidy-html5's differently-formatted summary line + +Signed-off-by: Simon McVittie <smcv@debian.org> +--- + lib/HTML/Tidy.pm | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git 
a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm +index a172ea2..aa16c79 100644 +--- a/lib/HTML/Tidy.pm ++++ b/lib/HTML/Tidy.pm +@@ -265,6 +265,10 @@ sub _parse_errors { + # Summary line we don't want + + } ++ elsif ( $line =~ /^Tidy found \d+ warnings? and \d+ errors?!/ ) { ++ # Summary line we don't want ++ ++ } + elsif ( $line eq 'No warnings or errors were found.' ) { + # Summary line we don't want + diff --git a/debian/patches/series b/debian/patches/series index 4adbeaa7e17a19ba2cd5f85f8df474abfd185853..0fa4da0727e086fcadba8bd75cb9ee3b8458e533 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,3 +1,9 @@ remove-tidy_version.patch tidy-not-tidyp.patch +lib-ignore-tidy-html5-s-differently-formatted-summar.patch +t-allow-tidy-html5-as-generator.patch +t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch +t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch +t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch +t-venus.t-adjust-expected-result-for-tidy-html5.patch fix-error-message-in-webtidy diff --git a/debian/patches/t-allow-tidy-html5-as-generator.patch b/debian/patches/t-allow-tidy-html5-as-generator.patch new file mode 100644 index 0000000000000000000000000000000000000000..c3d6f30263e1011504d1cf26d34dd2f9702517b4 --- /dev/null +++ b/debian/patches/t-allow-tidy-html5-as-generator.patch @@ -0,0 +1,47 @@ +From: Simon McVittie <smcv@debian.org> +Date: Fri, 22 Jul 2016 09:21:39 +0100 +Subject: t: allow tidy-html5 as generator + +Signed-off-by: Simon McVittie <smcv@debian.org> +--- + t/roundtrip.t | 2 +- + t/unicode.t | 6 ++---- + 2 files changed, 3 insertions(+), 5 deletions(-) + +diff --git a/t/roundtrip.t b/t/roundtrip.t +index e37cb88..2590795 100644 +--- a/t/roundtrip.t ++++ b/t/roundtrip.t +@@ -25,7 +25,7 @@ my @messages = $tidy->messages( $clean ); + + is_deeply( \@messages, [], q{The cleaned stuff shouldn't have any errors} ); + +-$clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/; ++$clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy 
for HTML5[^"]+)"/"Tidy"/; + + my $expected = do { local $/ = undef; <DATA> }; + is( $clean, $expected, 'Cleaned up properly' ); +diff --git a/t/unicode.t b/t/unicode.t +index 2f45384..9ca6370 100644 +--- a/t/unicode.t ++++ b/t/unicode.t +@@ -30,8 +30,7 @@ ok(utf8::is_utf8($reference), 'reference is utf8'); + my $clean = $tidy->clean( $html ); + ok(utf8::is_utf8($clean), 'cleaned output is also unicode'); + +-$clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/; +-$clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/; ++$clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]*)"/"Tidy"/; + is($clean, $reference, q{Cleanup didn't break anything}); + + my @messages = $tidy->messages; +@@ -49,8 +48,7 @@ subtest 'Try send bytes to clean method.' => sub { + ok(!utf8::is_utf8($html), 'html is row bytes'); + my $clean = $tidy->clean( $html ); + ok(utf8::is_utf8($clean), 'but cleaned output is string'); +- $clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/; +- $clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/; ++ $clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]*)"/"Tidy"/; + is($clean, $reference, q{Cleanup didn't break anything}); + }; + diff --git a/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch b/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch new file mode 100644 index 0000000000000000000000000000000000000000..abbaffc3587a1a8ed48b34c040a4e112136122c8 --- /dev/null +++ b/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch @@ -0,0 +1,52 @@ +From: Simon McVittie <smcv@debian.org> +Date: Fri, 22 Jul 2016 09:32:09 +0100 +Subject: t: don't assert that tidy will add a HTML 3.2 doctype + +tidy-html5 adds the HTML5 doctype, <!DOCTYPE html>. 
+ +Signed-off-by: Simon McVittie <smcv@debian.org> +--- + t/roundtrip.t | 3 ++- + t/wordwrap.t | 3 ++- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/t/roundtrip.t b/t/roundtrip.t +index 2590795..68be07f 100644 +--- a/t/roundtrip.t ++++ b/t/roundtrip.t +@@ -26,12 +26,13 @@ my @messages = $tidy->messages( $clean ); + is_deeply( \@messages, [], q{The cleaned stuff shouldn't have any errors} ); + + $clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]+)"/"Tidy"/; ++$clean =~ s/<!DOCTYPE html[^>]*>/<!DOCTYPE html>/; + + my $expected = do { local $/ = undef; <DATA> }; + is( $clean, $expected, 'Cleaned up properly' ); + + __DATA__ +-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> ++<!DOCTYPE html> + <html> + <head> + <meta name="generator" content="Tidy"> +diff --git a/t/wordwrap.t b/t/wordwrap.t +index 717d895..3f4daaf 100644 +--- a/t/wordwrap.t ++++ b/t/wordwrap.t +@@ -11,7 +11,7 @@ my $input=q{Here's some <B>ed and <BR/>eakfest MarkUp}; + + my $expected=<<'EOD'; + <!DOCTYPE +-html PUBLIC "-//W3C//DTD HTML 3.2//EN"> ++html> + <html> + <head> + <title> +@@ -32,6 +32,7 @@ my $cfg = 't/wordwrap.cfg'; + my $tidy = HTML::Tidy->new( {config_file => $cfg} ); + + my $result = $tidy->clean( $input ); ++$result =~ s/<!DOCTYPE ?\nhtml[^>]*>/<!DOCTYPE \nhtml>/; + my @result = split(/\n/, $result); + is_deeply( \@result, \@expected, 'Cleaned stuff looks like what we expected'); + diff --git a/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch b/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch new file mode 100644 index 0000000000000000000000000000000000000000..48dda493f3a0c853b35edda2206bec13c687c43c --- /dev/null +++ b/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch @@ -0,0 +1,36 @@ +From: Simon McVittie <smcv@debian.org> +Date: Fri, 22 Jul 2016 09:29:39 +0100 +Subject: t/unicode.t: don't assert that the DOCTYPE is preserved + +tidy-html5 currently doesn't preserve 
user-supplied DOCTYPEs +in output: <https://github.com/htacg/tidy-html5/issues/435> + +Signed-off-by: Simon McVittie <smcv@debian.org> +--- + t/unicode.html | 2 +- + t/unicode.t | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/t/unicode.html b/t/unicode.html +index a90f83f..c8d1804 100644 +--- a/t/unicode.html ++++ b/t/unicode.html +@@ -1,4 +1,4 @@ +-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> ++<!DOCTYPE html> + <html> + <head> + <title>日本語ã®ãƒ›ãƒ ページ</title> +diff --git a/t/unicode.t b/t/unicode.t +index 9ca6370..679b48a 100644 +--- a/t/unicode.t ++++ b/t/unicode.t +@@ -53,7 +53,7 @@ subtest 'Try send bytes to clean method.' => sub { + }; + + __DATA__ +-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> ++<!DOCTYPE html> + <html> + <head> + <meta name="generator" content="Tidy"> diff --git a/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch b/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch new file mode 100644 index 0000000000000000000000000000000000000000..e010fc9bcfb6eb91f804689fa1064a5855439f51 --- /dev/null +++ b/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch @@ -0,0 +1,128 @@ +From: Simon McVittie <smcv@debian.org> +Date: Fri, 22 Jul 2016 09:27:26 +0100 +Subject: t: use a pre-HTML5 DOCTYPE to get warnings about unescaped ampersand + +HTML5 defines an ampersand followed by whitespace to be unambiguously +an ampersand, matching what browsers have always done in practice. +As a result, tidy-html5 does not warn about them when the doctype +is either HTML5 or missing (lack of a DOCTYPE is treated as HTML5, +on the basis that HTML5 is a closer match for what browsers actually +do than any previous standard). Discussion here: +<https://github.com/htacg/tidy-html5/issues/207> + +Adding the DOCTYPE throws off some of the line numbering, which needs +adjusting. 
+ +t/ignore-text.t also seems to rely on the missing DOCTYPE provoking a +warning, which is obviously not going to happen now that we've +added one, to be able to verify that case-insensitive ignoring +can work. Add a new error so we can ignore that instead. + +Signed-off-by: Simon McVittie <smcv@debian.org> +--- + t/ignore-text.t | 8 +++++--- + t/ignore.t | 10 +++++----- + t/levels.t | 10 +++++----- + 3 files changed, 15 insertions(+), 13 deletions(-) + +diff --git a/t/ignore-text.t b/t/ignore-text.t +index 9695a5a..a95e0e0 100644 +--- a/t/ignore-text.t ++++ b/t/ignore-text.t +@@ -10,8 +10,8 @@ use HTML::Tidy; + my $html = do { local $/; <DATA> }; + + my @expected_messages = split /\n/, q{ +-DATA (24:XX) Warning: unescaped & which should be written as &amp; +-DATA (24:XX) Warning: unescaped & which should be written as &amp; ++DATA (26:XX) Warning: unescaped & which should be written as &amp; ++DATA (26:XX) Warning: unescaped & which should be written as &amp; + }; + + chomp @expected_messages; +@@ -22,7 +22,7 @@ IGNORE_BOGOTAG: { + isa_ok( $tidy, 'HTML::Tidy' ); + + $tidy->ignore( text => qr/bogotag/ ); +- $tidy->ignore( text => [ qr/UNESCAPED/, qr/doctype/i ] ); ++ $tidy->ignore( text => [ qr/UNESCAPED/, qr/case-insensitive/i ] ); + # The qr/UNESCAPED/ should not ignore anything because there's no /i + my $rc = $tidy->parse( 'DATA', $html ); + ok( $rc, 'Parsed OK' ); +@@ -44,6 +44,7 @@ sub munge_returned { + } + } + __DATA__ ++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> + <HTML> + <HEAD> + <META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1"> +@@ -67,6 +68,7 @@ DIV.TOC P { + </HEAD> + <BODY BGCOLOR="white"> + <BOGOTAG> ++<CASE-INSENSITIVE> + <IMG SRC="/pix/petdance-logo-400x312.gif" HEIGHT=312 WIDTH=400 ALT="Andy & Amy's Pet Supplies & Dance Instruction" ALIGN=RIGHT> + <DIV CLASS="TOC"> + <h2>Perl, Programming &amp; Writing</h2> +diff --git a/t/ignore.t b/t/ignore.t +index 3991733..c0a1317 100644 +--- a/t/ignore.t ++++ b/t/ignore.t +@@ -10,16 +10,15 @@ use 
HTML::Tidy; + my $html = do { local $/ = undef; <DATA> }; + + my @expected_warnings = split /\n/, q{ +-- (1:1) Warning: missing <!DOCTYPE> declaration +-- (23:1) Warning: discarding unexpected <bogotag> +-- (24:XX) Warning: unescaped & which should be written as &amp; +-- (24:XX) Warning: unescaped & which should be written as &amp; ++- (24:1) Warning: discarding unexpected <bogotag> ++- (25:XX) Warning: unescaped & which should be written as &amp; ++- (25:XX) Warning: unescaped & which should be written as &amp; + }; + chomp @expected_warnings; + shift @expected_warnings; # First one's blank + + my @expected_errors = split /\n/, q{ +-- (23:1) Error: <bogotag> is not recognized! ++- (24:1) Error: <bogotag> is not recognized! + }; + chomp @expected_errors; + shift @expected_errors; # First one's blank +@@ -71,6 +70,7 @@ sub munge_returned { + } + } + __DATA__ ++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> + <HTML> + <HEAD> + <META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1"> +diff --git a/t/levels.t b/t/levels.t +index 01aeb3b..2ee3162 100644 +--- a/t/levels.t ++++ b/t/levels.t +@@ -13,11 +13,10 @@ my $rc = $tidy->parse( '-', <DATA> ); + ok( $rc, 'Parsed OK' ); + + my @expected = split /\n/, q{ +-- (1:1) Warning: missing <!DOCTYPE> declaration +-- (23:1) Error: <bogotag> is not recognized! +-- (23:1) Warning: discarding unexpected <bogotag> +-- (24:XX) Warning: unescaped & which should be written as &amp; +-- (24:XX) Warning: unescaped & which should be written as &amp; ++- (24:1) Error: <bogotag> is not recognized! 
++- (24:1) Warning: discarding unexpected <bogotag> ++- (25:XX) Warning: unescaped & which should be written as &amp; ++- (25:XX) Warning: unescaped & which should be written as &amp; + }; + chomp @expected; + shift @expected; # First one's blank +@@ -41,6 +40,7 @@ sub munge_returned { + } + + __DATA__ ++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> + <HTML> + <HEAD> + <META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1"> diff --git a/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch b/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch new file mode 100644 index 0000000000000000000000000000000000000000..004b09f47a7fb1bebf4a6e83555ab6b562ede9b9 --- /dev/null +++ b/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch @@ -0,0 +1,58 @@ +From: Simon McVittie <smcv@debian.org> +Date: Fri, 22 Jul 2016 09:34:53 +0100 +Subject: t/venus.t: adjust expected result for tidy-html5 + +tidy-html5 allows arbitrary block content inside <address>, whereas +traditional tidy only allowed inline content. +Reference: <https://github.com/htacg/tidy-html5/issues/55> + +This change will break with traditional tidy, and it isn't clear +to me how to remain compatible with both. 
+ +Signed-off-by: Simon McVittie <smcv@debian.org> +--- + t/venus.t | 32 +++++++++++++++++++------------- + 1 file changed, 19 insertions(+), 13 deletions(-) + +diff --git a/t/venus.t b/t/venus.t +index 41ee597..bd94d4b 100755 +--- a/t/venus.t ++++ b/t/venus.t +@@ -72,18 +72,24 @@ __DATA__ + <a href="../../General/Credits.html">Credits</a> | + <a href="../../General/Feedback.html">Feedback</a> |</h4> + </center> +- <center> +- <p> +- <img src="../../WetlandGraphics/GoldbarThread.gif" width="648" height="4" align="bottom" /> +- </p> +- </center> +- <div align="center"></div> +- <center> +- <address>Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.</address> +- <address> </address> +- <address>Authors: Twin Groves Museums in the Classroom Team,</address> +- <address>School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089</address> +- </center> +- <center>Created: 27 June 1998- Updated: 6 October 2003</center> ++ <address> ++ <center> ++ <p> ++ <img src="../../WetlandGraphics/GoldbarThread.gif" width="648" height="4" align="bottom" /> ++ </p> ++ </center> ++ <div align="center"></div> ++ <address> ++ <center> ++ <address>Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.</address> ++ <address> </address> ++ <address>Authors: Twin Groves Museums in the Classroom Team,</address> ++ <address>School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089</address> ++ </center> ++ </address> ++ <address> ++ <center>Created: 27 June 1998- Updated: 6 October 2003</center> ++ </address> ++ </address> + </body> + </html>