Mechtilde Stehmann · Mechtilde Stehmann · Mechtilde Stehmann · Mechtilde Stehmann · Mechtilde Stehmann · Mechtilde Stehmann
--- a/.gitattributes
+++ b/.gitattributes
+*   text=auto eol=lf
--- a/.travis.yml
+++ b/.travis.yml
 language: java

 jdk:
-    - openjdk6
    - openjdk7
-    - oraclejdk7
    - oraclejdk8
+    - oraclejdk9
+    - openjdk10
+    - oraclejdk11

 cache:
    directories:

--- a/CHANGES
+++ b/CHANGES
 jsoup changelog

+**** Release 1.12.1 [PENDING]
+  * Change: removed deprecated method to disable TLS cert checking Connection.validateTLSCertificates().
+
+  * Change: some internal methods have been rearranged; if you extended any of the Jsoup internals you may need to make
+    updates.
+
+  * Improvement: documents now remember their parser, so when later manipulating them, the correct HTML or XML tree
+    builder is reused, as are the parser settings like case preservation.
+    <https://github.com/jhy/jsoup/issues/769>
+
+  * Improvement: Jsoup now detects the character set of the input if specified in an XML Declaration, when using the
+    HTML parser. Previously that only happened when the XML parser was specified.
+    <https://github.com/jhy/jsoup/issues/1009>
+
+  * Improvement: if the document's input character set does not support encoding, flip it to one that does.
+    <https://github.com/jhy/jsoup/issues/1007>
+
+  * Improvement: if a start tag is missing a > and a new tag is seen with a <, treat that as a new tag. (This differs
+    from the HTML5 spec, which would make an attribute with a name beginning with <, but in practice this impacts too
+    many pages.)
+    <https://github.com/jhy/jsoup/issues/797>
+
+  * Improvement: performance tweaks when parsing start tags, data, tables.
+
+  * Improvement: added Element.nextElementSiblings() and Element.previousElementSiblings()
+    <https://github.com/jhy/jsoup/pull/1054>
+
+  * Improvement: treat center tags as block tags.
+    <https://github.com/jhy/jsoup/pull/1113>
+
+  * Improvement: allow forms to be submitted with Content-Type=multipart/form-data without requiring a file upload;
+    automatically set the mime boundary.
+    <https://github.com/jhy/jsoup/pull/1058>
+
+  * Improvement: Jsoup will now detect if an input file or URL is binary, and will refuse to attempt to parse it, with
+    an IO exception. This prevents runaway processing time and wasted effort creating meaningless parsed DOM trees.
+    <https://github.com/jhy/jsoup/issues/1192>
+
+  * Bugfix: when using the tag case preserving parsing settings, certain HTML tree building rules were not followed
+    for upper case tags.
+    <https://github.com/jhy/jsoup/issues/1149>
+
+  * Bugfix: when converting a Jsoup document to a W3C DOM, if an element is namespaced but not in a defined namespace,
+    set it to the global namespace.
+    <https://github.com/jhy/jsoup/issues/848>
+
+  * Bugfix: attributes created with the Attribute constructor with just spaces for names would incorrectly pass
+    validation.
+    <https://github.com/jhy/jsoup/issues/1159>
+
+  * Bugfix: some pseudo XML Declarations were incorrectly handled when using the XML Parser, leading to an IOOB
+    exception when parsing.
+    <https://github.com/jhy/jsoup/issues/1139>
+
+  * Bugfix: when parsing URL parameter names in an attribute that is not correctly HTML encoded, and near the end of the
+    current buffer, those parameters may be incorrectly dropped. (Improved CharacterReader mark/reset support.)
+    <https://github.com/jhy/jsoup/pull/1154>
+
+  * Bugfix: boolean attribute values would be returned as null, vs an empty string, when accessed via the
+    Attribute#getValue() method.
+    <https://github.com/jhy/jsoup/issues/1065>
+
+  * Bugfix: orphan Attribute objects (i.e. created outside of a parse or an Element) would throw an NPE on
+    Attribute#setValue(val)
+    <https://github.com/jhy/jsoup/issues/1107>
+
+  * Bugfix: Element.shallowClone() was not making a clone of its attributes.
+    <https://github.com/jhy/jsoup/issues/1201>
+
+  * Bugfix: fixed an ArrayIndexOutOfBoundsException in HttpConnection.looksLikeUtf8 when testing small strings in
+    specific ranges.
+    <https://github.com/jhy/jsoup/issues/1172>
+
+  * Updated jetty-server (which is used for integration tests) to latest 9.2 series (9.2.28).
+
+*** Release 1.11.3 [2018-Apr-15]
+  * Improvement: CDATA sections are now treated as whitespace preserving (regardless of the containing element), and are
+    round-tripped into output HTML.
+    <https://github.com/jhy/jsoup/issues/406>
+    <https://github.com/jhy/jsoup/issues/965>
+
+  * Improvement: added support for Deflate encoding.
+    <https://github.com/jhy/jsoup/pull/982>
+
+  * Improvement: when parsing <pre> tags, skip the first newline if present.
+    <https://github.com/jhy/jsoup/issues/825>
+
+  * Improvement: support nested quotes for attribute selection queries.
+    <https://github.com/jhy/jsoup/pull/988>
+
+  * Improvement: character references from Windows-1252 that are not valid Unicode are mapped to the appropriate
+    Unicode replacement.
+    <https://github.com/jhy/jsoup/pull/1046>
+
+  * Improvement: accept a custom SSL socket factory in Jsoup.Connection.
+    <https://github.com/jhy/jsoup/pull/1038>
+
+  * Bugfix: "Mark has been invalidated" exception was thrown when parsing some URLs on Android <= 6.
+    <https://github.com/jhy/jsoup/issues/990>
+
+  * Bugfix: The Element.text() for <div>One</div>Two was "OneTwo", not "One Two".
+    <https://github.com/jhy/jsoup/issues/812>
+
+  * Bugfix: boolean attributes with empty string values were not collapsing in HTML output.
+    <https://github.com/jhy/jsoup/issues/985>
+
+  * Bugfix: when using the XML Parser set to lowercase normalize tags, uppercase closing tags were not correctly
+    handled.
+    <https://github.com/jhy/jsoup/issues/998>
+
+  * Bugfix: when parsing from a URL, an end tag could be read incorrectly if it started on a buffer boundary.
+    <https://github.com/jhy/jsoup/issues/995>
+
+  * Bugfix: when parsing from a URL, if the remote server failed to complete its write (i.e. it writes less than the
+    Content Length header promised on a gzipped stream), the parse method would incorrectly throw an unchecked
+    exception. It now throws the declared IOException.
+    <https://github.com/jhy/jsoup/issues/980>
+
+  * Bugfix: leaf nodes (such as text nodes) were throwing an unsupported operation exception on childNodes(), instead
+    of just returning an empty list.
+    <https://github.com/jhy/jsoup/issues/1032>
+
+  * Bugfix: documents with a leading UTF-8 BOM did not have that BOM consumed, so it acted as a zero width no-break
+    space, which could impact the parse tree.
+    <https://github.com/jhy/jsoup/issues/1003>
+
+  * Bugfix: when parsing an invalid XML declaration, the parse would fail.
+    <https://github.com/jhy/jsoup/issues/1015>
+
+*** Release 1.11.2 [2017-Nov-19]
+  * Improvement: added a new pseudo selector :matchText, which allows text nodes to match as if they were elements.
+    This enables finding text that is only marked by a "br" tag, for example.
+    <https://github.com/jhy/jsoup/issues/550>
+
+  * Change: marked Connection.validateTLSCertificates() as deprecated.
+
+  * Improvement: normalize invisible characters (like soft-hyphens) in Element.text().
+    <https://github.com/jhy/jsoup/issues/978>
+
+  * Improvement: added Element.wholeText(), to easily get the un-normalized text value of an element and its children.
+    <https://github.com/jhy/jsoup/pull/564>
+    
+  * Bugfix: in a deep DOM stack, a StackOverFlow exception could occur when generating implied end tags.
+    <https://github.com/jhy/jsoup/issues/966>
+
+  * Bugfix: when parsing attribute values that happened to cross a buffer boundary, a character was dropped.
+    <https://github.com/jhy/jsoup/issues/967>
+
+  * Bugfix: fixed an issue that prevented using infinite timeouts in Jsoup.Connection.
+    <https://github.com/jhy/jsoup/issues/968>
+
+  * Bugfix: whitespace preserving tags were not honoured when nested deeper than two levels deep.
+    <https://github.com/jhy/jsoup/issues/722>
+
+  * Bugfix: an unterminated comment token at the end of the HTML input would cause an out of bounds exception.
+    <https://github.com/jhy/jsoup/issues/972>
+
+  * Bugfix: an NPE in the Cleaner which would occur if an <a href> attribute value was missing.
+    <https://github.com/jhy/jsoup/issues/973>
+
+  * Bugfix: when serializing the same document in multiple threads, on Android, with a character set that is not ascii
+    or UTF-8, an encoding exception could occur.
+    <https://github.com/jhy/jsoup/issues/970>
+
+  * Bugfix: removing a form value from the DOM would not remove it from FormData.
+    <https://github.com/jhy/jsoup/pull/969>
+
+  * Bugfix: in the W3CDom transformer, siblings were incorrectly inheriting namespaces defined on previous siblings.
+    <https://github.com/jhy/jsoup/issues/977>
+
+*** Release 1.11.1 [2017-Nov-06]
+  * Updated language level to Java 7 from Java 5. To maintain Android support (of minversion 8), try-with-resources are
+    not used.
+    <https://github.com/jhy/jsoup/issues/899>
+
+  * When loading content from a URL or a file, the content is now parsed as it streams in from the network or disk,
+    rather than being fully buffered before parsing. This substantially reduces memory consumption & large garbage
+    objects when loading large files. Note that this change means that a response, once parsed, may not be parsed
+    again from the same response object unless you call response.bufferUp() first, which will buffer the full response
+    into memory.
+    <https://github.com/jhy/jsoup/issues/904>
+
+  * Added Connection.Response.bodyStream(), a method to get the response body as an input stream. This is useful for
+    saving a large response straight to a file, without buffering fully into memory first.
+
+  * Performance improvements in text and HTML generation (through less GC).
+
+  * Reduced memory consumption of text, scripts, and comments in the DOM by 40%, by refactoring the node
+    hierarchy to not track childnodes or attributes by default for leaf nodes. For the average document, that's about a
+    30% memory reduction.
+    <https://github.com/jhy/jsoup/issues/911>
+
+  * Reduced memory consumption of Elements by refactoring their Attributes to be a simple pair of arrays, vs a
+    LinkedHashSet.
+    <https://github.com/jhy/jsoup/issues/911>
+
+  * Added support for Element.selectFirst(query), to efficiently find the first matching element.
+
+  * Added Element.appendTo(parent) to simplify slinging elements about.
+    <https://github.com/jhy/jsoup/pull/662>
+
+  * Added support for multiple headers with the same name in Jsoup.Connect
+
+  * Added Element.shallowClone() and Node.shallowClone(), to allow cloning nodes without getting all their children.
+    <https://github.com/jhy/jsoup/issues/900>
+
+  * Updated Element.text() and the :contains(text) selector to consider &nbsp; character as spaces.
+
+  * Updated Jsoup.connect().timeout() to implement a total connect + combined read timeout. Previously it specified
+    connect and buffer read times only, so to implement a combined total timeout, you had to have another thread send
+    an interrupt.
+
+  * Improved performance of Node.addChildren (was quadratic)
+    <https://github.com/jhy/jsoup/pull/930>
+
+  * Added missing support for template tags in tables
+    <https://github.com/jhy/jsoup/pull/901>
+
+  * In Jsoup.connect file uploads, added the ability to set the uploaded files' mimetype.
+     <https://github.com/jhy/jsoup/issues/936>
+
+  * Improved Node traversal, including less object creation, and partial and filtering traversor support.
+    <https://github.com/jhy/jsoup/pull/849>
+
+  * Bugfix: if a document was redecoded after character set detection, the HTML parser was not reset correctly,
+    which could lead to an incorrect DOM.
+    <https://github.com/jhy/jsoup/issues/877>
+
+  * Bugfix: attributes with the same name but different case would be incorrectly treated as different attributes.
+    <https://github.com/jhy/jsoup/pull/903>
+
+  * Bugfix: self-closing tags for known empty elements were incorrectly treated as errors.
+    <https://github.com/jhy/jsoup/issues/868>
+
+  * Bugfix: fixed an issue where a self-closing title, noframes, or style tag would cause the rest of the page to be
+    incorrectly parsed as data or text.
+    <https://github.com/jhy/jsoup/issues/906>
+
+  * Bugfix: fixed an issue with unknown mixed-case tags
+    <https://github.com/jhy/jsoup/pull/942>
+
+  * Bugfix: fixed an issue where the entity resources were left open after startup, causing a warning.
+    <https://github.com/jhy/jsoup/pull/928>
+
+  * Bugfix: fixed an issue where Element.getElementsByIndexLessThan(index) would incorrectly provide the root element
+    <https://github.com/jhy/jsoup/pull/918>
+
+  * Improved parse time for pages with exceptionally deeply nested tags.
+    <https://github.com/jhy/jsoup/issues/955>
+
+  * Improvement / workaround: modified the Entities implementation to load its data from a .class vs from a jar resource.
+    Faster, and safer on Android.
+    <https://github.com/jhy/jsoup/issues/959>
+
+*** Release 1.10.3 [2017-Jun-11]
+  * Added Elements.eachText() and Elements.eachAttr(name), which return a list of Element's text or attribute values,
+    respectively. This makes it simpler to for example get a list of each URL on a page:
+    List<String> urls = doc.select("a").eachAttr("abs:href");
+
+  * Improved selector validation for :contains(...) with unbalanced quotes.
+    <https://github.com/jhy/jsoup/issues/803>
+
+  * Improved the speed of index based CSS selectors and other methods that use elementSiblingIndex, by a factor of 34x.
+    <https://github.com/jhy/jsoup/pull/862>
+
+  * Added Node.clearAttributes(), to simplify removing of all attributes of a Node / Element.
+    <https://github.com/jhy/jsoup/issues/829>
+
+  * Bugfix: if an attribute name started or ended with a control character, the parse would fail with a validation
+    exception.
+    <https://github.com/jhy/jsoup/issues/793>
+
+  * Bugfix: Element.hasClass() and the ".classname" selector would not find the class attribute case-insensitively.
+    <https://github.com/jhy/jsoup/issues/814>
+
+  * Bugfix: In Jsoup.Connection, if a redirect contained a query string with %xx escapes, they would be double escaped
+    before the redirect was followed, leading to fetching an incorrect location.
+
+  * Bugfix: In Jsoup.Connection, if a request body was set and the connection was redirected, the body would incorrectly
+    still be sent.
+    <https://github.com/jhy/jsoup/pull/881>
+
+  * Bugfix: In DataUtil when detecting the character set from meta data, and there are two Content-Types defined, use
+    the one that defines a character set.
+    <https://github.com/jhy/jsoup/pull/835>
+
+  * Bugfix: when parsing unknown tags in case-sensitive HTML mode, end tags would not close scope correctly.
+    <https://github.com/jhy/jsoup/issues/819>
+
+  * In Jsoup.Connection, ensure there is no Content-Type set when being redirected to a GET.
+    <https://github.com/jhy/jsoup/pull/895>
+
+  * Bugfix: in certain locales (Turkey specifically), lowercasing and case insensitivity could fail for specific items.
+    <https://github.com/jhy/jsoup/pull/820>
+
+  * Bugfix: after an element was cloned, changes to its child list were not notifying the element correctly.
+    <https://github.com/jhy/jsoup/issues/951>
+
 *** Release 1.10.2 [2017-Jan-02]
 * Improved startup time, particularly on Android, by reducing garbage generation and CPU execution time when loading
   the HTML entity files. About 1.72x faster in this area.

--- a/LICENSE
+++ b/LICENSE
 The MIT License

-© 2009-2017, Jonathan Hedley <jonathan@hedley.net>
+Copyright (c) 2009-2019 Jonathan Hedley <jonathan@hedley.net>

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -9,13 +9,13 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README
+++ b/README
-jsoup: Java HTML parser that makes sense of real-world HTML soup.
-
-jsoup is a Java library for working with real-world HTML. It provides a very convenient API for extracting and manipulating data, using the best of DOM, CSS, and jquery-like methods.
-
-jsoup implements the WHATWG HTML5 specification (http://whatwg.org/html), and parses HTML to the same DOM as modern browsers do.
-
-* parse HTML from a URL, file, or string
-* find and extract data, using DOM traversal or CSS selectors
-* manipulate the HTML elements, attributes, and text
-* clean user-submitted content against a safe white-list, to prevent XSS
-* output tidy HTML
-
-jsoup is designed to deal with all varieties of HTML found in the wild; from pristine and validating, to invalid tag-soup; jsoup will create a sensible parse tree.
-
-jsoup runs on Java 1.5 and up.
-
-See https://jsoup.org/ for downloads and documentation.
--- a/README.md
+++ b/README.md
+# jsoup: Java HTML Parser
+
+**jsoup** is a Java library for working with real-world HTML. It provides a very convenient API for extracting and manipulating data, using the best of DOM, CSS, and jquery-like methods.
+
+
+**jsoup** implements the [WHATWG HTML5](http://whatwg.org/html) specification, and parses HTML to the same DOM as modern browsers do.
+
+* scrape and [parse](https://jsoup.org/cookbook/input/parse-document-from-string) HTML from a URL, file, or string
+* find and [extract data](https://jsoup.org/cookbook/extracting-data/selector-syntax), using DOM traversal or CSS selectors
+* manipulate the [HTML elements](https://jsoup.org/cookbook/modifying-data/set-html), attributes, and text
+* [clean](https://jsoup.org/cookbook/cleaning-html/whitelist-sanitizer) user-submitted content against a safe white-list, to prevent XSS attacks
+* output tidy HTML
+
+jsoup is designed to deal with all varieties of HTML found in the wild; from pristine and validating, to invalid tag-soup; jsoup will create a sensible parse tree.
+
+See [**jsoup.org**](https://jsoup.org/) for downloads and the full [API documentation](https://jsoup.org/apidocs/).
+
+[![Build Status](https://travis-ci.org/jhy/jsoup.svg?branch=master)](https://travis-ci.org/jhy/jsoup)
+
+## Example
+Fetch the [Wikipedia](http://en.wikipedia.org/wiki/Main_Page) homepage, parse it to a [DOM](https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model/Introduction), and select the headlines from the *In the News* section into a list of [Elements](https://jsoup.org/apidocs/index.html?org/jsoup/select/Elements.html):
+
+```java
+Document doc = Jsoup.connect("http://en.wikipedia.org/").get();
+log(doc.title());
+Elements newsHeadlines = doc.select("#mp-itn b a");
+for (Element headline : newsHeadlines) {
+  log("%s\n\t%s", 
+    headline.attr("title"), headline.absUrl("href"));
+}
+```
+[Online sample](https://try.jsoup.org/~LGB7rk_atM2roavV0d-czMt3J_g), [full source](https://github.com/jhy/jsoup/blob/master/src/main/java/org/jsoup/examples/Wikipedia.java).
+
+## Open source
+jsoup is an open source project distributed under the liberal [MIT license](https://jsoup.org/license). The source code is available at [GitHub](https://github.com/jhy/jsoup/tree/master/src/main/java/org/jsoup).
+
+## Getting started
+1. [Download](https://jsoup.org/download) the latest jsoup jar (or add it to your Maven/Gradle build)
+2. Read the [cookbook](https://jsoup.org/cookbook/)
+3. Enjoy!
+
+## Development and support
+If you have any questions on how to use jsoup, or have ideas for future development, please get in touch via the [mailing list](https://jsoup.org/discussion).
+
+If you find any issues, please file a [bug](https://jsoup.org/bugs) after checking for duplicates.
+
+The [colophon](https://jsoup.org/colophon) talks about the history of and tools used to build jsoup.
+
+## Status
+jsoup is in general, stable release.
--- a/debian/compat
+++ b/debian/compat
-11
--- a/debian/control
+++ b/debian/control
@@ -3,18 +3,17 @@ Section: java
 Priority: optional
 Maintainer: Debian Java Maintainers <pkg-java-maintainers@lists.alioth.debian.org>
 Uploaders:
- Torsten Werner <twerner@debian.org>,
- Jakub Adam <jakub.adam@ktknet.cz>,
- Emmanuel Bourg <ebourg@apache.org>
+ Torsten Werner <twerner@debian.org>
+ , Jakub Adam <jakub.adam@ktknet.cz>
+ , Emmanuel Bourg <ebourg@apache.org>
+ , Mechtilde Stehmann <mechtilde@debian.org>
 Build-Depends:
- debhelper (>= 11),
- default-jdk,
- default-jdk-doc,
- libgoogle-gson-java,
- libmaven-bundle-plugin-java,
- libmaven-javadoc-plugin-java,
- maven-debian-helper (>= 1.4),
- junit4
+ debhelper-compat (= 12)
+ , default-jdk
+ , libgoogle-gson-java
+ , libjetty9-java
+ , maven-debian-helper (>= 1.4)
+ , junit4
 Standards-Version: 4.1.4
 Vcs-Git: https://anonscm.debian.org/git/pkg-java/jsoup.git
 Vcs-Browser: https://anonscm.debian.org/cgit/pkg-java/jsoup.git

--- a/debian/copyright
+++ b/debian/copyright
@@ -18,6 +18,7 @@ License: MIT
 Files: debian/*
 Copyright: 2011, Torsten Werner <twerner@debian.org>
           2014-2016, Debian Java Maintainers <pkg-java-maintainers@lists.alioth.debian.org>
+           2019, Mechtilde Stehmann <mechtilde@debian.org>
 License: MIT

 Files: debian/patches/dfsg-free-test-data.patch

--- a/debian/maven.rules
+++ b/debian/maven.rules
@@ -18,3 +18,5 @@

 junit junit jar s/4\..*/4.x/ * *
 org.jsoup jsoup jar s/.*/debian/ * *
+com.google.code.gson gson * s/2.7/2.8*/ * test
+org.eclipse.jetty jetty-servlet * s/9.2.28.v20190418/9.4.8/ * test
--- a/debian/salsa-ci.yml
+++ b/debian/salsa-ci.yml
+include:
+ - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml
+ - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml
--- a/pom.xml
+++ b/pom.xml
@@ -5,30 +5,30 @@

  <groupId>org.jsoup</groupId>
  <artifactId>jsoup</artifactId>
-  <version>1.10.2</version>
+  <version>1.12.1</version>
  <description>jsoup is a Java library for working with real-world HTML. It provides a very convenient API for extracting and manipulating data, using the best of DOM, CSS, and jquery-like methods. jsoup implements the WHATWG HTML5 specification, and parses HTML to the same DOM as modern browsers do.</description>
  <url>https://jsoup.org/</url>
  <inceptionYear>2009</inceptionYear>
  <issueManagement>
-  	<system>GitHub</system>
-  	<url>http://github.com/jhy/jsoup/issues</url>
+    <system>GitHub</system>
+    <url>https://github.com/jhy/jsoup/issues</url>
  </issueManagement>
  <licenses>
-  	<license>
-  		<name>The MIT License</name>
-  		<url>https://jsoup.org/license</url>
-  		<distribution>repo</distribution>
-  	</license>
+    <license>
+      <name>The MIT License</name>
+      <url>https://jsoup.org/license</url>
+      <distribution>repo</distribution>
+    </license>
  </licenses>
  <scm>
-  	<url>https://github.com/jhy/jsoup</url>
+    <url>https://github.com/jhy/jsoup</url>
    <connection>scm:git:https://github.com/jhy/jsoup.git</connection>
    <!-- <developerConnection>scm:git:git@github.com:jhy/jsoup.git</developerConnection> -->
-    <tag>jsoup-1.10.2</tag>
+    <tag>jsoup-1.12.1</tag>
  </scm>
  <organization>
-  	<name>Jonathan Hedley</name>
-  	<url>http://jonathanhedley.com/</url>
+    <name>Jonathan Hedley</name>
+    <url>https://jhy.io/</url>
  </organization>

  <build>
@@ -38,16 +38,16 @@
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.5.1</version>
        <configuration>
-          <source>1.5</source>
-          <target>1.5</target>
+          <source>1.7</source>
+          <target>1.7</target>
          <encoding>UTF-8</encoding>
        </configuration>
      </plugin>
      <plugin>
-      	<!-- this plugin allows us to ensure Java 5 API compatibility -->
+        <!-- this plugin allows us to ensure Java 7 API compatibility -->
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>animal-sniffer-maven-plugin</artifactId>
-        <version>1.15</version>
+        <version>1.16</version>
        <executions>
          <execution>
            <id>animal-sniffer</id>
@@ -58,9 +58,14 @@
            <configuration>
              <signature>
                <groupId>org.codehaus.mojo.signature</groupId>
-                <artifactId>java15</artifactId>
+                <artifactId>java17</artifactId>
                <version>1.0</version>
              </signature>
+              <signature>
+                <groupId>net.sf.androidscents.signature</groupId>
+                <artifactId>android-api-level-8</artifactId>
+                <version>2.2_r3</version>
+              </signature>
            </configuration>
          </execution>
        </executions>
@@ -68,7 +73,7 @@
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-javadoc-plugin</artifactId>
-        <version>2.10.4</version>
+        <version>3.0.0-M1</version>
        <configuration>
          <additionalparam>-Xdoclint:none</additionalparam>
        </configuration>
@@ -87,6 +92,9 @@
        <artifactId>maven-source-plugin</artifactId>
        <version>3.0.1</version>
        <configuration>
+    <excludes>
+      <exclude>org/jsoup/examples/**</exclude>
+    </excludes>
        </configuration>
        <executions>
          <execution>
@@ -104,8 +112,14 @@
        <version>3.0.2</version>
        <configuration>
          <archive>
+            <manifestEntries>
+              <Automatic-Module-Name>org.jsoup</Automatic-Module-Name>
+            </manifestEntries>
            <manifestFile>${project.build.outputDirectory}/META-INF/MANIFEST.MF</manifestFile>
          </archive>
+    <excludes>
+      <exclude>org/jsoup/examples/**</exclude>
+    </excludes>
        </configuration>
      </plugin>
      <plugin>
@@ -150,7 +164,7 @@
        <filtering>false</filtering>
        <includes>
          <include>LICENSE</include>
-          <include>README</include>
+          <include>README.md</include>
          <include>CHANGES</include>
        </includes>
      </resource>
@@ -217,11 +231,27 @@
      <scope>test</scope>
    </dependency>

+    <dependency>
+      <!-- jetty for webserver integration tests. 9.2 is last with Java7 support -->
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-server</artifactId>
+      <version>9.2.26.v20180806</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <!-- jetty for webserver integration tests -->
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-servlet</artifactId>
+      <version>9.2.28.v20190418</version>
+      <scope>test</scope>
+    </dependency>
+
  </dependencies>

  <dependencyManagement>
-  	<dependencies>
-  	</dependencies>
+    <dependencies>
+    </dependencies>
  </dependencyManagement>

  <properties>

--- a/src/main/java/org/jsoup/Connection.java
+++ b/src/main/java/org/jsoup/Connection.java
@@ -3,11 +3,14 @@ package org.jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.parser.Parser;

+import javax.net.ssl.SSLSocketFactory;
+import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.Proxy;
 import java.net.URL;
 import java.util.Collection;
+import java.util.List;
 import java.util.Map;

 /**
@@ -83,10 +86,13 @@ public interface Connection {
    Connection userAgent(String userAgent);

    /**
-     * Set the request timeouts (connect and read). If a timeout occurs, an IOException will be thrown. The default
-     * timeout is <b<30 seconds</b> (30000 millis). A timeout of zero is treated as an infinite timeout.
+     * Set the total request timeout duration. If a timeout occurs, an {@link java.net.SocketTimeoutException} will be thrown.
+     * <p>The default timeout is <b>30 seconds</b> (30,000 millis). A timeout of zero is treated as an infinite timeout.
+     * <p>Note that this timeout specifies the combined maximum duration of the connection time and the time to read
+     * the full response.
     * @param millis number of milliseconds (thousandths of a second) before timing out connects or reads.
     * @return this Connection, for chaining
+     * @see #maxBodySize(int)
     */
    Connection timeout(int millis);

@@ -140,23 +146,11 @@ public interface Connection {
    Connection ignoreContentType(boolean ignoreContentType);

    /**
-     * Disable/enable TLS certificates validation for HTTPS requests.
-     * <p>
-     * By default this is <b>true</b>; all
-     * connections over HTTPS perform normal validation of certificates, and will abort requests if the provided
-     * certificate does not validate.
-     * </p>
-     * <p>
-     * Some servers use expired, self-generated certificates; or your JDK may not
-     * support SNI hosts. In which case, you may want to enable this setting.
-     * </p>
-     * <p>
-     * <b>Be careful</b> and understand why you need to disable these validations.
-     * </p>
-     * @param value if should validate TLS (SSL) certificates. <b>true</b> by default.
+     * Set custom SSL socket factory
+     * @param sslSocketFactory custom SSL socket factory
     * @return this Connection, for chaining
     */
-    Connection validateTLSCertificates(boolean value);
+    Connection sslSocketFactory(SSLSocketFactory sslSocketFactory);

    /**
     * Add a request data parameter. Request parameters are sent in the request query string for GETs, and in the
@@ -176,9 +170,23 @@ public interface Connection {
     * @param inputStream the input stream to upload, that you probably obtained from a {@link java.io.FileInputStream}.
     * You must close the InputStream in a {@code finally} block.
     * @return this Connections, for chaining
+     * @see #data(String, String, InputStream, String) if you want to set the uploaded file's mimetype.
     */
    Connection data(String key, String filename, InputStream inputStream);

+    /**
+     * Add an input stream as a request data parameter. For GETs, has no effect, but for POSTS this will upload the
+     * input stream.
+     * @param key data key (form item name)
+     * @param filename the name of the file to present to the remote server. Typically just the name, not path,
+     * component.
+     * @param inputStream the input stream to upload, that you probably obtained from a {@link java.io.FileInputStream}.
+     * @param contentType the Content Type (aka mimetype) to specify for this file.
+     * You must close the InputStream in a {@code finally} block.
+     * @return this Connections, for chaining
+     */
+    Connection data(String key, String filename, InputStream inputStream, String contentType);
+
    /**
     * Adds all of the supplied data to the request data parameters
     * @param data collection of data parameters
@@ -360,7 +368,8 @@ public interface Connection {
        T method(Method method);

        /**
-         * Get the value of a header. This is a simplified header model, where a header may only have one value.
+         * Get the value of a header. If there is more than one header value with the same name, the headers are returned
+         * comma separated, per <a href="https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2">rfc2616-sec4</a>.
         * <p>
         * Header names are case insensitive.
         * </p>
@@ -372,13 +381,30 @@ public interface Connection {
        String header(String name);

        /**
-         * Set a header. This method will overwrite any existing header with the same case insensitive name.
+         * Get the values of a header.
+         * @param name header name, case insensitive.
+         * @return a list of values for this header, or an empty list if not set.
+         */
+        List<String> headers(String name);
+
+        /**
+         * Set a header. This method will overwrite any existing header with the same case insensitive name. (If there
+         * is more than one value for this header, this method will update the first matching header.)
         * @param name Name of header
         * @param value Value of header
         * @return this, for chaining
+         * @see #addHeader(String, String)
         */
        T header(String name, String value);

+        /**
+         * Add a header. The header will be added regardless of whether a header with the same name already exists.
+         * @param name Name of new header
+         * @param value Value of new header
+         * @return this, for chaining
+         */
+        T addHeader(String name, String value);
+
        /**
         * Check if a header is present
         * @param name name of header (case insensitive)
@@ -395,18 +421,29 @@ public interface Connection {
        boolean hasHeaderWithValue(String name, String value);

        /**
-         * Remove a header by name
+         * Remove headers by name. If there is more than one header with this name, they will all be removed.
         * @param name name of header to remove (case insensitive)
         * @return this, for chaining
         */
        T removeHeader(String name);

        /**
-         * Retrieve all of the request/response headers as a map
+         * Retrieve all of the request/response header names and corresponding values as a map. For headers with multiple
+         * values, only the first header is returned.
+         * <p>Note that this is a view of the headers only, and changes made to this map will not be reflected in the
+         * request/response object.</p>
         * @return headers
+         * @see #multiHeaders()
+
         */
        Map<String, String> headers();

+        /**
+         * Retrieve all of the headers, keyed by the header name, and with a list of values per header.
+         * @return a map of header names to the list of values for each header.
+         */
+        Map<String, List<String>> multiHeaders();
+
        /**
         * Get a cookie value by name from this request/response.
         * <p>
@@ -540,16 +577,16 @@ public interface Connection {
        Request ignoreContentType(boolean ignoreContentType);

        /**
-         * Get the current state of TLS (SSL) certificate validation.
-         * @return true if TLS cert validation enabled
+         * Get the current custom SSL socket factory, if any.
+         * @return custom SSL socket factory if set, null otherwise
         */
-        boolean validateTLSCertificates();
+        SSLSocketFactory sslSocketFactory();

        /**
-         * Set TLS certificate validation.
-         * @param value set false to ignore TLS (SSL) certificates
+         * Set a custom SSL socket factory.
+         * @param sslSocketFactory SSL socket factory
         */
-        void validateTLSCertificates(boolean value);
+        void sslSocketFactory(SSLSocketFactory sslSocketFactory);

        /**
         * Add a data parameter to the request
@@ -647,7 +684,8 @@ public interface Connection {
        String contentType();

        /**
-         * Parse the body of the response as a Document.
+         * Read and parse the body of the response as a Document. If you intend to parse the same response multiple
+         * times, you should {@link #bufferUp()} first.
         * @return a parsed Document
         * @throws IOException on error
         */
@@ -664,10 +702,27 @@ public interface Connection {
         * @return body bytes
         */
        byte[] bodyAsBytes();
+
+        /**
+         * Read the body of the response into a local buffer, so that {@link #parse()} may be called repeatedly on the
+         * same connection response (otherwise, once the response is read, its InputStream will have been drained and
+         * may not be re-read). Calling {@link #body()} or {@link #bodyAsBytes()} has the same effect.
+         * @return this response, for chaining
+         * @throws UncheckedIOException if an IO exception occurs during buffering.
+         */
+        Response bufferUp();
+
+        /**
+         * Get the body of the response as a (buffered) InputStream. You should close the input stream when you're done with it.
+         * Other body methods (like bufferUp, body, parse, etc) will not work in conjunction with this method.
+         * <p>This method is useful for writing large responses to disk, without buffering them completely into memory first.</p>
+         * @return the response body input stream
+         */
+        BufferedInputStream bodyStream();
    }

    /**
-     * A Key Value tuple.
+     * A Key:Value tuple(+), used for form data.
     */
    interface KeyVal {

@@ -715,5 +770,20 @@ public interface Connection {
         * @return true if this keyval does indeed have an input stream
         */
        boolean hasInputStream();
+
+        /**
+         * Set the Content Type header used in the MIME body (aka mimetype) when uploading files.
+         * Only useful if {@link #inputStream(InputStream)} is set.
+         * <p>Will default to {@code application/octet-stream}.</p>
+         * @param contentType the new content type
+         * @return this KeyVal
+         */
+        KeyVal contentType(String contentType);
+
+        /**
+         * Get the current Content Type, or {@code null} if not set.
+         * @return the current Content Type.
+         */
+        String contentType();
    }
 }
--- a/src/main/java/org/jsoup/UncheckedIOException.java
+++ b/src/main/java/org/jsoup/UncheckedIOException.java
+package org.jsoup;
+
+import java.io.IOException;
+
+public class UncheckedIOException extends RuntimeException {
+    public UncheckedIOException(IOException cause) {
+        super(cause);
+    }
+
+    public UncheckedIOException(String message) {
+        super(new IOException(message));
+    }
+
+    public IOException ioException() {
+        return (IOException) getCause();
+    }
+}
--- a/src/main/java/org/jsoup/examples/HtmlToPlainText.java
+++ b/src/main/java/org/jsoup/examples/HtmlToPlainText.java
 package org.jsoup.examples;

 import org.jsoup.Jsoup;
-import org.jsoup.helper.StringUtil;
+import org.jsoup.internal.StringUtil;
 import org.jsoup.helper.Validate;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
@@ -60,8 +60,7 @@ public class HtmlToPlainText {
     */
    public String getPlainText(Element element) {
        FormattingVisitor formatter = new FormattingVisitor();
-        NodeTraversor traversor = new NodeTraversor(formatter);
-        traversor.traverse(element); // walk the DOM, and call .head() and .tail() for each node
+        NodeTraversor.traverse(formatter, element); // walk the DOM, and call .head() and .tail() for each node

        return formatter.toString();
    }

--- a/src/main/java/org/jsoup/examples/Wikipedia.java
+++ b/src/main/java/org/jsoup/examples/Wikipedia.java
+package org.jsoup.examples;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+
+/**
+ * A simple example, used on the jsoup website.
+ */
+public class Wikipedia {
+    public static void main(String[] args) throws IOException {
+        Document doc = Jsoup.connect("http://en.wikipedia.org/").get();
+        log(doc.title());
+
+        Elements newsHeadlines = doc.select("#mp-itn b a");
+        for (Element headline : newsHeadlines) {
+            log("%s\n\t%s", headline.attr("title"), headline.absUrl("href"));
+        }
+    }
+
+    private static void log(String msg, String... vals) {
+        System.out.println(String.format(msg, vals));
+    }
+}
--- a/src/main/java/org/jsoup/helper/ChangeNotifyingArrayList.java
+++ b/src/main/java/org/jsoup/helper/ChangeNotifyingArrayList.java
+package org.jsoup.helper;
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+/**
+ * Implementation of ArrayList that watches out for changes to the contents.
+ */
+public abstract class ChangeNotifyingArrayList<E> extends ArrayList<E> {
+    public ChangeNotifyingArrayList(int initialCapacity) {
+        super(initialCapacity);
+    }
+
+    public abstract void onContentsChanged();
+
+    @Override
+    public E set(int index, E element) {
+        onContentsChanged();
+        return super.set(index, element);
+    }
+
+    @Override
+    public boolean add(E e) {
+        onContentsChanged();
+        return super.add(e);
+    }
+
+    @Override
+    public void add(int index, E element) {
+        onContentsChanged();
+        super.add(index, element);
+    }
+
+    @Override
+    public E remove(int index) {
+        onContentsChanged();
+        return super.remove(index);
+    }
+
+    @Override
+    public boolean remove(Object o) {
+        onContentsChanged();
+        return super.remove(o);
+    }
+
+    @Override
+    public void clear() {
+        onContentsChanged();
+        super.clear();
+    }
+
+    @Override
+    public boolean addAll(Collection<? extends E> c) {
+        onContentsChanged();
+        return super.addAll(c);
+    }
+
+    @Override
+    public boolean addAll(int index, Collection<? extends E> c) {
+        onContentsChanged();
+        return super.addAll(index, c);
+    }
+
+    @Override
+    protected void removeRange(int fromIndex, int toIndex) {
+        onContentsChanged();
+        super.removeRange(fromIndex, toIndex);
+    }
+
+    @Override
+    public boolean removeAll(Collection<?> c) {
+        onContentsChanged();
+        return super.removeAll(c);
+    }
+
+    @Override
+    public boolean retainAll(Collection<?> c) {
+        onContentsChanged();
+        return super.retainAll(c);
+    }
+
+}
--- a/src/main/java/org/jsoup/helper/DataUtil.java
+++ b/src/main/java/org/jsoup/helper/DataUtil.java
 package org.jsoup.helper;

+import org.jsoup.UncheckedIOException;
+import org.jsoup.internal.ConstrainableInputStream;
+import org.jsoup.internal.StringUtil;
+import org.jsoup.nodes.Comment;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
 import org.jsoup.nodes.XmlDeclaration;
 import org.jsoup.parser.Parser;
+import org.jsoup.select.Elements;

-import java.io.ByteArrayOutputStream;
+import java.io.BufferedReader;
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.io.OutputStream;
-import java.io.RandomAccessFile;
+import java.nio.Buffer;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.IllegalCharsetNameException;
@@ -24,9 +32,10 @@ import java.util.regex.Pattern;
 *
 */
 public final class DataUtil {
-    private static final Pattern charsetPattern = Pattern.compile("(?i)\\bcharset=\\s*(?:\"|')?([^\\s,;\"']*)");
+    private static final Pattern charsetPattern = Pattern.compile("(?i)\\bcharset=\\s*(?:[\"'])?([^\\s,;\"']*)");
    static final String defaultCharset = "UTF-8"; // used if not found in header or meta charset
-    private static final int bufferSize = 60000;
+    private static final int firstReadBufferSize = 1024 * 5;
+    static final int bufferSize = 1024 * 32;
    private static final char[] mimeBoundaryChars =
            "-_1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray();
    static final int boundaryLength = 32;
@@ -42,8 +51,7 @@ public final class DataUtil {
     * @throws IOException on IO error
     */
    public static Document load(File in, String charsetName, String baseUri) throws IOException {
-        ByteBuffer byteData = readFileToByteBuffer(in);
-        return parseByteData(byteData, charsetName, baseUri, Parser.htmlParser());
+        return parseInputStream(new FileInputStream(in), charsetName, baseUri, Parser.htmlParser());
    }

    /**
@@ -55,8 +63,7 @@ public final class DataUtil {
     * @throws IOException on IO error
     */
    public static Document load(InputStream in, String charsetName, String baseUri) throws IOException {
-        ByteBuffer byteData = readToByteBuffer(in);
-        return parseByteData(byteData, charsetName, baseUri, Parser.htmlParser());
+        return parseInputStream(in, charsetName, baseUri, Parser.htmlParser());
    }

    /**
@@ -69,8 +76,7 @@ public final class DataUtil {
     * @throws IOException on IO error
     */
    public static Document load(InputStream in, String charsetName, String baseUri, Parser parser) throws IOException {
-        ByteBuffer byteData = readToByteBuffer(in);
-        return parseByteData(byteData, charsetName, baseUri, parser);
+        return parseInputStream(in, charsetName, baseUri, parser);
    }

    /**
@@ -87,54 +93,92 @@ public final class DataUtil {
        }
    }

-    // reads bytes first into a buffer, then decodes with the appropriate charset. done this way to support
-    // switching the chartset midstream when a meta http-equiv tag defines the charset.
-    // todo - this is getting gnarly. needs a rewrite.
-    static Document parseByteData(ByteBuffer byteData, String charsetName, String baseUri, Parser parser) {
-        String docData;
+    static Document parseInputStream(InputStream input, String charsetName, String baseUri, Parser parser) throws IOException  {
+        if (input == null) // empty body
+            return new Document(baseUri);
+        input = ConstrainableInputStream.wrap(input, bufferSize, 0);
+
        Document doc = null;
+        boolean fullyRead = false;
+
+        // read the start of the stream and look for a BOM or meta charset
+        input.mark(bufferSize);
+        ByteBuffer firstBytes = readToByteBuffer(input, firstReadBufferSize - 1); // -1 because we read one more to see if completed. First read is < buffer size, so can't be invalid.
+        fullyRead = input.read() == -1;
+        input.reset();

        // look for BOM - overrides any other header or input
-        charsetName = detectCharsetFromBom(byteData, charsetName);
+        BomCharset bomCharset = detectCharsetFromBom(firstBytes);
+        if (bomCharset != null)
+            charsetName = bomCharset.charset;

        if (charsetName == null) { // determine from meta. safe first parse as UTF-8
+            String docData = Charset.forName(defaultCharset).decode(firstBytes).toString();
+            try {
+                doc = parser.parseInput(docData, baseUri);
+            } catch (UncheckedIOException e) {
+                throw e.ioException();
+            }
+
            // look for <meta http-equiv="Content-Type" content="text/html;charset=gb2312"> or HTML5 <meta charset="gb2312">
-            docData = Charset.forName(defaultCharset).decode(byteData).toString();
-            doc = parser.parseInput(docData, baseUri);
-            Element meta = doc.select("meta[http-equiv=content-type], meta[charset]").first();
+            Elements metaElements = doc.select("meta[http-equiv=content-type], meta[charset]");
            String foundCharset = null; // if not found, will keep utf-8 as best attempt
-            if (meta != null) {
-                if (meta.hasAttr("http-equiv")) {
+            for (Element meta : metaElements) {
+                if (meta.hasAttr("http-equiv"))
                    foundCharset = getCharsetFromContentType(meta.attr("content"));
-                }
-                if (foundCharset == null && meta.hasAttr("charset")) {
+                if (foundCharset == null && meta.hasAttr("charset"))
                    foundCharset = meta.attr("charset");
-                }
+                if (foundCharset != null)
+                    break;
            }
+
            // look for <?xml encoding='ISO-8859-1'?>
-            if (foundCharset == null && doc.childNodeSize() > 0 && doc.childNode(0) instanceof XmlDeclaration) {
-                XmlDeclaration prolog = (XmlDeclaration) doc.childNode(0);
-                if (prolog.name().equals("xml")) {
-                    foundCharset = prolog.attr("encoding");
+            if (foundCharset == null && doc.childNodeSize() > 0) {
+                Node first = doc.childNode(0);
+                XmlDeclaration decl = null;
+                if (first instanceof XmlDeclaration)
+                    decl = (XmlDeclaration) first;
+                else if (first instanceof Comment) {
+                    Comment comment = (Comment) first;
+                    if (comment.isXmlDeclaration())
+                        decl = comment.asXmlDeclaration();
+                }
+                if (decl != null) {
+                    if (decl.name().equalsIgnoreCase("xml"))
+                        foundCharset = decl.attr("encoding");
                }
            }
            foundCharset = validateCharset(foundCharset);
-
-            if (foundCharset != null && !foundCharset.equals(defaultCharset)) { // need to re-decode
+            if (foundCharset != null && !foundCharset.equalsIgnoreCase(defaultCharset)) { // need to re-decode. (case insensitive check here to match how validate works)
                foundCharset = foundCharset.trim().replaceAll("[\"']", "");
                charsetName = foundCharset;
-                byteData.rewind();
-                docData = Charset.forName(foundCharset).decode(byteData).toString();
+                doc = null;
+            } else if (!fullyRead) {
                doc = null;
            }
        } else { // specified by content type header (or by user on file load)
            Validate.notEmpty(charsetName, "Must set charset arg to character set of file to parse. Set to null to attempt to detect from HTML");
-            docData = Charset.forName(charsetName).decode(byteData).toString();
        }
        if (doc == null) {
-            doc = parser.parseInput(docData, baseUri);
-            doc.outputSettings().charset(charsetName);
+            if (charsetName == null)
+                charsetName = defaultCharset;
+            BufferedReader reader = new BufferedReader(new InputStreamReader(input, charsetName), bufferSize);
+            if (bomCharset != null && bomCharset.offset) // creating the buffered reader ignores the input pos, so must skip here
+                reader.skip(1);
+            try {
+                doc = parser.parseInput(reader, baseUri);
+            } catch (UncheckedIOException e) {
+                // io exception when parsing (not seen before because reading the stream as we go)
+                throw e.ioException();
+            }
+            Charset charset = Charset.forName(charsetName);
+            doc.outputSettings().charset(charset);
+            if (!charset.canEncode()) {
+                // some charsets can read but not encode; switch to an encodable charset and update the meta el
+                doc.charset(Charset.forName(defaultCharset));
+            }
        }
+        input.close();
        return doc;
    }

@@ -148,43 +192,8 @@ public final class DataUtil {
     */
    public static ByteBuffer readToByteBuffer(InputStream inStream, int maxSize) throws IOException {
        Validate.isTrue(maxSize >= 0, "maxSize must be 0 (unlimited) or larger");
-        final boolean capped = maxSize > 0;
-        byte[] buffer = new byte[capped && maxSize < bufferSize ? maxSize : bufferSize];
-        ByteArrayOutputStream outStream = new ByteArrayOutputStream(capped ? maxSize : bufferSize);
-        int read;
-        int remaining = maxSize;
-
-        while (!Thread.interrupted()) {
-            read = inStream.read(buffer);
-            if (read == -1) break;
-            if (capped) {
-                if (read > remaining) {
-                    outStream.write(buffer, 0, remaining);
-                    break;
-                }
-                remaining -= read;
-            }
-            outStream.write(buffer, 0, read);
-        }
-
-        return ByteBuffer.wrap(outStream.toByteArray());
-    }
-
-    static ByteBuffer readToByteBuffer(InputStream inStream) throws IOException {
-        return readToByteBuffer(inStream, 0);
-    }
-
-    static ByteBuffer readFileToByteBuffer(File file) throws IOException {
-        RandomAccessFile randomAccessFile = null;
-        try {
-            randomAccessFile = new RandomAccessFile(file, "r");
-            byte[] bytes = new byte[(int) randomAccessFile.length()];
-            randomAccessFile.readFully(bytes);
-            return ByteBuffer.wrap(bytes);
-        } finally {
-            if (randomAccessFile != null)
-                randomAccessFile.close();
-        }
+        final ConstrainableInputStream input = ConstrainableInputStream.wrap(inStream, bufferSize, maxSize);
+        return input.readToByteBuffer(maxSize);
    }

    static ByteBuffer emptyByteBuffer() {
@@ -225,31 +234,42 @@ public final class DataUtil {
     * Creates a random string, suitable for use as a mime boundary
     */
    static String mimeBoundary() {
-        final StringBuilder mime = new StringBuilder(boundaryLength);
+        final StringBuilder mime = StringUtil.borrowBuilder();
        final Random rand = new Random();
        for (int i = 0; i < boundaryLength; i++) {
            mime.append(mimeBoundaryChars[rand.nextInt(mimeBoundaryChars.length)]);
        }
-        return mime.toString();
+        return StringUtil.releaseBuilder(mime);
    }

-    private static String detectCharsetFromBom(ByteBuffer byteData, String charsetName) {
-        byteData.mark();
+    private static BomCharset detectCharsetFromBom(final ByteBuffer byteData) {
+        final Buffer buffer = byteData; // .mark and rewind used to return Buffer, now ByteBuffer, so cast for backward compat
+        buffer.mark();
        byte[] bom = new byte[4];
        if (byteData.remaining() >= bom.length) {
            byteData.get(bom);
-            byteData.rewind();
+            buffer.rewind();
        }
        if (bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == (byte) 0xFE && bom[3] == (byte) 0xFF || // BE
            bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE && bom[2] == 0x00 && bom[3] == 0x00) { // LE
-            charsetName = "UTF-32"; // and I hope it's on your system
+            return new BomCharset("UTF-32", false); // and I hope it's on your system
        } else if (bom[0] == (byte) 0xFE && bom[1] == (byte) 0xFF || // BE
            bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE) {
-            charsetName = "UTF-16"; // in all Javas
+            return new BomCharset("UTF-16", false); // in all Javas
        } else if (bom[0] == (byte) 0xEF && bom[1] == (byte) 0xBB && bom[2] == (byte) 0xBF) {
-            charsetName = "UTF-8"; // in all Javas
-            byteData.position(3); // 16 and 32 decoders consume the BOM to determine be/le; utf-8 should be consumed here
+            return new BomCharset("UTF-8", true); // in all Javas
+            // 16 and 32 decoders consume the BOM to determine be/le; the UTF-8 BOM is not consumed here - the offset flag tells the caller to skip it
+        }
+        return null;
+    }
+
+    private static class BomCharset {
+        private final String charset;
+        private final boolean offset;
+
+        public BomCharset(String charset, boolean offset) {
+            this.charset = charset;
+            this.offset = offset;
        }
-        return charsetName;
    }
 }
--- a/src/main/java/org/jsoup/helper/DescendableLinkedList.java
+++ b/src/main/java/org/jsoup/helper/DescendableLinkedList.java
-package org.jsoup.helper;
-
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.ListIterator;
-
-/**
- * Provides a descending iterator and other 1.6 methods to allow support on the 1.5 JRE.
- * @param <E> Type of elements
- */
-public class DescendableLinkedList<E> extends LinkedList<E> {
-
-    /**
-     * Create a new DescendableLinkedList.
-     */
-    public DescendableLinkedList() {
-        super();
-    }
-
-    /**
-     * Add a new element to the start of the list.
-     * @param e element to add
-     */
-    public void push(E e) {
-        addFirst(e);
-    }
-
-    /**
-     * Look at the last element, if there is one.
-     * @return the last element, or null
-     */
-    public E peekLast() {
-        return size() == 0 ? null : getLast();
-    }
-
-    /**
-     * Remove and return the last element, if there is one
-     * @return the last element, or null
-     */
-    public E pollLast() {
-        return size() == 0 ? null : removeLast();
-    }
-
-    /**
-     * Get an iterator that starts and the end of the list and works towards the start.
-     * @return an iterator that starts and the end of the list and works towards the start.
-     */
-    public Iterator<E> descendingIterator() {
-        return new DescendingIterator<E>(size());
-    }
-
-    private class DescendingIterator<E> implements Iterator<E> {
-        private final ListIterator<E> iter;
-
-        @SuppressWarnings("unchecked")
-        private DescendingIterator(int index) {
-            iter = (ListIterator<E>) listIterator(index);
-        }
-
-        /**
-         * Check if there is another element on the list.
-         * @return if another element
-         */
-        public boolean hasNext() {
-            return iter.hasPrevious();
-        }
-
-        /**
-         * Get the next element.
-         * @return the next element.
-         */
-        public E next() {
-            return iter.previous();
-        }
-
-        /**
-         * Remove the current element.
-         */
-        public void remove() {
-            iter.remove();
-        }
-    }
-}
--- a/src/main/java/org/jsoup/helper/HttpConnection.java
+++ b/src/main/java/org/jsoup/helper/HttpConnection.java