Commit 49307bc0 authored by Stefano Rivera

New upstream version 4.7.1

parent 7751693b
This diff is collapsed.
Metadata-Version: 1.1
Metadata-Version: 2.1
Name: beautifulsoup4
Version: 4.7.0
Version: 4.7.1
Summary: Screen-scraping library
Home-page: http://www.crummy.com/software/BeautifulSoup/bs4/
Author: Leonard Richardson
......@@ -58,7 +58,7 @@ Description: Beautiful Soup is a library that makes it easy to scrape informatio
* [Discussion group](http://groups.google.com/group/beautifulsoup/)
* [Development](https://code.launchpad.net/beautifulsoup/)
* [Bug tracker](https://bugs.launchpad.net/beautifulsoup/)
* [Complete changelog](https://bazaar.launchpad.net/~leonardr/beautifulsoup/bs4/view/head:/NEWS.txt)
* [Complete changelog](https://bazaar.launchpad.net/~leonardr/beautifulsoup/bs4/view/head:/CHANGELOG)
# Building the documentation
......@@ -95,3 +95,6 @@ Classifier: Topic :: Text Processing :: Markup :: HTML
Classifier: Topic :: Text Processing :: Markup :: XML
Classifier: Topic :: Text Processing :: Markup :: SGML
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Description-Content-Type: text/markdown
Provides-Extra: lxml
Provides-Extra: html5lib
......@@ -49,7 +49,7 @@ To go beyond the basics, [comprehensive documentation is available](http://www.c
* [Discussion group](http://groups.google.com/group/beautifulsoup/)
* [Development](https://code.launchpad.net/beautifulsoup/)
* [Bug tracker](https://bugs.launchpad.net/beautifulsoup/)
* [Complete changelog](https://bazaar.launchpad.net/~leonardr/beautifulsoup/bs4/view/head:/NEWS.txt)
* [Complete changelog](https://bazaar.launchpad.net/~leonardr/beautifulsoup/bs4/view/head:/CHANGELOG)
# Building the documentation
......
Metadata-Version: 1.1
Metadata-Version: 2.1
Name: beautifulsoup4
Version: 4.7.0
Version: 4.7.1
Summary: Screen-scraping library
Home-page: http://www.crummy.com/software/BeautifulSoup/bs4/
Author: Leonard Richardson
......@@ -58,7 +58,7 @@ Description: Beautiful Soup is a library that makes it easy to scrape informatio
* [Discussion group](http://groups.google.com/group/beautifulsoup/)
* [Development](https://code.launchpad.net/beautifulsoup/)
* [Bug tracker](https://bugs.launchpad.net/beautifulsoup/)
* [Complete changelog](https://bazaar.launchpad.net/~leonardr/beautifulsoup/bs4/view/head:/NEWS.txt)
* [Complete changelog](https://bazaar.launchpad.net/~leonardr/beautifulsoup/bs4/view/head:/CHANGELOG)
# Building the documentation
......@@ -95,3 +95,6 @@ Classifier: Topic :: Text Processing :: Markup :: HTML
Classifier: Topic :: Text Processing :: Markup :: XML
Classifier: Topic :: Text Processing :: Markup :: SGML
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Description-Content-Type: text/markdown
Provides-Extra: lxml
Provides-Extra: html5lib
COPYING.txt
LICENSE
MANIFEST.in
NEWS.txt
README.md
TODO.txt
convert-py3k
......
......@@ -18,7 +18,7 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.7.0"
__version__ = "4.7.1"
__copyright__ = "Copyright (c) 2004-2019 Leonard Richardson"
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
......@@ -435,113 +435,56 @@ class BeautifulSoup(Tag):
if previous_element is None:
previous_element = o.previous_element
fix = parent.next_element is not None
o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)
self._most_recent_element = o
parent.contents.append(o)
# Check if we are inserting into an already parsed node.
if parent.next_element is not None:
# Check that links are proper across tag parent boundaries
child = self._linkage_fixer(parent)
if fix:
self._linkage_fixer(parent)
def _linkage_fixer(self, el, _recursive_call=False):
def _linkage_fixer(self, el):
"""Make sure linkage of this fragment is sound."""
descendant = None
# If element is document element,
# it should have no previous element, previous sibling, or next sibling.
if el.parent is None:
if el.previous_element is not None:
el.previous_element = None
if el.previous_sibling is not None:
el.previous_element = None
if el.next_sibling is not None:
el.next_sibling = None
idx = 0
child = None
last_child = None
last_idx = len(el.contents) - 1
for child in el.contents:
descendant = None
# Parent should link next element to their first child
# That child should have no previous sibling
if idx == 0:
if el.parent is not None:
if el.next_element is not child:
el.next_element = child
if child.previous_element is not el:
child.previous_element = el
if child.previous_sibling is not None:
child.previous_sibling = None
# If not the first child, previous index should link as sibling to last index.
# Previous element should match the last index or the last bubbled up descendant (of a Tag sibling).
else:
if child.previous_sibling is not el.contents[idx - 1]:
child.previous_sibling = el.contents[idx - 1]
if el.contents[idx - 1].next_sibling is not child:
el.contents[idx - 1].next_sibling = child
if last_child is not None:
if child.previous_element is not last_child:
child.previous_element = last_child
if last_child.next_element is not child:
last_child.next_element = child
# This index is a tag, dig deeper for a "last descendant" fixing linkage along the way
if isinstance(child, Tag) and child.contents:
descendant = self._linkage_fixer(child, True)
# A bubbled up descendant should have no next siblings
# as it is last in its content list.
if descendant.next_sibling is not None:
descendant.next_sibling = None
# Mark last child as either the bubbled up descendant or the current child
if descendant is not None:
last_child = descendant
else:
last_child = child
# If last child in list, there are no next siblings
if idx == last_idx:
if child.next_sibling is not None:
child.next_sibling = None
idx += 1
# The child to return is either the last descendant (if available)
# or the last processed child (if any). If neither is available,
# the parent element is its own last descendant.
child = descendant if descendant is not None else child
if child is None:
child = el
# If not a recursive call, we are done processing this element.
first = el.contents[0]
child = el.contents[-1]
descendant = child
if child is first and el.parent is not None:
# Parent should be linked to first child
el.next_element = child
# We are no longer linked to whatever this element is
prev_el = child.previous_element
if prev_el is not None and prev_el is not el:
prev_el.next_element = None
# First child should be linked to the parent, and no previous siblings.
child.previous_element = el
child.previous_sibling = None
# We have no sibling as we've been appended as the last.
child.next_sibling = None
# This index is a tag, dig deeper for a "last descendant"
if isinstance(child, Tag) and child.contents:
descendant = child._last_descendant(False)
# As the final step, link last descendant. It should be linked
# to the parent's next sibling (if found), else walk up the chain
# and find a parent with a sibling.
if not _recursive_call and child is not None:
child.next_element = None
target = el
while True:
if target is None:
break
elif target.next_sibling is not None:
child.next_element = target.next_sibling
target.next_sibling.previous_element = child
break
target = target.parent
# We are done, so nothing to return
return None
else:
# Return the child to the recursive caller
return child
# and find a parent with a sibling. It should have no next sibling.
descendant.next_element = None
descendant.next_sibling = None
target = el
while True:
if target is None:
break
elif target.next_sibling is not None:
descendant.next_element = target.next_sibling
target.next_sibling.previous_element = child
break
target = target.parent
def _popToTag(self, name, nsprefix=None, inclusivePop=True):
"""Pops the tag stack up to and including the most recent
......
......@@ -71,12 +71,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
This might be useful later on when creating CSS selectors.
"""
for key, value in mapping.items():
if key not in self.soup._namespaces:
# Let the BeautifulSoup object know about a new namespace.
# If there are multiple namespaces defined with the same
# prefix, the first one in the document takes precedence.
self.soup._namespaces[key] = value
if key and key not in self.soup._namespaces:
# Let the BeautifulSoup object know about a new namespace.
# If there are multiple namespaces defined with the same
# prefix, the first one in the document takes precedence.
self.soup._namespaces[key] = value
def default_parser(self, encoding):
# This can either return a parser object or a class, which
# will be instantiated with default arguments.
......
......@@ -217,7 +217,7 @@ class PageElement(object):
if formatter is None:
output = s
else:
if callable(formatter):
if isinstance(formatter, Callable):
# Backwards compatibility -- you used to pass in a formatting method.
output = formatter(s)
else:
......@@ -455,7 +455,7 @@ class PageElement(object):
if parent is None:
raise ValueError(
"Element has no parent, so 'before' has no meaning.")
if self in args:
if any(x is self for x in args):
raise ValueError("Can't insert an element before itself.")
for predecessor in args:
# Extract first so that the index won't be screwed up if they
......@@ -476,7 +476,7 @@ class PageElement(object):
if parent is None:
raise ValueError(
"Element has no parent, so 'after' has no meaning.")
if self in args:
if any(x is self for x in args):
raise ValueError("Can't insert an element after itself.")
offset = 0
......@@ -1138,7 +1138,7 @@ class Tag(PageElement):
# First off, turn a string formatter into a Formatter object. This
# will stop the lookup from happening over and over again.
if not isinstance(formatter, Formatter) and not callable(formatter):
if not isinstance(formatter, Formatter) and not isinstance(formatter, Callable):
formatter = self._formatter_for_name(formatter)
attrs = []
if self.attrs:
......@@ -1243,7 +1243,7 @@ class Tag(PageElement):
"""
# First off, turn a string formatter into a Formatter object. This
# will stop the lookup from happening over and over again.
if not isinstance(formatter, Formatter) and not callable(formatter):
if not isinstance(formatter, Formatter) and not isinstance(formatter, Callable):
formatter = self._formatter_for_name(formatter)
pretty_print = (indent_level is not None)
......@@ -1425,7 +1425,7 @@ class SoupStrainer(object):
def _normalize_search_value(self, value):
# Leave it alone if it's a Unicode string, a callable, a
# regular expression, a boolean, or None.
if (isinstance(value, unicode) or callable(value) or hasattr(value, 'match')
if (isinstance(value, unicode) or isinstance(value, Callable) or hasattr(value, 'match')
or isinstance(value, bool) or value is None):
return value
......
......@@ -80,3 +80,21 @@ class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
@property
def default_builder(self):
    """Return the tree builder these smoke tests run against.

    Every access constructs a new ``LXMLTreeBuilderForXML`` with default
    arguments.
    """
    builder = LXMLTreeBuilderForXML()
    return builder
def test_namespace_indexing(self):
    """Un-prefixed namespaces must not be recorded in ``soup._namespaces``.

    Only one un-prefixed namespace could be held, and soupsieve would treat
    it as the default namespace, which may be confusing in some situations:
    when a selector names no namespace, the default namespace (if defined)
    is assumed. The document below declares one un-prefixed and one
    prefixed namespace; the expected mapping has no entry for the
    un-prefixed declaration.
    """
    markup = (
        '<?xml version="1.1"?>\n'
        '<root>'
        '<tag xmlns="http://unprefixed-namespace.com">content</tag>'
        '<prefix:tag xmlns:prefix="http://prefixed-namespace.com">content</tag>'
        '</root>'
    )
    expected_namespaces = {
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'prefix': 'http://prefixed-namespace.com',
    }

    parsed = self.soup(markup)
    self.assertEqual(parsed._namespaces, expected_namespaces)
......@@ -971,6 +971,10 @@ class TestTreeModification(SoupTest):
# Can't insert before if an element has no parent.
b.extract()
self.assertRaises(ValueError, b.insert_before, "nope")
# Can insert an identical element
soup = self.soup("<a>")
soup.a.insert_before(soup.new_tag("a"))
def test_insert_multiple_before(self):
soup = self.soup("<a>foo</a><b>bar</b>")
......@@ -1000,6 +1004,10 @@ class TestTreeModification(SoupTest):
# Can't insert after if an element has no parent.
b.extract()
self.assertRaises(ValueError, b.insert_after, "nope")
# Can insert an identical element
soup = self.soup("<a>")
soup.a.insert_before(soup.new_tag("a"))
def test_insert_multiple_after(self):
soup = self.soup("<a>foo</a><b>bar</b>")
......
[egg_info]
tag_date = 0
tag_build =
tag_svn_revision = 0
tag_date = 0
......@@ -8,7 +8,7 @@ with open("README.md", "r") as fh:
setup(
name="beautifulsoup4",
version = "4.7.0",
version = "4.7.1",
author="Leonard Richardson",
author_email='leonardr@segfault.org',
url="http://www.crummy.com/software/BeautifulSoup/bs4/",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment