Use a more elaborate fallback system for changelog files that fail to load

Explicitly try UTF-8; sometimes that works when the implicit default encoding does not.

Warn about the invalid changelog file instead of crashing.

Traceback (most recent call last):
  File "bin/compare-source-package-list", line 803, in get_changelog_entries
    changelog_obj = changelog.Changelog(changelog_file)
  File "/usr/lib/python3/dist-packages/debian/changelog.py", line 269, in __init__
    strict=strict)
  File "/usr/lib/python3/dist-packages/debian/changelog.py", line 308, in parse_changelog
    for line in file:
  File "/usr/lib/python3.5/codecs.py", line 321, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe1 in position 144: invalid continuation byte

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "bin/compare-source-package-list", line 1180, in <module>
    files, patches, links, new = process_sources(source_entries, lists_dir)
  File "bin/compare-source-package-list", line 1158, in process_sources
    actions = check_source_package(source_entry, srcpkg)
  File "bin/compare-source-package-list", line 1103, in check_source_package
    derived_from = find_derived_from(tmp_dir, name, version, dsc_name, dsc_sha1, parts_unmodified)
  File "bin/compare-source-package-list", line 832, in find_derived_from
    changelog_entries = get_changelog_entries(tmp_dir, dsc_name, dsc_sha1)
  File "bin/compare-source-package-list", line 806, in get_changelog_entries
    changelog_obj = changelog.Changelog(changelog_file, encoding='iso-8859-1')
  File "/usr/lib/python3/dist-packages/debian/changelog.py", line 269, in __init__
    strict=strict)
  File "/usr/lib/python3/dist-packages/debian/changelog.py", line 308, in parse_changelog
    for line in file:
  File "/usr/lib/python3.5/codecs.py", line 321, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe1 in position 144: invalid continuation byte

Traceback (most recent call last):
  File "bin/compare-source-package-list", line 803, in get_changelog_entries
    changelog_obj = changelog.Changelog(changelog_file)
  File "/usr/lib/python3/dist-packages/debian/changelog.py", line 269, in __init__
    strict=strict)
  File "/usr/lib/python3/dist-packages/debian/changelog.py", line 308, in parse_changelog
    for line in file:
  File "/usr/lib/python3.5/codecs.py", line 321, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xfc in position 401: invalid start byte

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "bin/compare-source-package-list", line 1180, in <module>
    files, patches, links, new = process_sources(source_entries, lists_dir)
  File "bin/compare-source-package-list", line 1158, in process_sources
    actions = check_source_package(source_entry, srcpkg)
  File "bin/compare-source-package-list", line 1103, in check_source_package
    derived_from = find_derived_from(tmp_dir, name, version, dsc_name, dsc_sha1, parts_unmodified)
  File "bin/compare-source-package-list", line 832, in find_derived_from
    changelog_entries = get_changelog_entries(tmp_dir, dsc_name, dsc_sha1)
  File "bin/compare-source-package-list", line 806, in get_changelog_entries
    changelog_obj = changelog.Changelog(changelog_file, encoding='iso-8859-1')
  File "/usr/lib/python3/dist-packages/debian/changelog.py", line 269, in __init__
    strict=strict)
  File "/usr/lib/python3/dist-packages/debian/changelog.py", line 308, in parse_changelog
    for line in file:
  File "/usr/lib/python3.5/codecs.py", line 321, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xfc in position 401: invalid start byte
parent 755036da
......@@ -803,7 +803,16 @@ def get_changelog_entries(tmp_dir, dsc_name, dsc_sha1):
changelog_obj = changelog.Changelog(changelog_file)
except UnicodeDecodeError:
changelog_file.seek(0)
changelog_obj = changelog.Changelog(changelog_file, encoding='iso-8859-1')
try:
changelog_obj = changelog.Changelog(changelog_file, encoding='utf-8')
except UnicodeDecodeError:
changelog_file.seek(0)
try:
changelog_obj = changelog.Changelog(changelog_file, encoding='iso-8859-1')
except UnicodeDecodeError:
logging.warning('could not parse changelog from %s', dsc_name)
rmtree(extract_path)
return None
try:
changelog_entries = [(entry.package, str(entry._raw_version)) for entry in changelog_obj]
except:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment