Commit 3ab78f1d authored by Barry Warsaw's avatar Barry Warsaw Committed by Scott Kitterman

Fix support for codepoints over 0xffff according to the spec. Given by John

R. Lenton.  Closes: #806826.

Patch-Name: support-high-codepoints.patch
parent 93dbe92f
......@@ -8,9 +8,13 @@
__all__ = ['Emitter', 'EmitterError']
import sys
from error import YAMLError
from events import *
has_ucs4 = sys.maxunicode > 0xffff
class EmitterError(YAMLError):
pass
......@@ -701,7 +705,8 @@ class Emitter(object):
line_breaks = True
if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF':
or u'\uE000' <= ch <= u'\uFFFD'
or ((not has_ucs4) or (u'\U00010000' <= ch < u'\U0010ffff'))) and ch != u'\uFEFF':
unicode_characters = True
if not self.allow_unicode:
special_characters = True
......
......@@ -19,7 +19,9 @@ __all__ = ['Reader', 'ReaderError']
from error import YAMLError, Mark
import codecs, re
import codecs, re, sys
has_ucs4 = sys.maxunicode > 0xffff
class ReaderError(YAMLError):
......@@ -134,7 +136,10 @@ class Reader(object):
self.encoding = 'utf-8'
self.update(1)
NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
if has_ucs4:
NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')
else:
NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match:
......
......@@ -698,7 +698,8 @@ class Emitter:
line_breaks = True
if not (ch == '\n' or '\x20' <= ch <= '\x7E'):
if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF'
or '\uE000' <= ch <= '\uFFFD') and ch != '\uFEFF':
or '\uE000' <= ch <= '\uFFFD'
or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF':
unicode_characters = True
if not self.allow_unicode:
special_characters = True
......
......@@ -134,7 +134,7 @@ class Reader(object):
self.encoding = 'utf-8'
self.update(1)
NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')
def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment