diff --git a/ChangeLog.rst b/ChangeLog.rst
index ed0f92e5a3dcfbeb3b6c28d439b4c10ff56cfba8..b6158c3343b1092804097ada4e09c763ee661664 100644
--- a/ChangeLog.rst
+++ b/ChangeLog.rst
@@ -1,3 +1,39 @@
+0.5.6
+======
+
+* Fix fallback.Unpacker.feed() dropping unused data from the buffer (#287)
+* Resurrect fallback.unpack() and _unpacker.unpack().
+ They were removed in 0.5.5, but that broke backward compatibility. (#288, #290)
+
+0.5.5
+======
+
+* Fix memory leak in pure Python Unpacker.feed() (#283)
+* Fix unpack() not supporting the ``raw`` option (#285)
+
+0.5.4
+======
+
+* Undeprecate ``unicode_errors`` option. (#278)
+
+0.5.3
+======
+
+* Fixed regression when passing ``unicode_errors`` to Packer but not ``encoding``. (#277)
+
+0.5.2
+======
+
+* Add ``raw`` option to Unpacker. It is preferred over the ``encoding`` option.
+
+* Packer.pack() resets the buffer on exception (#274)
+
+
+0.5.1
+======
+
+* Remove FutureWarning about use_bin_type option (#271)
+
0.5.0
======
diff --git a/Makefile b/Makefile
index 84decd80b69c09e8e4acd7c08fe2f4810a42a608..124f24379a802fdc385e9b9d78d553a90fd4e4f1 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,8 @@ cython:
.PHONY: test
test:
- py.test -v test
+ pytest -v test
+ MSGPACK_PUREPYTHON=1 pytest -v test
.PHONY: serve-doc
serve-doc: all
@@ -17,10 +18,16 @@ serve-doc: all
.PHONY: clean
clean:
rm -rf build
- rm msgpack/*.so
+ rm -f msgpack/_packer.cpp
+ rm -f msgpack/_unpacker.cpp
rm -rf msgpack/__pycache__
rm -rf test/__pycache__
+.PHONY: update-docker
+update-docker:
+ docker pull quay.io/pypa/manylinux1_i686
+ docker pull quay.io/pypa/manylinux1_x86_64
+
.PHONY: linux-wheel
linux-wheel:
docker run --rm -ti -v `pwd`:/project -w /project quay.io/pypa/manylinux1_i686 bash docker/buildwheel.sh
diff --git a/README.rst b/README.rst
index 01a8b2a42d91c45450a0d3aed9c1215f726e36d5..8925a65c410d8efb1e1e126ed7cc2addef7a3eac 100644
--- a/README.rst
+++ b/README.rst
@@ -10,8 +10,21 @@ MessagePack for Python
:target: https://msgpack-python.readthedocs.io/en/latest/?badge=latest
:alt: Documentation Status
-IMPORTANT: Upgrading from msgpack-0.4
---------------------------------------
+
+What's this
+-----------
+
+`MessagePack <https://msgpack.org/>`_ is an efficient binary serialization format.
+It lets you exchange data among multiple languages like JSON.
+But it's faster and smaller.
+This package provides CPython bindings for reading and writing MessagePack data.
+
+
+Very important notes for existing users
+---------------------------------------
+
+PyPI package name
+^^^^^^^^^^^^^^^^^
TL;DR: When upgrading from msgpack-0.4 or earlier, don't do `pip install -U msgpack-python`.
Do `pip uninstall msgpack-python; pip install msgpack` instead.
@@ -24,13 +37,37 @@ Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-pyt
msgpack is removed and `import msgpack` fail.
-What's this
------------
+Deprecating encoding option
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+encoding and unicode_errors options are deprecated.
+
+For the packer, always use UTF-8. Storing anything other than UTF-8 is not recommended.
+
+For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes``
+object into msgpack raw type.
+
+For the unpacker, there is a new ``raw`` option. It is ``True`` by default
+for backward compatibility, but it will be changed to ``False`` in the near future.
+You can use ``raw=False`` instead of ``encoding='utf-8'``.
+
+Planned backward incompatible changes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For msgpack 1.0, I am planning these breaking changes:
+
+* packer and unpacker: Remove ``encoding`` and ``unicode_errors`` option.
+* packer: Change default of ``use_bin_type`` option from False to True.
+* unpacker: Change default of ``raw`` option from True to False.
+* unpacker: Reduce all ``max_xxx_len`` options for typical usage.
+* unpacker: Remove ``write_bytes`` option from all methods.
+
+To keep these changes from breaking your application, please:
+
+* Don't use deprecated options.
+* Pass ``use_bin_type`` and ``raw`` options explicitly.
+* If your application handles large (>1MB) data, specify ``max_xxx_len`` options too.
-`MessagePack `_ is an efficient binary serialization format.
-It lets you exchange data among multiple languages like JSON.
-But it's faster and smaller.
-This package provides CPython bindings for reading and writing MessagePack data.
Install
-------
@@ -76,14 +113,14 @@ msgpack provides ``dumps`` and ``loads`` as an alias for compatibility with
>>> import msgpack
>>> msgpack.packb([1, 2, 3], use_bin_type=True)
'\x93\x01\x02\x03'
- >>> msgpack.unpackb(_)
+ >>> msgpack.unpackb(_, raw=False)
[1, 2, 3]
``unpack`` unpacks msgpack's array to Python's list, but can also unpack to tuple:
.. code-block:: pycon
- >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False)
+ >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw=False)
(1, 2, 3)
You should always specify the ``use_list`` keyword argument for backward compatibility.
@@ -109,7 +146,7 @@ stream (or from bytes provided through its ``feed`` method).
buf.seek(0)
- unpacker = msgpack.Unpacker(buf)
+ unpacker = msgpack.Unpacker(buf, raw=False)
for unpacked in unpacker:
print(unpacked)
@@ -142,7 +179,7 @@ It is also possible to pack/unpack custom data types. Here is an example for
packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True)
- this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime)
+ this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw=False)
``Unpacker``'s ``object_hook`` callback receives a dict; the
``object_pairs_hook`` callback may instead be used to receive a list of
@@ -172,7 +209,7 @@ It is also possible to pack/unpack custom data types using the **ext** type.
...
>>> data = array.array('d', [1.2, 3.4])
>>> packed = msgpack.packb(data, default=default, use_bin_type=True)
- >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook)
+ >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw=False)
>>> data == unpacked
True
@@ -217,14 +254,10 @@ Early versions of msgpack didn't distinguish string and binary types (like Pytho
The type for representing both string and binary types was named **raw**.
For backward compatibility reasons, msgpack-python will still default all
-strings to byte strings, unless you specify the `use_bin_type=True` option in
+strings to byte strings, unless you specify the ``use_bin_type=True`` option in
the packer. If you do so, it will use a non-standard type called **bin** to
serialize byte arrays, and **raw** becomes to mean **str**. If you want to
-distinguish **bin** and **raw** in the unpacker, specify `encoding='utf-8'`.
-
-**In future version, default value of ``use_bin_type`` will be changed to ``True``.
-To avoid this change will break your code, you must specify it explicitly
-even when you want to use old format.**
+distinguish **bin** and **raw** in the unpacker, specify ``raw=False``.
Note that Python 2 defaults to byte-arrays over Unicode strings:
@@ -234,7 +267,7 @@ Note that Python 2 defaults to byte-arrays over Unicode strings:
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
['spam', 'eggs']
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
- encoding='utf-8')
+ raw=False)
['spam', u'eggs']
This is the same code in Python 3 (same behaviour, but Python 3 has a
@@ -246,7 +279,7 @@ different default):
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
[b'spam', b'eggs']
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
- encoding='utf-8')
+ raw=False)
[b'spam', 'eggs']
@@ -277,6 +310,7 @@ You can use ``gc.disable()`` when unpacking large message.
use_list option
^^^^^^^^^^^^^^^
+
List is the default sequence type of Python.
But tuple is lighter than list.
You can use ``use_list=False`` while unpacking when performance is important.
@@ -295,7 +329,7 @@ Test
MessagePack uses `pytest` for testing.
Run test with following command:
- $ pytest -v test
+ $ make test
..
diff --git a/ci/runtests.bat b/ci/runtests.bat
index 9efea00c8f0c5375bde21536f55bec8bed66a110..02404679f06f1dcff49df607e9718537053ee654 100644
--- a/ci/runtests.bat
+++ b/ci/runtests.bat
@@ -3,5 +3,7 @@
%PYTHON%\python.exe setup.py install
%PYTHON%\python.exe -c "import sys; print(hex(sys.maxsize))"
%PYTHON%\python.exe -c "from msgpack import _packer, _unpacker"
-%PYTHON%\python.exe -m pytest -v test
%PYTHON%\python.exe setup.py bdist_wheel
+%PYTHON%\python.exe -m pytest -v test
+SET EL=%ERRORLEVEL%
+exit /b %EL%
diff --git a/debian/changelog b/debian/changelog
index 732366ad9f259fe7dfd4e2406b72440d2a961021..fffb1e3c64f35875a962b355a83139830935b2b5 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+python-msgpack (0.5.6-1) unstable; urgency=medium
+
+ * Team upload
+ * New upstream release (Closes: #895733)
+
+ -- Gianfranco Costamagna Sun, 15 Apr 2018 18:44:47 +0200
+
python-msgpack (0.5.1-3) unstable; urgency=medium
[ Ondřej Nový ]
diff --git a/docker/runtests.sh b/docker/runtests.sh
index 11ef9f46833ea389908b901bbcf2c772174215a1..113b6307ae5f3ac8d4646ed9866e390477bc8d34 100755
--- a/docker/runtests.sh
+++ b/docker/runtests.sh
@@ -9,6 +9,6 @@ for V in cp36-cp36m cp35-cp35m cp27-cp27m cp27-cp27mu; do
pushd test # prevent importing msgpack package in current directory.
$PYBIN/python -c 'import sys; print(hex(sys.maxsize))'
$PYBIN/python -c 'from msgpack import _packer, _unpacker'
- $PYBIN/py.test -v
+ $PYBIN/pytest -v .
popd
done
diff --git a/msgpack/__init__.py b/msgpack/__init__.py
index 6c5ae53273c747e86985dd3969fe9c963a98d3a8..3955a4143904ce80b1e5bf1a291f47f1bb4eb34c 100644
--- a/msgpack/__init__.py
+++ b/msgpack/__init__.py
@@ -19,13 +19,13 @@ class ExtType(namedtuple('ExtType', 'code data')):
import os
if os.environ.get('MSGPACK_PUREPYTHON'):
- from msgpack.fallback import Packer, unpack, unpackb, Unpacker
+ from msgpack.fallback import Packer, unpackb, Unpacker
else:
try:
from msgpack._packer import Packer
- from msgpack._unpacker import unpack, unpackb, Unpacker
+ from msgpack._unpacker import unpackb, Unpacker
except ImportError:
- from msgpack.fallback import Packer, unpack, unpackb, Unpacker
+ from msgpack.fallback import Packer, unpackb, Unpacker
def pack(o, stream, **kwargs):
@@ -46,6 +46,18 @@ def packb(o, **kwargs):
"""
return Packer(**kwargs).pack(o)
+
+def unpack(stream, **kwargs):
+ """
+ Unpack an object from `stream`.
+
+ Raises `ExtraData` when `stream` contains extra bytes.
+ See :class:`Unpacker` for options.
+ """
+ data = stream.read()
+ return unpackb(data, **kwargs)
+
+
# alias for compatibility to simplejson/marshal/pickle.
load = unpack
loads = unpackb
diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx
index 13a18f6cae68731418aa5e4406b4df069bc224c9..225f24aecc3277dfdbb2feceb1a04c7799f5ada3 100644
--- a/msgpack/_packer.pyx
+++ b/msgpack/_packer.pyx
@@ -1,8 +1,9 @@
# coding: utf-8
-#cython: embedsignature=True
+#cython: embedsignature=True, c_string_encoding=ascii
from cpython cimport *
-#from cpython.exc cimport PyErr_WarnEx
+from cpython.version cimport PY_MAJOR_VERSION
+from cpython.exc cimport PyErr_WarnEx
from msgpack.exceptions import PackValueError, PackOverflowError
from msgpack import ExtType
@@ -13,6 +14,7 @@ cdef extern from "Python.h":
int PyMemoryView_Check(object obj)
int PyByteArray_Check(object obj)
int PyByteArray_CheckExact(object obj)
+ char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t *l) except NULL
cdef extern from "pack.h":
@@ -37,9 +39,10 @@ cdef extern from "pack.h":
int msgpack_pack_bin(msgpack_packer* pk, size_t l)
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l)
+ int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit)
cdef int DEFAULT_RECURSE_LIMIT=511
-cdef size_t ITEM_LIMIT = (2**32)-1
+cdef long long ITEM_LIMIT = (2**32)-1
cdef inline int PyBytesLike_Check(object o):
@@ -87,17 +90,18 @@ cdef class Packer(object):
This is useful when trying to implement accurate serialization
for python types.
+ :param str unicode_errors:
+ Error handler for encoding unicode. (default: 'strict')
+
:param str encoding:
(deprecated) Convert unicode to bytes with this encoding. (default: 'utf-8')
- :param str unicode_errors:
- (deprecated) Error handler for encoding unicode. (default: 'strict')
"""
cdef msgpack_packer pk
cdef object _default
cdef object _bencoding
cdef object _berrors
- cdef char *encoding
- cdef char *unicode_errors
+ cdef const char *encoding
+ cdef const char *unicode_errors
cdef bint strict_types
cdef bool use_float
cdef bint autoreset
@@ -110,9 +114,11 @@ cdef class Packer(object):
self.pk.buf_size = buf_size
self.pk.length = 0
- def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
+ def __init__(self, default=None, encoding=None, unicode_errors=None,
bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
bint strict_types=False):
+ if encoding is not None:
+ PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated.", 1)
self.use_float = use_single_float
self.strict_types = strict_types
self.autoreset = autoreset
@@ -121,20 +127,21 @@ cdef class Packer(object):
if not PyCallable_Check(default):
raise TypeError("default must be a callable.")
self._default = default
+
+ self._bencoding = encoding
if encoding is None:
- self.encoding = NULL
+ if PY_MAJOR_VERSION < 3:
+ self.encoding = 'utf-8'
+ else:
+ self.encoding = NULL
+ else:
+ self.encoding = self._bencoding
+
+ self._berrors = unicode_errors
+ if unicode_errors is None:
self.unicode_errors = NULL
else:
- if isinstance(encoding, unicode):
- self._bencoding = encoding.encode('ascii')
- else:
- self._bencoding = encoding
- self.encoding = PyBytes_AsString(self._bencoding)
- if isinstance(unicode_errors, unicode):
- self._berrors = unicode_errors.encode('ascii')
- else:
- self._berrors = unicode_errors
- self.unicode_errors = PyBytes_AsString(self._berrors)
+ self.unicode_errors = self._berrors
def __dealloc__(self):
PyMem_Free(self.pk.buf)
@@ -149,7 +156,7 @@ cdef class Packer(object):
cdef char* rawval
cdef int ret
cdef dict d
- cdef size_t L
+ cdef Py_ssize_t L
cdef int default_used = 0
cdef bint strict_types = self.strict_types
cdef Py_buffer view
@@ -201,16 +208,19 @@ cdef class Packer(object):
if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
- if not self.encoding:
- raise TypeError("Can't encode unicode string: no encoding is specified")
- o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
- L = len(o)
- if L > ITEM_LIMIT:
- raise PackValueError("unicode string is too large")
- rawval = o
- ret = msgpack_pack_raw(&self.pk, L)
- if ret == 0:
- ret = msgpack_pack_raw_body(&self.pk, rawval, L)
+ if self.encoding == NULL and self.unicode_errors == NULL:
+ ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
+ if ret == -2:
+ raise PackValueError("unicode string is too large")
+ else:
+ o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
+ L = len(o)
+ if L > ITEM_LIMIT:
+ raise PackValueError("unicode string is too large")
+ ret = msgpack_pack_raw(&self.pk, L)
+ if ret == 0:
+ rawval = o
+ ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyDict_CheckExact(o):
d = o
L = len(d)
@@ -273,11 +283,13 @@ cdef class Packer(object):
cpdef pack(self, object obj):
cdef int ret
- ret = self._pack(obj, DEFAULT_RECURSE_LIMIT)
- if ret == -1:
- raise MemoryError
- elif ret: # should not happen.
- raise TypeError
+ try:
+ ret = self._pack(obj, DEFAULT_RECURSE_LIMIT)
+ except:
+ self.pk.length = 0
+ raise
+ if ret: # should not happen.
+ raise RuntimeError("internal error")
if self.autoreset:
buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
self.pk.length = 0
diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx
index 564749e702837383a9d1f31b4bb3cf43b1a549ed..d7fa5bc6c08ba447f35cee3d3be015d558d3df8c 100644
--- a/msgpack/_unpacker.pyx
+++ b/msgpack/_unpacker.pyx
@@ -1,6 +1,7 @@
# coding: utf-8
-#cython: embedsignature=True
+#cython: embedsignature=True, c_string_encoding=ascii
+from cpython.version cimport PY_MAJOR_VERSION
from cpython.bytes cimport (
PyBytes_AsString,
PyBytes_FromStringAndSize,
@@ -43,8 +44,9 @@ from msgpack import ExtType
cdef extern from "unpack.h":
ctypedef struct msgpack_user:
bint use_list
- PyObject* object_hook
+ bint raw
bint has_pairs_hook # call object_hook with k-v pairs
+ PyObject* object_hook
PyObject* list_hook
PyObject* ext_hook
char *encoding
@@ -73,12 +75,14 @@ cdef extern from "unpack.h":
cdef inline init_ctx(unpack_context *ctx,
object object_hook, object object_pairs_hook,
object list_hook, object ext_hook,
- bint use_list, char* encoding, char* unicode_errors,
+ bint use_list, bint raw,
+ const char* encoding, const char* unicode_errors,
Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
Py_ssize_t max_array_len, Py_ssize_t max_map_len,
Py_ssize_t max_ext_len):
unpack_init(ctx)
ctx.user.use_list = use_list
+ ctx.user.raw = raw
ctx.user.object_hook = ctx.user.list_hook = NULL
ctx.user.max_str_len = max_str_len
ctx.user.max_bin_len = max_bin_len
@@ -155,7 +159,8 @@ cdef inline int get_data_from_buffer(object obj,
return 1
def unpackb(object packed, object object_hook=None, object list_hook=None,
- bint use_list=1, encoding=None, unicode_errors="strict",
+ bint use_list=True, bint raw=True,
+ encoding=None, unicode_errors=None,
object_pairs_hook=None, ext_hook=ExtType,
Py_ssize_t max_str_len=2147483647, # 2**32-1
Py_ssize_t max_bin_len=2147483647,
@@ -176,25 +181,21 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
cdef Py_buffer view
cdef char* buf = NULL
cdef Py_ssize_t buf_len
- cdef char* cenc = NULL
- cdef char* cerr = NULL
+ cdef const char* cenc = NULL
+ cdef const char* cerr = NULL
cdef int new_protocol = 0
- get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
-
- try:
- if encoding is not None:
- if isinstance(encoding, unicode):
- encoding = encoding.encode('ascii')
- cenc = PyBytes_AsString(encoding)
+ if encoding is not None:
+ PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1)
+ cenc = encoding
- if unicode_errors is not None:
- if isinstance(unicode_errors, unicode):
- unicode_errors = unicode_errors.encode('ascii')
- cerr = PyBytes_AsString(unicode_errors)
+ if unicode_errors is not None:
+ cerr = unicode_errors
+ get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
+ try:
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
- use_list, cenc, cerr,
+ use_list, raw, cenc, cerr,
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
ret = unpack_construct(&ctx, buf, buf_len, &off)
finally:
@@ -210,30 +211,12 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
raise UnpackValueError("Unpack failed: error = %d" % (ret,))
-def unpack(object stream, object object_hook=None, object list_hook=None,
- bint use_list=1, encoding=None, unicode_errors="strict",
- object_pairs_hook=None, ext_hook=ExtType,
- Py_ssize_t max_str_len=2147483647, # 2**32-1
- Py_ssize_t max_bin_len=2147483647,
- Py_ssize_t max_array_len=2147483647,
- Py_ssize_t max_map_len=2147483647,
- Py_ssize_t max_ext_len=2147483647):
- """
- Unpack an object from `stream`.
-
- Raises `ValueError` when `stream` has extra bytes.
-
- See :class:`Unpacker` for options.
- """
- return unpackb(stream.read(), use_list=use_list,
- object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook,
- encoding=encoding, unicode_errors=unicode_errors, ext_hook=ext_hook,
- max_str_len=max_str_len,
- max_bin_len=max_bin_len,
- max_array_len=max_array_len,
- max_map_len=max_map_len,
- max_ext_len=max_ext_len,
- )
+def unpack(object stream, **kwargs):
+ PyErr_WarnEx(
+ PendingDeprecationWarning,
+ "Direct calling implementation's unpack() is deprecated, Use msgpack.unpack() or unpackb() instead.", 1)
+ data = stream.read()
+ return unpackb(data, **kwargs)
cdef class Unpacker(object):
@@ -252,6 +235,16 @@ cdef class Unpacker(object):
If true, unpack msgpack array to Python list.
Otherwise, unpack to Python tuple. (default: True)
+ :param bool raw:
+ If true, unpack msgpack raw to Python bytes (default).
+ Otherwise, unpack to Python str (or unicode on Python 2) by decoding
+ with UTF-8 encoding (recommended).
+ Currently, the default is true, but it will be changed to false in
+ near future. So you must specify it explicitly for keeping backward
+ compatibility.
+
+ *encoding* option which is deprecated overrides this option.
+
:param callable object_hook:
When specified, it should be callable.
Unpacker calls it with a dict argument after unpacking msgpack map.
@@ -262,14 +255,6 @@ cdef class Unpacker(object):
Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
(See also simplejson)
- :param str encoding:
- Encoding used for decoding msgpack raw.
- If it is None (default), msgpack raw is deserialized to Python bytes.
-
- :param str unicode_errors:
- Used for decoding msgpack raw with *encoding*.
- (default: `'strict'`)
-
:param int max_buffer_size:
Limits size of data waiting unpacked. 0 means system's INT_MAX (default).
Raises `BufferFull` exception when it is insufficient.
@@ -287,16 +272,24 @@ cdef class Unpacker(object):
:param int max_map_len:
Limits max length of map. (default: 2**31-1)
+ :param str encoding:
+ Deprecated, use raw instead.
+ Encoding used for decoding msgpack raw.
+ If it is None (default), msgpack raw is deserialized to Python bytes.
+
+ :param str unicode_errors:
+ Error handler used for decoding str type. (default: `'strict'`)
- example of streaming deserialize from file-like object::
- unpacker = Unpacker(file_like)
+ Example of streaming deserialize from file-like object::
+
+ unpacker = Unpacker(file_like, raw=False)
for o in unpacker:
process(o)
- example of streaming deserialize from socket::
+ Example of streaming deserialize from socket::
- unpacker = Unpacker()
+ unpacker = Unpacker(raw=False)
while True:
buf = sock.recv(1024**2)
if not buf:
@@ -324,17 +317,18 @@ cdef class Unpacker(object):
PyMem_Free(self.buf)
self.buf = NULL
- def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
+ def __init__(self, file_like=None, Py_ssize_t read_size=0,
+ bint use_list=True, bint raw=True,
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
- encoding=None, unicode_errors='strict', int max_buffer_size=0,
+ encoding=None, unicode_errors=None, int max_buffer_size=0,
object ext_hook=ExtType,
Py_ssize_t max_str_len=2147483647, # 2**32-1
Py_ssize_t max_bin_len=2147483647,
Py_ssize_t max_array_len=2147483647,
Py_ssize_t max_map_len=2147483647,
Py_ssize_t max_ext_len=2147483647):
- cdef char *cenc=NULL,
- cdef char *cerr=NULL
+ cdef const char *cenc=NULL,
+ cdef const char *cerr=NULL
self.object_hook = object_hook
self.object_pairs_hook = object_pairs_hook
@@ -363,25 +357,16 @@ cdef class Unpacker(object):
self.stream_offset = 0
if encoding is not None:
- if isinstance(encoding, unicode):
- self.encoding = encoding.encode('ascii')
- elif isinstance(encoding, bytes):
- self.encoding = encoding
- else:
- raise TypeError("encoding should be bytes or unicode")
- cenc = PyBytes_AsString(self.encoding)
+ PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1)
+ self.encoding = encoding
+ cenc = encoding
if unicode_errors is not None:
- if isinstance(unicode_errors, unicode):
- self.unicode_errors = unicode_errors.encode('ascii')
- elif isinstance(unicode_errors, bytes):
- self.unicode_errors = unicode_errors
- else:
- raise TypeError("unicode_errors should be bytes or unicode")
- cerr = PyBytes_AsString(self.unicode_errors)
+ self.unicode_errors = unicode_errors
+ cerr = unicode_errors
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
- ext_hook, use_list, cenc, cerr,
+ ext_hook, use_list, raw, cenc, cerr,
max_str_len, max_bin_len, max_array_len,
max_map_len, max_ext_len)
diff --git a/msgpack/_version.py b/msgpack/_version.py
index ecba3d88769bba171987d4bc34b4172c00a4ebd0..d28f0deb86cf2216f269b5ca7a8f45479076b43d 100644
--- a/msgpack/_version.py
+++ b/msgpack/_version.py
@@ -1 +1 @@
-version = (0, 5, 1)
+version = (0, 5, 6)
diff --git a/msgpack/fallback.py b/msgpack/fallback.py
index 5447b530b80df8ccb934ef98c10c36b30d0799c0..c0e5fd663f91a89123e393d06b3dc8a6473aca44 100644
--- a/msgpack/fallback.py
+++ b/msgpack/fallback.py
@@ -101,12 +101,9 @@ def _get_data_from_buffer(obj):
def unpack(stream, **kwargs):
- """
- Unpack an object from `stream`.
-
- Raises `ExtraData` when `packed` contains extra bytes.
- See :class:`Unpacker` for options.
- """
+ warnings.warn(
+ "Direct calling implementation's unpack() is deprecated, Use msgpack.unpack() or unpackb() instead.",
+ PendingDeprecationWarning)
data = stream.read()
return unpackb(data, **kwargs)
@@ -145,6 +142,16 @@ class Unpacker(object):
If true, unpack msgpack array to Python list.
Otherwise, unpack to Python tuple. (default: True)
+ :param bool raw:
+ If true, unpack msgpack raw to Python bytes (default).
+ Otherwise, unpack to Python str (or unicode on Python 2) by decoding
+ with UTF-8 encoding (recommended).
+ Currently, the default is true, but it will be changed to false in
+ near future. So you must specify it explicitly for keeping backward
+ compatibility.
+
+ *encoding* option which is deprecated overrides this option.
+
:param callable object_hook:
When specified, it should be callable.
Unpacker calls it with a dict argument after unpacking msgpack map.
@@ -183,13 +190,13 @@ class Unpacker(object):
example of streaming deserialize from file-like object::
- unpacker = Unpacker(file_like)
+ unpacker = Unpacker(file_like, raw=False)
for o in unpacker:
process(o)
example of streaming deserialize from socket::
- unpacker = Unpacker()
+ unpacker = Unpacker(raw=False)
while True:
buf = sock.recv(1024**2)
if not buf:
@@ -199,15 +206,24 @@ class Unpacker(object):
process(o)
"""
- def __init__(self, file_like=None, read_size=0, use_list=True,
+ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True,
object_hook=None, object_pairs_hook=None, list_hook=None,
- encoding=None, unicode_errors='strict', max_buffer_size=0,
+ encoding=None, unicode_errors=None, max_buffer_size=0,
ext_hook=ExtType,
max_str_len=2147483647, # 2**32-1
max_bin_len=2147483647,
max_array_len=2147483647,
max_map_len=2147483647,
max_ext_len=2147483647):
+
+ if encoding is not None:
+ warnings.warn(
+ "encoding is deprecated, Use raw=False instead.",
+ PendingDeprecationWarning)
+
+ if unicode_errors is None:
+ unicode_errors = 'strict'
+
if file_like is None:
self._feeding = True
else:
@@ -234,6 +250,7 @@ class Unpacker(object):
if read_size > self._max_buffer_size:
raise ValueError("read_size must be smaller than max_buffer_size")
self._read_size = read_size or min(self._max_buffer_size, 16*1024)
+ self._raw = bool(raw)
self._encoding = encoding
self._unicode_errors = unicode_errors
self._use_list = use_list
@@ -265,6 +282,13 @@ class Unpacker(object):
view = _get_data_from_buffer(next_bytes)
if (len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size):
raise BufferFull
+
+ # Strip buffer before checkpoint before reading file.
+ if self._buf_checkpoint > 0:
+ del self._buffer[:self._buf_checkpoint]
+ self._buff_i -= self._buf_checkpoint
+ self._buf_checkpoint = 0
+
self._buffer += view
def _consume(self):
@@ -582,8 +606,10 @@ class Unpacker(object):
if typ == TYPE_RAW:
if self._encoding is not None:
obj = obj.decode(self._encoding, self._unicode_errors)
- else:
+ elif self._raw:
obj = bytes(obj)
+ else:
+ obj = obj.decode('utf_8')
return obj
if typ == TYPE_EXT:
return self._ext_hook(n, bytes(obj))
@@ -680,11 +706,21 @@ class Packer(object):
(deprecated) Convert unicode to bytes with this encoding. (default: 'utf-8')
:param str unicode_errors:
- (deprecated) Error handler for encoding unicode. (default: 'strict')
+ Error handler for encoding unicode. (default: 'strict')
"""
- def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
+ def __init__(self, default=None, encoding=None, unicode_errors=None,
use_single_float=False, autoreset=True, use_bin_type=False,
strict_types=False):
+ if encoding is None:
+ encoding = 'utf_8'
+ else:
+ warnings.warn(
+ "encoding is deprecated, Use raw=False instead.",
+ PendingDeprecationWarning)
+
+ if unicode_errors is None:
+ unicode_errors = 'strict'
+
self._strict_types = strict_types
self._use_float = use_single_float
self._autoreset = autoreset
@@ -808,7 +844,11 @@ class Packer(object):
raise TypeError("Cannot serialize %r" % (obj, ))
def pack(self, obj):
- self._pack(obj)
+ try:
+ self._pack(obj)
+ except:
+ self._buffer = StringIO() # force reset
+ raise
ret = self._buffer.getvalue()
if self._autoreset:
self._buffer = StringIO()
diff --git a/msgpack/pack.h b/msgpack/pack.h
index 3bc21ea5deffaf464673ecf533c6f5d7940dd10a..4f3ce1d99ec3f4af779723d21d1a540111b9eb54 100644
--- a/msgpack/pack.h
+++ b/msgpack/pack.h
@@ -67,6 +67,53 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_
#include "pack_template.h"
+// return -2 when o is too long
+static inline int
+msgpack_pack_unicode(msgpack_packer *pk, PyObject *o, long long limit)
+{
+#if PY_MAJOR_VERSION >= 3
+ assert(PyUnicode_Check(o));
+
+ Py_ssize_t len;
+ const char* buf = PyUnicode_AsUTF8AndSize(o, &len);
+ if (buf == NULL)
+ return -1;
+
+ if (len > limit) {
+ return -2;
+ }
+
+ int ret = msgpack_pack_raw(pk, len);
+ if (ret) return ret;
+
+ return msgpack_pack_raw_body(pk, buf, len);
+#else
+ PyObject *bytes;
+ Py_ssize_t len;
+ int ret;
+
+ // py2
+ bytes = PyUnicode_AsUTF8String(o);
+ if (bytes == NULL)
+ return -1;
+
+ len = PyString_GET_SIZE(bytes);
+ if (len > limit) {
+ Py_DECREF(bytes);
+ return -2;
+ }
+
+ ret = msgpack_pack_raw(pk, len);
+ if (ret) {
+ Py_DECREF(bytes);
+ return -1;
+ }
+ ret = msgpack_pack_raw_body(pk, PyString_AS_STRING(bytes), len);
+ Py_DECREF(bytes);
+ return ret;
+#endif
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/msgpack/unpack.h b/msgpack/unpack.h
index da2cfb6aa2370cd8b355ca66abc0b860d4498150..63e5543b559818d377aa51ab9493b163579fdaa2 100644
--- a/msgpack/unpack.h
+++ b/msgpack/unpack.h
@@ -20,9 +20,10 @@
#include "unpack_define.h"
typedef struct unpack_user {
- int use_list;
- PyObject *object_hook;
+ bool use_list;
+ bool raw;
bool has_pairs_hook;
+ PyObject *object_hook;
PyObject *list_hook;
PyObject *ext_hook;
const char *encoding;
@@ -225,10 +226,13 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
}
PyObject *py;
- if(u->encoding) {
+
+ if (u->encoding) {
py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors);
- } else {
+ } else if (u->raw) {
py = PyBytes_FromStringAndSize(p, l);
+ } else {
+ py = PyUnicode_DecodeUTF8(p, l, u->unicode_errors);
}
if (!py)
return -1;
diff --git a/test/test_limits.py b/test/test_limits.py
index 197ef4615e3c1581dbff33e3c9023a8d2d3a23a4..74e48c19aebc86ae31eb8a8b282cf655ff5e1e80 100644
--- a/test/test_limits.py
+++ b/test/test_limits.py
@@ -39,11 +39,11 @@ def test_max_str_len():
d = 'x' * 3
packed = packb(d)
- unpacker = Unpacker(max_str_len=3, encoding='utf-8')
+ unpacker = Unpacker(max_str_len=3, raw=False)
unpacker.feed(packed)
assert unpacker.unpack() == d
- unpacker = Unpacker(max_str_len=2, encoding='utf-8')
+ unpacker = Unpacker(max_str_len=2, raw=False)
with pytest.raises(UnpackValueError):
unpacker.feed(packed)
unpacker.unpack()
diff --git a/test/test_pack.py b/test/test_pack.py
index ac931038fc962e2df30d51a700ef18996fe74bd1..b447f9c38631672ff2a3275267362d4b941690cf 100644
--- a/test/test_pack.py
+++ b/test/test_pack.py
@@ -31,14 +31,14 @@ def testPack():
def testPackUnicode():
test_data = ["", "abcd", ["defgh"], "Русский текст"]
for td in test_data:
- re = unpackb(packb(td, encoding='utf-8'), use_list=1, encoding='utf-8')
+ re = unpackb(packb(td), use_list=1, raw=False)
assert re == td
- packer = Packer(encoding='utf-8')
+ packer = Packer()
data = packer.pack(td)
- re = Unpacker(BytesIO(data), encoding=str('utf-8'), use_list=1).unpack()
+ re = Unpacker(BytesIO(data), raw=False, use_list=1).unpack()
assert re == td
-def testPackUTF32():
+def testPackUTF32(): # deprecated
try:
test_data = [
"",
@@ -66,26 +66,22 @@ def testPackByteArrays():
for td in test_data:
check(td)
-def testIgnoreUnicodeErrors():
+def testIgnoreUnicodeErrors(): # deprecated
re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1)
assert re == "abcdef"
def testStrictUnicodeUnpack():
with raises(UnicodeDecodeError):
- unpackb(packb(b'abc\xeddef'), encoding='utf-8', use_list=1)
+ unpackb(packb(b'abc\xeddef'), raw=False, use_list=1)
-def testStrictUnicodePack():
+def testStrictUnicodePack(): # deprecated
with raises(UnicodeEncodeError):
packb("abc\xeddef", encoding='ascii', unicode_errors='strict')
-def testIgnoreErrorsPack():
- re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), encoding='utf-8', use_list=1)
+def testIgnoreErrorsPack(): # deprecated
+ re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), raw=False, use_list=1)
assert re == "abcdef"
-def testNoEncoding():
- with raises(TypeError):
- packb("abc", encoding=None)
-
def testDecodeBinary():
re = unpackb(packb(b"abc"), encoding=None, use_list=1)
assert re == b"abc"
diff --git a/test/test_stricttype.py b/test/test_stricttype.py
index 0f865c8353b87462b0f39929a49a250794b1d006..87e7c1ce1df9e0b18956d03d944c20178f9bcdfa 100644
--- a/test/test_stricttype.py
+++ b/test/test_stricttype.py
@@ -11,7 +11,7 @@ def test_namedtuple():
return dict(o._asdict())
raise TypeError('Unsupported type %s' % (type(o),))
packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default)
- unpacked = unpackb(packed, encoding='utf-8')
+ unpacked = unpackb(packed, raw=False)
assert unpacked == {'foo': 1, 'bar': 42}
@@ -32,7 +32,7 @@ def test_tuple():
return o
data = packb(t, strict_types=True, use_bin_type=True, default=default)
- expected = unpackb(data, encoding='utf-8', object_hook=convert)
+ expected = unpackb(data, raw=False, object_hook=convert)
assert expected == t
@@ -53,10 +53,10 @@ def test_tuple_ext():
def convert(code, payload):
if code == MSGPACK_EXT_TYPE_TUPLE:
# Unpack and convert to tuple
- return tuple(unpackb(payload, encoding='utf-8', ext_hook=convert))
+ return tuple(unpackb(payload, raw=False, ext_hook=convert))
raise ValueError('Unknown Ext code {}'.format(code))
data = packb(t, strict_types=True, use_bin_type=True, default=default)
- expected = unpackb(data, encoding='utf-8', ext_hook=convert)
+ expected = unpackb(data, raw=False, ext_hook=convert)
assert expected == t
diff --git a/test/test_unpack.py b/test/test_unpack.py
index c0d711cd77c3123c2ebfbc9eeac8d9fbf1c96fed..00a10612ee72dc314109f4d2c70c9e6c20cb66f4 100644
--- a/test/test_unpack.py
+++ b/test/test_unpack.py
@@ -47,8 +47,8 @@ def test_unpacker_ext_hook():
class MyUnpacker(Unpacker):
def __init__(self):
- super(MyUnpacker, self).__init__(ext_hook=self._hook,
- encoding='utf-8')
+ super(MyUnpacker, self).__init__(
+ ext_hook=self._hook, raw=False)
def _hook(self, code, data):
if code == 1:
@@ -57,11 +57,11 @@ def test_unpacker_ext_hook():
return ExtType(code, data)
unpacker = MyUnpacker()
- unpacker.feed(packb({'a': 1}, encoding='utf-8'))
+ unpacker.feed(packb({'a': 1}))
assert unpacker.unpack() == {'a': 1}
- unpacker.feed(packb({'a': ExtType(1, b'123')}, encoding='utf-8'))
+ unpacker.feed(packb({'a': ExtType(1, b'123')}))
assert unpacker.unpack() == {'a': 123}
- unpacker.feed(packb({'a': ExtType(2, b'321')}, encoding='utf-8'))
+ unpacker.feed(packb({'a': ExtType(2, b'321')}))
assert unpacker.unpack() == {'a': ExtType(2, b'321')}