|
"""Fallback pure Python implementation of msgpack""" |
|
from datetime import datetime as _DateTime |
|
import sys |
|
import struct |
|
|
|
|
|
PY2 = sys.version_info[0] == 2 |
|
if PY2: |
|
int_types = (int, long) |
|
|
|
def dict_iteritems(d): |
|
return d.iteritems() |
|
|
|
else: |
|
int_types = int |
|
unicode = str |
|
xrange = range |
|
|
|
def dict_iteritems(d): |
|
return d.items() |
|
|
|
|
|
if sys.version_info < (3, 5): |
|
|
|
    # Python < 3.5 has no RecursionError; fall back to RuntimeError and
    # inspect its message in _is_recursionerror().
    RecursionError = RuntimeError
|
|
|
def _is_recursionerror(e): |
|
return ( |
|
len(e.args) == 1 |
|
and isinstance(e.args[0], str) |
|
and e.args[0].startswith("maximum recursion depth exceeded") |
|
) |
|
|
|
else: |
|
|
|
def _is_recursionerror(e): |
|
return True |
|
|
|
|
|
if hasattr(sys, "pypy_version_info"): |
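    # PyPy fast paths: build the output with the append-only builders from
    # __pypy__ and preallocate lists with newlist_hint.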
|
|
|
|
|
from __pypy__ import newlist_hint |
|
|
|
try: |
|
from __pypy__.builders import BytesBuilder as StringBuilder |
|
except ImportError: |
|
from __pypy__.builders import StringBuilder |
|
USING_STRINGBUILDER = True |
|
|
|
class StringIO(object): |
|
def __init__(self, s=b""): |
|
if s: |
|
self.builder = StringBuilder(len(s)) |
|
self.builder.append(s) |
|
else: |
|
self.builder = StringBuilder() |
|
|
|
def write(self, s): |
|
if isinstance(s, memoryview): |
|
s = s.tobytes() |
|
elif isinstance(s, bytearray): |
|
s = bytes(s) |
|
self.builder.append(s) |
|
|
|
def getvalue(self): |
|
return self.builder.build() |
|
|
|
else: |
|
USING_STRINGBUILDER = False |
|
from io import BytesIO as StringIO |
|
|
|
    # CPython has no list-preallocation hint; a plain list is fine.
    newlist_hint = lambda size: []
|
|
|
|
|
from .exceptions import BufferFull, OutOfData, ExtraData, FormatError, StackError |
|
|
|
from .ext import ExtType, Timestamp |
|
|
|
|
|
# Execution modes for Unpacker._unpack().
EX_SKIP = 0
|
EX_CONSTRUCT = 1 |
|
EX_READ_ARRAY_HEADER = 2 |
|
EX_READ_MAP_HEADER = 3 |
|
|
|
# Object type tags returned by Unpacker._read_header().
TYPE_IMMEDIATE = 0
|
TYPE_ARRAY = 1 |
|
TYPE_MAP = 2 |
|
TYPE_RAW = 3 |
|
TYPE_BIN = 4 |
|
TYPE_EXT = 5 |
|
|
|
# Maximum nesting depth used by Packer._pack(); the unpacker relies on
# RecursionError instead.
DEFAULT_RECURSE_LIMIT = 511
|
|
|
|
|
def _check_type_strict(obj, t, type=type, tuple=tuple):
    # `type` and `tuple` are bound as default arguments for faster lookup.
|
if type(t) is tuple: |
|
return type(obj) in t |
|
else: |
|
return type(obj) is t |
|
|
|
|
|
def _get_data_from_buffer(obj): |
|
view = memoryview(obj) |
|
if view.itemsize != 1: |
|
raise ValueError("cannot unpack from multi-byte object") |
|
return view |
|
|
|
|
|
def unpackb(packed, **kwargs): |
|
""" |
|
Unpack an object from `packed`. |
|
|
|
Raises ``ExtraData`` when *packed* contains extra bytes. |
|
Raises ``ValueError`` when *packed* is incomplete. |
|
Raises ``FormatError`` when *packed* is not valid msgpack. |
|
    Raises ``StackError`` when *packed* contains too deeply nested data.
|
Other exceptions can be raised during unpacking. |
|
|
|
See :class:`Unpacker` for options. |
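
    Example, round-tripping through :class:`Packer` from this module (a
    minimal sketch)::

        >>> unpackb(Packer().pack([1, 2, 3]))
        [1, 2, 3]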
|
""" |
|
unpacker = Unpacker(None, max_buffer_size=len(packed), **kwargs) |
|
unpacker.feed(packed) |
|
try: |
|
ret = unpacker._unpack() |
|
except OutOfData: |
|
raise ValueError("Unpack failed: incomplete input") |
|
except RecursionError as e: |
|
if _is_recursionerror(e): |
|
raise StackError |
|
raise |
|
if unpacker._got_extradata(): |
|
raise ExtraData(ret, unpacker._get_extradata()) |
|
return ret |
|
|
|
|
|
if sys.version_info < (2, 7, 6): |
|
|
|
def _unpack_from(f, b, o=0): |
|
"""Explicit type cast for legacy struct.unpack_from""" |
|
return struct.unpack_from(f, bytes(b), o) |
|
|
|
else: |
|
_unpack_from = struct.unpack_from |
|
|
|
_NO_FORMAT_USED = "" |
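
# Maps the first byte of a msgpack value to a header description: the number
# of bytes that follow for the length/value, the struct format used to decode
# them (empty when the single byte is read directly), and, where present, the
# object type tag the header introduces.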
|
_MSGPACK_HEADERS = { |
|
0xC4: (1, _NO_FORMAT_USED, TYPE_BIN), |
|
0xC5: (2, ">H", TYPE_BIN), |
|
0xC6: (4, ">I", TYPE_BIN), |
|
0xC7: (2, "Bb", TYPE_EXT), |
|
0xC8: (3, ">Hb", TYPE_EXT), |
|
0xC9: (5, ">Ib", TYPE_EXT), |
|
0xCA: (4, ">f"), |
|
0xCB: (8, ">d"), |
|
0xCC: (1, _NO_FORMAT_USED), |
|
0xCD: (2, ">H"), |
|
0xCE: (4, ">I"), |
|
0xCF: (8, ">Q"), |
|
0xD0: (1, "b"), |
|
0xD1: (2, ">h"), |
|
0xD2: (4, ">i"), |
|
0xD3: (8, ">q"), |
|
0xD4: (1, "b1s", TYPE_EXT), |
|
0xD5: (2, "b2s", TYPE_EXT), |
|
0xD6: (4, "b4s", TYPE_EXT), |
|
0xD7: (8, "b8s", TYPE_EXT), |
|
0xD8: (16, "b16s", TYPE_EXT), |
|
0xD9: (1, _NO_FORMAT_USED, TYPE_RAW), |
|
0xDA: (2, ">H", TYPE_RAW), |
|
0xDB: (4, ">I", TYPE_RAW), |
|
0xDC: (2, ">H", TYPE_ARRAY), |
|
0xDD: (4, ">I", TYPE_ARRAY), |
|
0xDE: (2, ">H", TYPE_MAP), |
|
0xDF: (4, ">I", TYPE_MAP), |
|
} |
|
|
|
|
|
class Unpacker(object): |
|
"""Streaming unpacker. |
|
|
|
Arguments: |
|
|
|
:param file_like: |
|
File-like object having `.read(n)` method. |
|
If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. |
|
|
|
:param int read_size: |
|
Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) |
|
|
|
:param bool use_list: |
|
If true, unpack msgpack array to Python list. |
|
Otherwise, unpack to Python tuple. (default: True) |
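
        For example::

            unpackb(Packer().pack([1, 2]), use_list=False)  # -> (1, 2)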
|
|
|
:param bool raw: |
|
If true, unpack msgpack raw to Python bytes. |
|
Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). |
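
        For example::

            unpackb(Packer().pack("abc"), raw=True)   # -> b"abc"
            unpackb(Packer().pack("abc"), raw=False)  # -> "abc"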
|
|
|
:param int timestamp: |
|
Control how timestamp type is unpacked: |
|
|
|
0 - Timestamp |
|
1 - float (Seconds from the EPOCH) |
|
2 - int (Nanoseconds from the EPOCH) |
|
3 - datetime.datetime (UTC). Python 2 is not supported. |
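
        For example (a sketch; assumes *packed* holds a msgpack timestamp)::

            unpackb(packed, timestamp=3)  # -> datetime.datetime in UTC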
|
|
|
:param bool strict_map_key: |
|
If true (default), only str or bytes are accepted for map (dict) keys. |
|
|
|
:param callable object_hook: |
|
When specified, it should be callable. |
|
Unpacker calls it with a dict argument after unpacking msgpack map. |
|
(See also simplejson) |
|
|
|
:param callable object_pairs_hook: |
|
When specified, it should be callable. |
|
Unpacker calls it with a list of key-value pairs after unpacking msgpack map. |
|
(See also simplejson) |
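
        For example, to keep duplicate map keys visible instead of collapsing
        them into a dict::

            unpacker = Unpacker(file_like, object_pairs_hook=list)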
|
|
|
:param str unicode_errors: |
|
The error handler for decoding unicode. (default: 'strict') |
|
This option should be used only when you have msgpack data which |
|
        contains invalid UTF-8 strings.
|
|
|
:param int max_buffer_size: |
|
        Limits size of data waiting to be unpacked. 0 means 2**31-1.
        The default value is 100*1024*1024 (100MiB).
        Raises `BufferFull` exception when it is insufficient.
        You should set this parameter when unpacking data from an untrusted source.
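
        For example, to cap buffering for untrusted input at 1MiB::

            unpacker = Unpacker(file_like, max_buffer_size=1024 * 1024)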
|
|
|
:param int max_str_len: |
|
Deprecated, use *max_buffer_size* instead. |
|
Limits max length of str. (default: max_buffer_size) |
|
|
|
:param int max_bin_len: |
|
Deprecated, use *max_buffer_size* instead. |
|
Limits max length of bin. (default: max_buffer_size) |
|
|
|
:param int max_array_len: |
|
Limits max length of array. |
|
(default: max_buffer_size) |
|
|
|
:param int max_map_len: |
|
Limits max length of map. |
|
(default: max_buffer_size//2) |
|
|
|
:param int max_ext_len: |
|
Deprecated, use *max_buffer_size* instead. |
|
Limits max size of ext type. (default: max_buffer_size) |
|
|
|
    Example of streaming deserialization from a file-like object::
|
|
|
unpacker = Unpacker(file_like) |
|
for o in unpacker: |
|
process(o) |
|
|
|
    Example of streaming deserialization from a socket::
|
|
|
unpacker = Unpacker() |
|
while True: |
|
buf = sock.recv(1024**2) |
|
if not buf: |
|
break |
|
unpacker.feed(buf) |
|
for o in unpacker: |
|
process(o) |
|
|
|
    Raises ``ExtraData`` when the stream contains extra bytes.
    Raises ``OutOfData`` when the stream is incomplete.
    Raises ``FormatError`` when the stream is not valid msgpack.
    Raises ``StackError`` when the stream contains too deeply nested data.
|
Other exceptions can be raised during unpacking. |
|
""" |
|
|
|
def __init__( |
|
self, |
|
file_like=None, |
|
read_size=0, |
|
use_list=True, |
|
raw=False, |
|
timestamp=0, |
|
strict_map_key=True, |
|
object_hook=None, |
|
object_pairs_hook=None, |
|
list_hook=None, |
|
unicode_errors=None, |
|
max_buffer_size=100 * 1024 * 1024, |
|
ext_hook=ExtType, |
|
max_str_len=-1, |
|
max_bin_len=-1, |
|
max_array_len=-1, |
|
max_map_len=-1, |
|
max_ext_len=-1, |
|
): |
|
if unicode_errors is None: |
|
unicode_errors = "strict" |
|
|
|
if file_like is None: |
|
self._feeding = True |
|
else: |
|
if not callable(file_like.read): |
|
raise TypeError("`file_like.read` must be callable") |
|
self.file_like = file_like |
|
self._feeding = False |
|
|
|
|
|
        #: Array of bytes fed or read so far.
        self._buffer = bytearray()
|
|
|
        #: Current read position within self._buffer.
        self._buff_i = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
        # When Unpacker is used as an iterable, the buffer is not consumed
        # completely between calls to next().  _buf_checkpoint records how far
        # it really has been consumed, so that BufferFull is raised at the
        # right time and the read position can be rolled back when OutOfData
        # is raised in the middle of an object.
        self._buf_checkpoint = 0
|
|
|
if not max_buffer_size: |
|
max_buffer_size = 2**31 - 1 |
|
if max_str_len == -1: |
|
max_str_len = max_buffer_size |
|
if max_bin_len == -1: |
|
max_bin_len = max_buffer_size |
|
if max_array_len == -1: |
|
max_array_len = max_buffer_size |
|
if max_map_len == -1: |
|
max_map_len = max_buffer_size // 2 |
|
if max_ext_len == -1: |
|
max_ext_len = max_buffer_size |
|
|
|
self._max_buffer_size = max_buffer_size |
|
if read_size > self._max_buffer_size: |
|
raise ValueError("read_size must be smaller than max_buffer_size") |
|
self._read_size = read_size or min(self._max_buffer_size, 16 * 1024) |
|
self._raw = bool(raw) |
|
self._strict_map_key = bool(strict_map_key) |
|
self._unicode_errors = unicode_errors |
|
self._use_list = use_list |
|
if not (0 <= timestamp <= 3): |
|
raise ValueError("timestamp must be 0..3") |
|
self._timestamp = timestamp |
|
self._list_hook = list_hook |
|
self._object_hook = object_hook |
|
self._object_pairs_hook = object_pairs_hook |
|
self._ext_hook = ext_hook |
|
self._max_str_len = max_str_len |
|
self._max_bin_len = max_bin_len |
|
self._max_array_len = max_array_len |
|
self._max_map_len = max_map_len |
|
self._max_ext_len = max_ext_len |
|
self._stream_offset = 0 |
|
|
|
if list_hook is not None and not callable(list_hook): |
|
raise TypeError("`list_hook` is not callable") |
|
if object_hook is not None and not callable(object_hook): |
|
raise TypeError("`object_hook` is not callable") |
|
if object_pairs_hook is not None and not callable(object_pairs_hook): |
|
raise TypeError("`object_pairs_hook` is not callable") |
|
if object_hook is not None and object_pairs_hook is not None: |
|
            raise TypeError("object_pairs_hook and object_hook are mutually exclusive")
|
if not callable(ext_hook): |
|
raise TypeError("`ext_hook` is not callable") |
|
|
|
def feed(self, next_bytes): |
|
assert self._feeding |
|
view = _get_data_from_buffer(next_bytes) |
|
if len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size: |
|
raise BufferFull |
|
|
|
|
|
        # Strip the already-consumed prefix of the buffer before extending it,
        # so memory use stays bounded while streaming.
        if self._buf_checkpoint > 0:
|
del self._buffer[: self._buf_checkpoint] |
|
self._buff_i -= self._buf_checkpoint |
|
self._buf_checkpoint = 0 |
|
|
|
|
|
        # Use extend(): it accepts any bytes-like object, including the
        # memoryview returned by _get_data_from_buffer().
        self._buffer.extend(view)
|
|
|
def _consume(self): |
|
"""Gets rid of the used parts of the buffer.""" |
|
self._stream_offset += self._buff_i - self._buf_checkpoint |
|
self._buf_checkpoint = self._buff_i |
|
|
|
def _got_extradata(self): |
|
return self._buff_i < len(self._buffer) |
|
|
|
def _get_extradata(self): |
|
return self._buffer[self._buff_i :] |
|
|
|
    def read_bytes(self, n):
        """Read up to *n* raw bytes from the stream, bypassing msgpack decoding."""
|
ret = self._read(n, raise_outofdata=False) |
|
self._consume() |
|
return ret |
|
|
|
    def _read(self, n, raise_outofdata=True):
        """Return up to *n* buffered bytes and advance the read position."""
|
|
|
self._reserve(n, raise_outofdata=raise_outofdata) |
|
i = self._buff_i |
|
ret = self._buffer[i : i + n] |
|
self._buff_i = i + len(ret) |
|
return ret |
|
|
|
def _reserve(self, n, raise_outofdata=True): |
|
remain_bytes = len(self._buffer) - self._buff_i - n |
|
|
|
|
|
        # Fast path: the buffer already holds the requested bytes.
        if remain_bytes >= 0:
|
return |
|
|
|
if self._feeding: |
|
            # Roll back to the checkpoint so the caller can feed() more data
            # and retry the same object.
            self._buff_i = self._buf_checkpoint
|
raise OutOfData |
|
|
|
|
|
        # Strip the consumed prefix of the buffer before reading from the file.
        if self._buf_checkpoint > 0:
|
del self._buffer[: self._buf_checkpoint] |
|
self._buff_i -= self._buf_checkpoint |
|
self._buf_checkpoint = 0 |
|
|
|
|
|
        # Read from file_like until the request is satisfied or EOF is hit.
        remain_bytes = -remain_bytes
|
if remain_bytes + len(self._buffer) > self._max_buffer_size: |
|
raise BufferFull |
|
while remain_bytes > 0: |
|
to_read_bytes = max(self._read_size, remain_bytes) |
|
read_data = self.file_like.read(to_read_bytes) |
|
if not read_data: |
|
break |
|
assert isinstance(read_data, bytes) |
|
self._buffer += read_data |
|
remain_bytes -= len(read_data) |
|
|
|
if len(self._buffer) < n + self._buff_i and raise_outofdata: |
|
self._buff_i = 0 |
|
raise OutOfData |
|
|
|
def _read_header(self): |
|
typ = TYPE_IMMEDIATE |
|
n = 0 |
|
obj = None |
|
self._reserve(1) |
|
b = self._buffer[self._buff_i] |
|
self._buff_i += 1 |
|
        if b & 0b10000000 == 0:
            # Positive fixint: 0x00..0x7f is the value itself.
            obj = b
        elif b & 0b11100000 == 0b11100000:
            # Negative fixint: 0xe0..0xff encode -32..-1.
            obj = -1 - (b ^ 0xFF)
|
elif b & 0b11100000 == 0b10100000: |
|
n = b & 0b00011111 |
|
typ = TYPE_RAW |
|
if n > self._max_str_len: |
|
raise ValueError("%s exceeds max_str_len(%s)" % (n, self._max_str_len)) |
|
obj = self._read(n) |
|
elif b & 0b11110000 == 0b10010000: |
|
n = b & 0b00001111 |
|
typ = TYPE_ARRAY |
|
if n > self._max_array_len: |
|
raise ValueError( |
|
"%s exceeds max_array_len(%s)" % (n, self._max_array_len) |
|
) |
|
elif b & 0b11110000 == 0b10000000: |
|
n = b & 0b00001111 |
|
typ = TYPE_MAP |
|
if n > self._max_map_len: |
|
raise ValueError("%s exceeds max_map_len(%s)" % (n, self._max_map_len)) |
|
elif b == 0xC0: |
|
obj = None |
|
elif b == 0xC2: |
|
obj = False |
|
elif b == 0xC3: |
|
obj = True |
|
elif 0xC4 <= b <= 0xC6: |
|
size, fmt, typ = _MSGPACK_HEADERS[b] |
|
self._reserve(size) |
|
if len(fmt) > 0: |
|
n = _unpack_from(fmt, self._buffer, self._buff_i)[0] |
|
else: |
|
n = self._buffer[self._buff_i] |
|
self._buff_i += size |
|
if n > self._max_bin_len: |
|
raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) |
|
obj = self._read(n) |
|
elif 0xC7 <= b <= 0xC9: |
|
size, fmt, typ = _MSGPACK_HEADERS[b] |
|
self._reserve(size) |
|
L, n = _unpack_from(fmt, self._buffer, self._buff_i) |
|
self._buff_i += size |
|
if L > self._max_ext_len: |
|
raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) |
|
obj = self._read(L) |
|
elif 0xCA <= b <= 0xD3: |
|
size, fmt = _MSGPACK_HEADERS[b] |
|
self._reserve(size) |
|
if len(fmt) > 0: |
|
obj = _unpack_from(fmt, self._buffer, self._buff_i)[0] |
|
else: |
|
obj = self._buffer[self._buff_i] |
|
self._buff_i += size |
|
elif 0xD4 <= b <= 0xD8: |
|
size, fmt, typ = _MSGPACK_HEADERS[b] |
|
if self._max_ext_len < size: |
|
raise ValueError( |
|
"%s exceeds max_ext_len(%s)" % (size, self._max_ext_len) |
|
) |
|
self._reserve(size + 1) |
|
n, obj = _unpack_from(fmt, self._buffer, self._buff_i) |
|
self._buff_i += size + 1 |
|
elif 0xD9 <= b <= 0xDB: |
|
size, fmt, typ = _MSGPACK_HEADERS[b] |
|
self._reserve(size) |
|
if len(fmt) > 0: |
|
(n,) = _unpack_from(fmt, self._buffer, self._buff_i) |
|
else: |
|
n = self._buffer[self._buff_i] |
|
self._buff_i += size |
|
if n > self._max_str_len: |
|
raise ValueError("%s exceeds max_str_len(%s)" % (n, self._max_str_len)) |
|
obj = self._read(n) |
|
elif 0xDC <= b <= 0xDD: |
|
size, fmt, typ = _MSGPACK_HEADERS[b] |
|
self._reserve(size) |
|
(n,) = _unpack_from(fmt, self._buffer, self._buff_i) |
|
self._buff_i += size |
|
if n > self._max_array_len: |
|
raise ValueError( |
|
"%s exceeds max_array_len(%s)" % (n, self._max_array_len) |
|
) |
|
elif 0xDE <= b <= 0xDF: |
|
size, fmt, typ = _MSGPACK_HEADERS[b] |
|
self._reserve(size) |
|
(n,) = _unpack_from(fmt, self._buffer, self._buff_i) |
|
self._buff_i += size |
|
if n > self._max_map_len: |
|
raise ValueError("%s exceeds max_map_len(%s)" % (n, self._max_map_len)) |
|
else: |
|
raise FormatError("Unknown header: 0x%x" % b) |
|
return typ, n, obj |
|
|
|
def _unpack(self, execute=EX_CONSTRUCT): |
|
typ, n, obj = self._read_header() |
|
|
|
if execute == EX_READ_ARRAY_HEADER: |
|
if typ != TYPE_ARRAY: |
|
raise ValueError("Expected array") |
|
return n |
|
if execute == EX_READ_MAP_HEADER: |
|
if typ != TYPE_MAP: |
|
raise ValueError("Expected map") |
|
return n |
|
|
|
if typ == TYPE_ARRAY: |
|
if execute == EX_SKIP: |
|
for i in xrange(n): |
|
|
|
self._unpack(EX_SKIP) |
|
return |
|
ret = newlist_hint(n) |
|
for i in xrange(n): |
|
ret.append(self._unpack(EX_CONSTRUCT)) |
|
if self._list_hook is not None: |
|
ret = self._list_hook(ret) |
|
|
|
return ret if self._use_list else tuple(ret) |
|
if typ == TYPE_MAP: |
|
if execute == EX_SKIP: |
|
for i in xrange(n): |
|
|
|
self._unpack(EX_SKIP) |
|
self._unpack(EX_SKIP) |
|
return |
|
if self._object_pairs_hook is not None: |
|
ret = self._object_pairs_hook( |
|
(self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) |
|
for _ in xrange(n) |
|
) |
|
else: |
|
ret = {} |
|
for _ in xrange(n): |
|
key = self._unpack(EX_CONSTRUCT) |
|
if self._strict_map_key and type(key) not in (unicode, bytes): |
|
raise ValueError( |
|
"%s is not allowed for map key" % str(type(key)) |
|
) |
|
                    if not PY2 and type(key) is str:
                        # Intern string keys: map keys tend to repeat across
                        # objects, so interning saves memory.
                        key = sys.intern(key)
|
ret[key] = self._unpack(EX_CONSTRUCT) |
|
if self._object_hook is not None: |
|
ret = self._object_hook(ret) |
|
return ret |
|
if execute == EX_SKIP: |
|
return |
|
if typ == TYPE_RAW: |
|
if self._raw: |
|
obj = bytes(obj) |
|
else: |
|
obj = obj.decode("utf_8", self._unicode_errors) |
|
return obj |
|
if typ == TYPE_BIN: |
|
return bytes(obj) |
|
if typ == TYPE_EXT: |
|
if n == -1: |
|
ts = Timestamp.from_bytes(bytes(obj)) |
|
if self._timestamp == 1: |
|
return ts.to_unix() |
|
elif self._timestamp == 2: |
|
return ts.to_unix_nano() |
|
elif self._timestamp == 3: |
|
return ts.to_datetime() |
|
else: |
|
return ts |
|
else: |
|
return self._ext_hook(n, bytes(obj)) |
|
assert typ == TYPE_IMMEDIATE |
|
return obj |
|
|
|
def __iter__(self): |
|
return self |
|
|
|
def __next__(self): |
|
try: |
|
ret = self._unpack(EX_CONSTRUCT) |
|
self._consume() |
|
return ret |
|
except OutOfData: |
|
self._consume() |
|
raise StopIteration |
|
except RecursionError: |
|
raise StackError |
|
|
|
    next = __next__  # Python 2 compatibility for the iterator protocol
|
|
|
    def skip(self):
        """Read one object from the stream and discard it."""
|
self._unpack(EX_SKIP) |
|
self._consume() |
|
|
|
    def unpack(self):
        """Unpack one object from the stream and return it."""
|
try: |
|
ret = self._unpack(EX_CONSTRUCT) |
|
except RecursionError: |
|
raise StackError |
|
self._consume() |
|
return ret |
|
|
|
    def read_array_header(self):
        """Read an array header and return its length; the items remain unread."""
|
ret = self._unpack(EX_READ_ARRAY_HEADER) |
|
self._consume() |
|
return ret |
|
|
|
    def read_map_header(self):
        """Read a map header and return its pair count; the pairs remain unread."""
|
ret = self._unpack(EX_READ_MAP_HEADER) |
|
self._consume() |
|
return ret |
|
|
|
    def tell(self):
        """Return the number of bytes consumed from the stream so far."""
|
return self._stream_offset |
|
|
|
|
|
class Packer(object): |
|
""" |
|
MessagePack Packer |
|
|
|
Usage:: |
|
|
|
packer = Packer() |
|
astream.write(packer.pack(a)) |
|
astream.write(packer.pack(b)) |
|
|
|
Packer's constructor has some keyword arguments: |
|
|
|
:param callable default: |
|
        Convert user types into builtin types that Packer supports.
        (See also simplejson's documentation.)
|
|
|
:param bool use_single_float: |
|
Use single precision float type for float. (default: False) |
|
|
|
:param bool autoreset: |
|
        Reset the buffer after each pack and return its content as `bytes`. (default: True)
        If set to false, use `bytes()` to get the content and `.reset()` to clear the buffer.
|
|
|
:param bool use_bin_type: |
|
Use bin type introduced in msgpack spec 2.0 for bytes. |
|
It also enables str8 type for unicode. (default: True) |
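
        For example::

            Packer(use_bin_type=True).pack(b"abc")   # bin 8 header (0xc4)
            Packer(use_bin_type=False).pack(b"abc")  # raw/fixstr header (0xa3)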
|
|
|
:param bool strict_types: |
|
        If set to true, types will be checked for exact matches. Instances of
        subclasses of serializable types are not serialized; they are treated
        as an unsupported type and forwarded to *default*. Additionally,
        tuples are not serialized as lists. This is useful when implementing
        accurate serialization for Python types.
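
        For example, with strict types a tuple is routed to *default* rather
        than packed as an array directly (a sketch)::

            Packer(strict_types=True, default=list).pack((1, 2))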
|
|
|
:param bool datetime: |
|
If set to true, datetime with tzinfo is packed into Timestamp type. |
|
Note that the tzinfo is stripped in the timestamp. |
|
You can get UTC datetime with `timestamp=3` option of the Unpacker. |
|
(Python 2 is not supported). |
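
        For example::

            from datetime import datetime, timezone

            Packer(datetime=True).pack(datetime.now(timezone.utc))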
|
|
|
:param str unicode_errors: |
|
The error handler for encoding unicode. (default: 'strict') |
|
DO NOT USE THIS!! This option is kept for very specific usage. |
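
    Example of *default* converting an unsupported type (a minimal sketch;
    ``set`` is not a msgpack type, so it is packed as an array here)::

        def default(obj):
            if isinstance(obj, set):
                return sorted(obj)
            raise TypeError("Unknown type: %r" % (obj,))

        packed = Packer(default=default).pack({"ids": {1, 2, 3}})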
|
|
|
|
""" |
|
|
|
def __init__( |
|
self, |
|
default=None, |
|
use_single_float=False, |
|
autoreset=True, |
|
use_bin_type=True, |
|
strict_types=False, |
|
datetime=False, |
|
unicode_errors=None, |
|
): |
|
self._strict_types = strict_types |
|
self._use_float = use_single_float |
|
self._autoreset = autoreset |
|
self._use_bin_type = use_bin_type |
|
self._buffer = StringIO() |
|
if PY2 and datetime: |
|
raise ValueError("datetime is not supported in Python 2") |
|
self._datetime = bool(datetime) |
|
self._unicode_errors = unicode_errors or "strict" |
|
if default is not None: |
|
if not callable(default): |
|
raise TypeError("default must be callable") |
|
self._default = default |
|
|
|
def _pack( |
|
self, |
|
obj, |
|
nest_limit=DEFAULT_RECURSE_LIMIT, |
|
check=isinstance, |
|
check_type_strict=_check_type_strict, |
|
): |
|
default_used = False |
|
if self._strict_types: |
|
check = check_type_strict |
|
list_types = list |
|
else: |
|
list_types = (list, tuple) |
|
        while True:  # `continue` retries after default()/datetime substitution
|
if nest_limit < 0: |
|
raise ValueError("recursion limit exceeded") |
|
if obj is None: |
|
return self._buffer.write(b"\xc0") |
|
if check(obj, bool): |
|
if obj: |
|
return self._buffer.write(b"\xc3") |
|
return self._buffer.write(b"\xc2") |
|
if check(obj, int_types): |
|
if 0 <= obj < 0x80: |
|
return self._buffer.write(struct.pack("B", obj)) |
|
if -0x20 <= obj < 0: |
|
return self._buffer.write(struct.pack("b", obj)) |
|
if 0x80 <= obj <= 0xFF: |
|
return self._buffer.write(struct.pack("BB", 0xCC, obj)) |
|
if -0x80 <= obj < 0: |
|
return self._buffer.write(struct.pack(">Bb", 0xD0, obj)) |
|
if 0xFF < obj <= 0xFFFF: |
|
return self._buffer.write(struct.pack(">BH", 0xCD, obj)) |
|
if -0x8000 <= obj < -0x80: |
|
return self._buffer.write(struct.pack(">Bh", 0xD1, obj)) |
|
if 0xFFFF < obj <= 0xFFFFFFFF: |
|
return self._buffer.write(struct.pack(">BI", 0xCE, obj)) |
|
if -0x80000000 <= obj < -0x8000: |
|
return self._buffer.write(struct.pack(">Bi", 0xD2, obj)) |
|
if 0xFFFFFFFF < obj <= 0xFFFFFFFFFFFFFFFF: |
|
return self._buffer.write(struct.pack(">BQ", 0xCF, obj)) |
|
if -0x8000000000000000 <= obj < -0x80000000: |
|
return self._buffer.write(struct.pack(">Bq", 0xD3, obj)) |
|
if not default_used and self._default is not None: |
|
obj = self._default(obj) |
|
default_used = True |
|
continue |
|
raise OverflowError("Integer value out of range") |
|
if check(obj, (bytes, bytearray)): |
|
n = len(obj) |
|
if n >= 2**32: |
|
raise ValueError("%s is too large" % type(obj).__name__) |
|
self._pack_bin_header(n) |
|
return self._buffer.write(obj) |
|
if check(obj, unicode): |
|
obj = obj.encode("utf-8", self._unicode_errors) |
|
n = len(obj) |
|
if n >= 2**32: |
|
raise ValueError("String is too large") |
|
self._pack_raw_header(n) |
|
return self._buffer.write(obj) |
|
if check(obj, memoryview): |
|
n = len(obj) * obj.itemsize |
|
if n >= 2**32: |
|
raise ValueError("Memoryview is too large") |
|
self._pack_bin_header(n) |
|
return self._buffer.write(obj) |
|
if check(obj, float): |
|
if self._use_float: |
|
return self._buffer.write(struct.pack(">Bf", 0xCA, obj)) |
|
return self._buffer.write(struct.pack(">Bd", 0xCB, obj)) |
|
if check(obj, (ExtType, Timestamp)): |
|
if check(obj, Timestamp): |
|
code = -1 |
|
data = obj.to_bytes() |
|
else: |
|
code = obj.code |
|
data = obj.data |
|
assert isinstance(code, int) |
|
assert isinstance(data, bytes) |
|
L = len(data) |
|
if L == 1: |
|
self._buffer.write(b"\xd4") |
|
elif L == 2: |
|
self._buffer.write(b"\xd5") |
|
elif L == 4: |
|
self._buffer.write(b"\xd6") |
|
elif L == 8: |
|
self._buffer.write(b"\xd7") |
|
elif L == 16: |
|
self._buffer.write(b"\xd8") |
|
elif L <= 0xFF: |
|
self._buffer.write(struct.pack(">BB", 0xC7, L)) |
|
elif L <= 0xFFFF: |
|
self._buffer.write(struct.pack(">BH", 0xC8, L)) |
|
else: |
|
self._buffer.write(struct.pack(">BI", 0xC9, L)) |
|
self._buffer.write(struct.pack("b", code)) |
|
self._buffer.write(data) |
|
return |
|
if check(obj, list_types): |
|
n = len(obj) |
|
self._pack_array_header(n) |
|
for i in xrange(n): |
|
self._pack(obj[i], nest_limit - 1) |
|
return |
|
if check(obj, dict): |
|
return self._pack_map_pairs( |
|
len(obj), dict_iteritems(obj), nest_limit - 1 |
|
) |
|
|
|
if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None: |
|
obj = Timestamp.from_datetime(obj) |
|
                default_used = True
|
continue |
|
|
|
if not default_used and self._default is not None: |
|
obj = self._default(obj) |
|
                default_used = True
|
continue |
|
|
|
if self._datetime and check(obj, _DateTime): |
|
raise ValueError("Cannot serialize %r where tzinfo=None" % (obj,)) |
|
|
|
raise TypeError("Cannot serialize %r" % (obj,)) |
|
|
|
def pack(self, obj): |
|
try: |
|
self._pack(obj) |
|
except: |
|
self._buffer = StringIO() |
|
raise |
|
if self._autoreset: |
|
ret = self._buffer.getvalue() |
|
self._buffer = StringIO() |
|
return ret |
|
|
|
    def pack_map_pairs(self, pairs):
        """Pack *pairs*, a sized iterable of ``(key, value)`` tuples, as a msgpack map."""
|
self._pack_map_pairs(len(pairs), pairs) |
|
if self._autoreset: |
|
ret = self._buffer.getvalue() |
|
self._buffer = StringIO() |
|
return ret |
|
|
|
def pack_array_header(self, n): |
|
if n >= 2**32: |
|
            raise ValueError("Array header is too large")
|
self._pack_array_header(n) |
|
if self._autoreset: |
|
ret = self._buffer.getvalue() |
|
self._buffer = StringIO() |
|
return ret |
|
|
|
def pack_map_header(self, n): |
|
if n >= 2**32: |
|
            raise ValueError("Map header is too large")
|
self._pack_map_header(n) |
|
if self._autoreset: |
|
ret = self._buffer.getvalue() |
|
self._buffer = StringIO() |
|
return ret |
|
|
|
def pack_ext_type(self, typecode, data): |
|
if not isinstance(typecode, int): |
|
raise TypeError("typecode must have int type.") |
|
if not 0 <= typecode <= 127: |
|
raise ValueError("typecode should be 0-127") |
|
if not isinstance(data, bytes): |
|
raise TypeError("data must have bytes type") |
|
L = len(data) |
|
if L > 0xFFFFFFFF: |
|
raise ValueError("Too large data") |
|
if L == 1: |
|
self._buffer.write(b"\xd4") |
|
elif L == 2: |
|
self._buffer.write(b"\xd5") |
|
elif L == 4: |
|
self._buffer.write(b"\xd6") |
|
elif L == 8: |
|
self._buffer.write(b"\xd7") |
|
elif L == 16: |
|
self._buffer.write(b"\xd8") |
|
elif L <= 0xFF: |
|
self._buffer.write(b"\xc7" + struct.pack("B", L)) |
|
elif L <= 0xFFFF: |
|
self._buffer.write(b"\xc8" + struct.pack(">H", L)) |
|
else: |
|
self._buffer.write(b"\xc9" + struct.pack(">I", L)) |
|
self._buffer.write(struct.pack("B", typecode)) |
|
self._buffer.write(data) |
|
|
|
def _pack_array_header(self, n): |
|
if n <= 0x0F: |
|
return self._buffer.write(struct.pack("B", 0x90 + n)) |
|
if n <= 0xFFFF: |
|
return self._buffer.write(struct.pack(">BH", 0xDC, n)) |
|
if n <= 0xFFFFFFFF: |
|
return self._buffer.write(struct.pack(">BI", 0xDD, n)) |
|
raise ValueError("Array is too large") |
|
|
|
def _pack_map_header(self, n): |
|
if n <= 0x0F: |
|
return self._buffer.write(struct.pack("B", 0x80 + n)) |
|
if n <= 0xFFFF: |
|
return self._buffer.write(struct.pack(">BH", 0xDE, n)) |
|
if n <= 0xFFFFFFFF: |
|
return self._buffer.write(struct.pack(">BI", 0xDF, n)) |
|
raise ValueError("Dict is too large") |
|
|
|
def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): |
|
self._pack_map_header(n) |
|
for (k, v) in pairs: |
|
self._pack(k, nest_limit - 1) |
|
self._pack(v, nest_limit - 1) |
|
|
|
def _pack_raw_header(self, n): |
|
if n <= 0x1F: |
|
self._buffer.write(struct.pack("B", 0xA0 + n)) |
|
elif self._use_bin_type and n <= 0xFF: |
|
self._buffer.write(struct.pack(">BB", 0xD9, n)) |
|
elif n <= 0xFFFF: |
|
self._buffer.write(struct.pack(">BH", 0xDA, n)) |
|
elif n <= 0xFFFFFFFF: |
|
self._buffer.write(struct.pack(">BI", 0xDB, n)) |
|
else: |
|
raise ValueError("Raw is too large") |
|
|
|
def _pack_bin_header(self, n): |
|
if not self._use_bin_type: |
|
return self._pack_raw_header(n) |
|
elif n <= 0xFF: |
|
return self._buffer.write(struct.pack(">BB", 0xC4, n)) |
|
elif n <= 0xFFFF: |
|
return self._buffer.write(struct.pack(">BH", 0xC5, n)) |
|
elif n <= 0xFFFFFFFF: |
|
return self._buffer.write(struct.pack(">BI", 0xC6, n)) |
|
else: |
|
raise ValueError("Bin is too large") |
|
|
|
def bytes(self): |
|
"""Return internal buffer contents as bytes object""" |
|
return self._buffer.getvalue() |
|
|
|
def reset(self): |
|
"""Reset internal buffer. |
|
|
|
This method is useful only when autoreset=False. |
|
""" |
|
self._buffer = StringIO() |
|
|
|
def getbuffer(self): |
|
"""Return view of internal buffer.""" |
|
        if USING_STRINGBUILDER or PY2:
            # PyPy's StringBuilder and Python 2's BytesIO have no getbuffer();
            # fall back to copying.
            return memoryview(self.bytes())
|
else: |
|
return self._buffer.getbuffer() |
|
|