|
"""Numpy pickle compatibility functions.""" |
|
|
|
import inspect |
|
import os |
|
import pickle |
|
import zlib |
|
from io import BytesIO |
|
|
|
from .numpy_pickle_utils import ( |
|
_ZFILE_PREFIX, |
|
Unpickler, |
|
_ensure_native_byte_order, |
|
_reconstruct, |
|
) |
|
|
|
|
|
def hex_str(an_int):
    """Render an integer as a ``0x``-prefixed, lowercase hexadecimal string.

    Parameters
    ----------
    an_int: int
        The integer to render.

    Returns
    -------
    str
        ``an_int`` formatted with the ``#x`` format specification, e.g.
        ``255`` gives ``"0xff"``.
    """
    return format(an_int, "#x")
|
|
|
|
|
def asbytes(s):
    """Return ``s`` as bytes.

    Parameters
    ----------
    s: bytes or str
        A ``bytes`` instance is handed back untouched; anything else is
        encoded by calling ``s.encode("latin1")``.

    Returns
    -------
    bytes
        ``s`` itself, or its latin1 encoding.
    """
    return s if isinstance(s, bytes) else s.encode("latin1")
|
|
|
|
|
# Width, in characters, of the length field stored in a z-file header: the
# size of the "0x"-prefixed hexadecimal rendering of 2**64.
_MAX_LEN = len(hex_str(1 << 64))

# 64 KiB, expressed in bytes.
_CHUNK_SIZE = 1 << 16
|
|
|
|
|
def read_zfile(file_handle):
    """Read the z-file and return its decompressed content as bytes.

    Z-files are raw data compressed with zlib used internally by joblib
    for persistence. Backward compatibility is not guaranteed. Do not
    use for external purposes.

    Parameters
    ----------
    file_handle: binary file object
        Handle supporting ``seek`` and ``read``. It is rewound to offset 0
        before the header is parsed.

    Returns
    -------
    bytes
        The decompressed payload.

    Raises
    ------
    ValueError
        If the length field of the header is not a valid hexadecimal number.
    AssertionError
        If the decompressed payload does not have the length announced in
        the header, which suggests a corrupted file.
    """
    file_handle.seek(0)
    prefix_length = len(_ZFILE_PREFIX)
    header_length = prefix_length + _MAX_LEN
    header = file_handle.read(header_length)
    # The hexadecimal length field sits right after the prefix.
    length = int(header[prefix_length:], 16)

    # A single space directly after the header is skipped. Any other byte
    # already belongs to the compressed stream, so rewind to just past the
    # header before reading the payload.
    next_byte = file_handle.read(1)
    if next_byte != b" ":
        file_handle.seek(header_length)

    # 15 is the zlib window size (wbits); the announced length is passed as
    # the initial size of the output buffer.
    data = zlib.decompress(file_handle.read(), 15, length)

    # Raised explicitly rather than through an ``assert`` statement so that
    # the integrity check is still performed when Python runs with -O. The
    # exception type is kept for callers that may rely on it.
    if len(data) != length:
        raise AssertionError(
            "Incorrect data length while decompressing %s. "
            "The file could be corrupted." % (file_handle,)
        )
    return data
|
|
|
|
|
def write_zfile(file_handle, data, compress=1):
    """Write ``data`` to ``file_handle`` in the Z-file layout.

    Z-files are raw data compressed with zlib used internally by joblib
    for persistence. Backward compatibility is not guaranteed. Do not
    use for external purposes.

    Parameters
    ----------
    file_handle: binary file object
        Handle exposing ``write``; three consecutive writes are issued
        (prefix, length field, compressed payload).
    data: bytes or str
        Payload to store; it goes through ``asbytes`` before compression.
    compress: int, optional
        Compression level handed to ``zlib.compress``. Defaults to 1.
    """
    file_handle.write(_ZFILE_PREFIX)

    # The payload length is stored as hexadecimal text, left-justified and
    # padded to a fixed width of _MAX_LEN characters.
    length_field = hex_str(len(data)).ljust(_MAX_LEN)
    file_handle.write(asbytes(length_field))

    compressed = zlib.compress(asbytes(data), compress)
    file_handle.write(compressed)
|
|
|
|
|
|
|
|
|
|
|
|
|
class NDArrayWrapper:
    """Placeholder that is persisted instead of a numpy array.

    It only carries the name of the file in which the array was persisted,
    the array subclass, and whether the file may be memory mapped when it
    is loaded back.
    """

    def __init__(self, filename, subclass, allow_mmap=True):
        """Record the information needed later by ``read``.

        Parameters
        ----------
        filename: str
            Name of the array file; ``read`` joins it with the unpickler's
            directory.
        subclass: type
            Array class to restore when reading.
        allow_mmap: bool, optional
            Whether ``read`` forwards the unpickler's ``mmap_mode`` to the
            loader. Defaults to True.
        """
        self.filename, self.subclass, self.allow_mmap = (
            filename,
            subclass,
            allow_mmap,
        )

    def read(self, unpickler):
        """Load the wrapped array from disk.

        Parameters
        ----------
        unpickler: object
            Must expose ``_dirname``, ``mmap_mode`` and ``np`` (the numpy
            module).

        Returns
        -------
        array
            The loaded array, passed through the ``__array_prepare__`` of
            an empty ``self.subclass`` instance when that hook exists and
            ``self.subclass`` is neither ``ndarray`` nor ``memmap``.
        """
        path = os.path.join(unpickler._dirname, self.filename)

        load_options = {}
        # getattr with a default: an instance that lacks the attribute is
        # treated as allowing memory mapping.
        if getattr(self, "allow_mmap", True):
            load_options["mmap_mode"] = unpickler.mmap_mode

        np_load = unpickler.np.load
        if "allow_pickle" in inspect.signature(np_load).parameters:
            # NOTE(review): allow_pickle=True lets the loader unpickle
            # object arrays; only read files coming from a trusted source.
            load_options["allow_pickle"] = True

        loaded = _ensure_native_byte_order(np_load(path, **load_options))

        # Nothing more to do when the hook is missing or when a plain
        # ndarray / memmap was requested.
        if not hasattr(loaded, "__array_prepare__"):
            return loaded
        if self.subclass in (unpickler.np.ndarray, unpickler.np.memmap):
            return loaded

        # Build an empty instance of the requested subclass and let it
        # prepare the loaded data.
        placeholder = _reconstruct(self.subclass, (0,), "b")
        return placeholder.__array_prepare__(loaded)
|
|
|
|
|
class ZNDArrayWrapper(NDArrayWrapper):
    """Placeholder that is persisted instead of a numpy array (z-file flavour).

    This object stores the name of the z-file holding the raw array data,
    together with the meta-information required to rebuild the array.

    The raw buffer and the meta-information are kept, rather than the
    output of an array representation routine (tobytes), so that the
    strided model can be used fully and memory copies avoided (a and a.T
    store as fast). Keeping the heavy data in a separate file also avoids
    creating large temporary buffers when unpickling data with large
    arrays.
    """

    def __init__(self, filename, init_args, state):
        """Record the information needed later by ``read``.

        Parameters
        ----------
        filename: str
            Name of the z-file; ``read`` joins it with the unpickler's
            directory.
        init_args: tuple
            Positional arguments unpacked into ``_reconstruct``.
        state: tuple
            Leading part of the state tuple; ``read`` appends the
            decompressed data to it.
        """
        self.filename, self.init_args, self.state = filename, init_args, state

    def read(self, unpickler):
        """Rebuild the array from the meta-information and the z-file.

        Parameters
        ----------
        unpickler: object
            Must expose ``_dirname``, the directory containing the z-file.

        Returns
        -------
        array
            The object created by ``_reconstruct`` after ``__setstate__``
            was called with the stored state extended by the z-file data.
        """
        path = os.path.join(unpickler._dirname, self.filename)
        rebuilt = _reconstruct(*self.init_args)
        with open(path, "rb") as zfile:
            raw_data = read_zfile(zfile)
        # The decompressed buffer becomes the last item of the state tuple.
        rebuilt.__setstate__(self.state + (raw_data,))
        return rebuilt
|
|
|
|
|
class ZipNumpyUnpickler(Unpickler):
    """Unpickler subclass that swaps array placeholders for real arrays."""

    # Work on a copy so that registering the BUILD handler below leaves the
    # dispatch table of the parent class untouched.
    dispatch = Unpickler.dispatch.copy()

    def __init__(self, filename, file_handle, mmap_mode=None):
        """Set up the unpickler on the decompressed content of a z-file.

        Parameters
        ----------
        filename: str
            Path of the pickle; split into a directory and a base name.
        file_handle: binary file object
            Open handle on the z-file to decompress.
        mmap_mode: optional
            Stored as ``self.mmap_mode`` for the array wrappers to use.
            Defaults to None.
        """
        self._filename = os.path.basename(filename)
        self._dirname = os.path.dirname(filename)
        self.mmap_mode = mmap_mode
        self.file_handle = self._open_pickle(file_handle)
        Unpickler.__init__(self, self.file_handle)

        # numpy is optional here; a failed import is recorded as None and
        # only reported once an array actually has to be rebuilt.
        try:
            import numpy
        except ImportError:
            numpy = None
        self.np = numpy

    def _open_pickle(self, file_handle):
        """Return an in-memory stream over the decompressed z-file content."""
        decompressed = read_zfile(file_handle)
        return BytesIO(decompressed)

    def load_build(self):
        """Set the state of a newly created object.

        The parent implementation runs first. If the object then sitting on
        top of the stack is an NDArrayWrapper place-holder, it is replaced
        in the stack by the array it reads.

        Raises
        ------
        ImportError
            If a place-holder is found while numpy could not be imported.
        """
        Unpickler.load_build(self)

        if not isinstance(self.stack[-1], NDArrayWrapper):
            return
        if self.np is None:
            raise ImportError(
                "Trying to unpickle an ndarray, but numpy didn't import correctly"
            )

        placeholder = self.stack.pop()
        restored = placeholder.read(self)
        self.stack.append(restored)

    # Route the BUILD opcode to the override above; the table is keyed by
    # the first byte value of the opcode.
    dispatch[pickle.BUILD[0]] = load_build
|
|
|
|
|
def load_compatibility(filename):
    """Reconstruct a Python object from a file persisted with joblib.dump.

    This function ensures the compatibility with joblib old persistence
    format (<= 0.9.3).

    Parameters
    ----------
    filename: string
        The name of the file from which to load the object

    Returns
    -------
    result: any Python object
        The object stored in the file.

    Raises
    ------
    ValueError
        If unpickling fails with a UnicodeDecodeError; the original error
        is attached as the cause.

    See Also
    --------
    joblib.dump : function to save an object

    Notes
    -----

    This function can load numpy array files saved separately during the
    dump.
    """
    with open(filename, "rb") as file_handle:
        # The file stays open for the whole duration of the unpickling.
        unpickler = ZipNumpyUnpickler(filename, file_handle=file_handle)
        try:
            return unpickler.load()
        except UnicodeDecodeError as exc:
            # Replace the decoding failure with a more explicit message.
            raise ValueError(
                "You may be trying to read with "
                "python 3 a joblib pickle generated with python 2. "
                "This feature is not supported by joblib."
            ) from exc
        finally:
            # Release the in-memory stream owned by the unpickler.
            if hasattr(unpickler, "file_handle"):
                unpickler.file_handle.close()
|
|