Spaces:
Sleeping
Sleeping
#!/usr/bin/env python3 | |
import sys | |
import pickle | |
import struct | |
import pprint | |
import zipfile | |
import fnmatch | |
from typing import Any, IO, BinaryIO, Union | |
__all__ = ["FakeObject", "FakeClass", "DumpUnpickler", "main"] | |
class FakeObject:
    """Inert record of an object that a pickle stream asked to construct.

    Attributes:
        module: First half of the dotted name shown when printing.
        name: Second half of the dotted name shown when printing.
        args: Arguments the object would have been constructed with.
        state: Value handed to ``__setstate__``, or ``None`` if none was set.
    """

    def __init__(self, module, name, args):
        self.module = module
        self.name = name
        self.args = args
        # NOTE: We don't distinguish between state never set and state set to None.
        self.state = None

    def __repr__(self):
        """Return ``module.name<args>`` followed by ``(state=...)`` if state is set."""
        state_str = "" if self.state is None else f"(state={self.state!r})"
        return f"{self.module}.{self.name}{self.args!r}{state_str}"

    def __setstate__(self, state):
        """Record *state* instead of applying it to a real object."""
        self.state = state

    @staticmethod
    def pp_format(printer, obj, stream, indent, allowance, context, level):
        """Write a pretty-printed form of *obj* to *stream*.

        The signature matches the handlers stored in
        ``pprint.PrettyPrinter._dispatch``: *printer* is the PrettyPrinter
        doing the work and *obj* is the FakeObject being formatted.  It is a
        static method so that it behaves the same whether it is looked up on
        the class or on an instance.

        Args:
            printer: PrettyPrinter whose private ``_format`` and
                ``_indent_per_level`` are used for nested values.
            obj: The FakeObject to render.
            stream: Text stream receiving the output.
            indent: Current indentation column.
            allowance: Characters reserved for closing delimiters.
            context: pprint's recursion-tracking mapping, passed through.
            level: Current nesting depth, passed through.
        """
        # Nothing nested to lay out: the plain repr is already minimal.
        if not obj.args and obj.state is None:
            stream.write(repr(obj))
            return

        stream.write(f"{obj.module}.{obj.name}")
        if obj.args:
            printer._format(obj.args, stream, indent + 1, allowance + 1, context, level)
        else:
            # Mirror repr(()) so the output stays consistent with __repr__.
            stream.write("()")
        if obj.state is None:
            return

        # State goes on its own line, one indent level deeper.  This also
        # covers objects that carry both args and state, which previously
        # raised "Need to implement".
        stream.write("(state=\n")
        indent += printer._indent_per_level
        stream.write(" " * indent)
        printer._format(obj.state, stream, indent, allowance + 1, context, level + 1)
        stream.write(")")
class FakeClass:
    """Callable placeholder for a class named by ``module`` and ``name``.

    Calling an instance, or calling its ``__new__`` attribute, yields a
    FakeObject carrying the same module/name and the supplied arguments
    rather than a real instance.
    """

    def __init__(self, module, name):
        self.module, self.name = module, name
        # Set on the instance so that ``fake_class.__new__(...)`` resolves to
        # fake_new instead of object.__new__.
        self.__new__ = self.fake_new  # type: ignore[assignment]

    def __repr__(self):
        return "{}.{}".format(self.module, self.name)

    def _build(self, ctor_args):
        # Single construction point shared by __call__ and fake_new.
        return FakeObject(self.module, self.name, ctor_args)

    def __call__(self, *args):
        """Return a FakeObject recording *args* as the constructor arguments."""
        return self._build(args)

    def fake_new(self, *args):
        """Return a FakeObject recording every argument except the first.

        The leading argument is dropped; presumably callers of ``__new__``
        pass the class itself in that position.
        """
        remaining = args[1:]
        return self._build(remaining)
class DumpUnpickler(pickle._Unpickler):  # type: ignore[name-defined]
    """Unpickler that produces placeholders instead of real objects.

    Class lookups return FakeClass and persistent ids return FakeObject, so
    loading a stream through this class never resolves the classes the
    stream names.

    Args:
        file: Binary stream containing the pickle data.
        catch_invalid_utf8: When true, a BINUNICODE string that is not valid
            UTF-8 is replaced by a FakeObject describing the decode error
            instead of raising ``UnicodeDecodeError``.
        **kwargs: Forwarded unchanged to ``pickle._Unpickler``.
    """

    def __init__(
            self,
            file,
            *,
            catch_invalid_utf8=False,
            **kwargs):
        super().__init__(file, **kwargs)
        self.catch_invalid_utf8 = catch_invalid_utf8

    def find_class(self, module, name):
        """Return a FakeClass for ``module.name`` rather than importing it."""
        return FakeClass(module, name)

    def persistent_load(self, pid):
        """Wrap the persistent id *pid* in a ``pers.obj`` FakeObject."""
        return FakeObject("pers", "obj", (pid,))

    # Work on a copy so the opcode table of the base class is left untouched.
    dispatch = dict(pickle._Unpickler.dispatch)  # type: ignore[attr-defined]

    # Custom objects in TorchScript are able to return invalid UTF-8 strings
    # from their pickle (__getstate__) functions.  Install a custom loader
    # for strings that catches the decode exception and replaces it with
    # a sentinel object.
    def load_binunicode(self):
        """Handle the BINUNICODE opcode, optionally tolerating bad UTF-8.

        Raises:
            Exception: If the encoded length exceeds ``sys.maxsize``.
            UnicodeDecodeError: If the bytes are not valid UTF-8 and
                ``catch_invalid_utf8`` is false.
        """
        # The opcode is followed by a 4-byte little-endian length.
        strlen, = struct.unpack("<I", self.read(4))  # type: ignore[attr-defined]
        if strlen > sys.maxsize:
            raise Exception("String too long.")
        str_bytes = self.read(strlen)  # type: ignore[attr-defined]
        obj: Any
        try:
            obj = str(str_bytes, "utf-8", "surrogatepass")
        except UnicodeDecodeError as exn:
            if not self.catch_invalid_utf8:
                raise
            obj = FakeObject("builtin", "UnicodeDecodeError", (str(exn),))
        self.append(obj)  # type: ignore[attr-defined]
    dispatch[pickle.BINUNICODE[0]] = load_binunicode  # type: ignore[assignment]

    @classmethod
    def dump(cls, in_stream, out_stream):
        """Load one pickle from *in_stream* and pretty-print it.

        This must be a classmethod: callers invoke it as
        ``DumpUnpickler.dump(stream, out)`` without creating an instance.

        Args:
            in_stream: Binary stream to unpickle from.
            out_stream: Text stream passed to ``pprint.pprint`` as ``stream``.

        Returns:
            The loaded value.
        """
        value = cls(in_stream).load()
        pprint.pprint(value, stream=out_stream)
        return value
def main(argv, output_stream=None):
    """Pretty-print the pickle named by ``argv[1]``.

    ``argv[1]`` is either a path to a pickle file or ``ZIP@MEMBER``, where
    MEMBER is a member name or, if it contains ``*``, an fnmatch pattern of
    which only the first matching member is shown.

    Args:
        argv: Argument vector; must have exactly two entries.
        output_stream: Stream handed to ``DumpUnpickler.dump``.  When it is
            not ``None`` a wrong-length *argv* raises instead of printing
            usage to stderr.

    Returns:
        2 after printing the usage message, otherwise ``None``.

    Raises:
        Exception: If *argv* has the wrong length and *output_stream* is
            given, or if no archive member matches the pattern.
    """
    if len(argv) != 2:
        # Don't spam stderr if not using stdout.
        if output_stream is not None:
            raise Exception("Pass argv of length 2.")
        sys.stderr.write("usage: show_pickle PICKLE_FILE\n")
        sys.stderr.write(" PICKLE_FILE can be any of:\n")
        sys.stderr.write(" path to a pickle file\n")
        sys.stderr.write(" file.zip@member.pkl\n")
        sys.stderr.write(" file.zip@*/pattern.*\n")
        sys.stderr.write(" (shell glob pattern for members)\n")
        sys.stderr.write(" (only first match will be shown)\n")
        return 2

    fname = argv[1]
    handle: Union[IO[bytes], BinaryIO]
    if "@" not in fname:
        with open(fname, "rb") as handle:
            DumpUnpickler.dump(handle, output_stream)
        return None

    # Split on the first "@" only, so member names may contain "@".
    zfname, mname = fname.split("@", 1)
    with zipfile.ZipFile(zfname) as zf:
        if "*" not in mname:
            with zf.open(mname) as handle:
                DumpUnpickler.dump(handle, output_stream)
            return None

        for info in zf.infolist():
            if fnmatch.fnmatch(info.filename, mname):
                with zf.open(info) as handle:
                    DumpUnpickler.dump(handle, output_stream)
                break
        else:
            # Loop finished without a break: nothing matched the pattern.
            raise Exception(f"Could not find member matching {mname} in {zfname}")
    return None
if __name__ == "__main__":
    # Register FakeObject.pp_format in PrettyPrinter's private dispatch
    # table, keyed by FakeObject.__repr__, so pprint uses it for FakeObject.
    # This hack works on every version of Python I've tested.
    # I've tested on the following versions:
    #   3.7.4
    pp_dispatch = pprint.PrettyPrinter._dispatch  # type: ignore[attr-defined]
    pp_dispatch[FakeObject.__repr__] = FakeObject.pp_format
    exit_status = main(sys.argv)
    sys.exit(exit_status)