Spaces:
Runtime error
Runtime error
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from tempfile import NamedTemporaryFile | |
import mmap | |
class CallbackFileWrapper(object):
    """
    Small wrapper around a fp object which will tee everything read into a
    buffer, and when that file is closed it will execute a callback with the
    contents of that buffer.

    All attributes are proxied to the underlying file object.

    This class uses members with a double underscore (__) leading prefix so as
    not to accidentally shadow an attribute.

    The data is stored in a temporary file until it is all available.  As long
    as the temporary files directory is disk-based (sometimes it's a
    memory-backed-``tmpfs`` on Linux), data will be unloaded to disk if memory
    pressure is high.  For small files the disk usually won't be used at all,
    it'll all be in the filesystem memory cache, so there should be no
    performance impact.
    """

    def __init__(self, fp, callback):
        """
        Wrap ``fp`` and remember ``callback``.

        ``fp`` is the file-like object whose reads are teed.  ``callback`` is
        called once with the accumulated bytes (``b""`` or a ``memoryview``)
        when the wrapped file is detected as closed; a falsy callback is
        simply skipped.
        """
        self.__buf = NamedTemporaryFile("rb+", delete=True)
        self.__fp = fp
        self.__callback = callback

    def __getattr__(self, name):
        """Proxy any attribute not found on the wrapper to the wrapped fp."""
        # The vagaries of garbage collection mean that self.__fp is
        # not always set.  Using __getattribute__ with the mangled private
        # name[0] looks the attribute up directly and raises an
        # AttributeError when it doesn't exist.  This stops things from
        # infinitely recursing into __getattr__ in the case where
        # self.__fp hasn't been set.
        #
        # [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
        fp = self.__getattribute__("_CallbackFileWrapper__fp")
        return getattr(fp, name)

    def __is_fp_closed(self):
        """
        Return True when the wrapped fp reports itself as closed.

        Two conventions are probed in order: an ``fp`` attribute that is
        ``None``, then a ``closed`` attribute.  If neither exists the file is
        treated as still open, which means the callback never fires.
        """
        try:
            return self.__fp.fp is None
        except AttributeError:
            pass

        try:
            return self.__fp.closed
        except AttributeError:
            pass

        # We just don't cache it then.
        # TODO: Add some logging here...
        return False

    def _close(self):
        """
        Hand the buffered data to the callback (once) and release the buffer.

        Safe to call repeatedly: after the first call the callback is cleared
        and closing an already-closed temporary file is a no-op.  Any
        exception raised by the callback propagates to the caller, but the
        cleanup below still runs.
        """
        try:
            if self.__callback:
                if self.__buf.tell() == 0:
                    # Empty file: mmap() cannot map zero bytes.
                    result = b""
                else:
                    # Return the data without actually loading it into
                    # memory, relying on Python's buffer API and mmap().
                    # mmap() just gives a view directly into the
                    # filesystem's memory cache, so it doesn't result in
                    # duplicate memory use.
                    self.__buf.seek(0, 0)
                    result = memoryview(
                        mmap.mmap(self.__buf.fileno(), 0, access=mmap.ACCESS_READ)
                    )
                self.__callback(result)
        finally:
            # We assign this to None here, because otherwise we can get into
            # really tricky problems where the CPython interpreter deadlocks
            # because the callback is holding a reference to something which
            # has a __del__ method.  Setting this to None breaks the cycle
            # and allows the garbage collector to do its thing normally.
            # Done in ``finally`` so a failing callback cannot keep the cycle
            # (or the temporary file) alive.
            self.__callback = None

            # Closing the temporary file releases memory and frees disk
            # space.  Important when caching big files.
            self.__buf.close()

    def read(self, amt=None):
        """
        Read from the wrapped fp, tee the data into the buffer and return it.

        Triggers ``_close()`` when the wrapped fp is found closed afterwards.
        """
        data = self.__fp.read(amt)
        if data:
            # We may be dealing with b'', a sign that things are over:
            # it's passed e.g. after we've already closed self.__buf.
            self.__buf.write(data)
        if self.__is_fp_closed():
            self._close()

        return data

    def _safe_read(self, amt):
        """
        Tee ``self.__fp._safe_read(amt)`` into the buffer and return the data.

        A two-byte ``b"\\r\\n"`` read is passed through without being
        buffered and without checking whether the fp is closed.
        """
        data = self.__fp._safe_read(amt)

        if amt == 2 and data == b"\r\n":
            # urllib executes this read to toss the CRLF at the end
            # of the chunk.
            return data

        if data:
            # Same guard as read(): an empty result may arrive after
            # self.__buf has already been closed, and writing to a closed
            # file raises ValueError.
            self.__buf.write(data)
        if self.__is_fp_closed():
            self._close()

        return data