Spaces:
Sleeping
Sleeping
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
#          and contributor agreement.
| """ | |
| Implements high-level support for HDF5 file objects. | |
| """ | |
| import sys | |
| import os | |
| from warnings import warn | |
| from .compat import filename_decode, filename_encode | |
| from .base import phil, with_phil | |
| from .group import Group | |
| from .. import h5, h5f, h5p, h5i, h5fd, _objects | |
| from .. import version | |
| mpi = h5.get_config().mpi | |
| ros3 = h5.get_config().ros3 | |
| direct_vfd = h5.get_config().direct_vfd | |
| hdf5_version = version.hdf5_version_tuple[0:3] | |
| swmr_support = True | |
| libver_dict = {'earliest': h5f.LIBVER_EARLIEST, 'latest': h5f.LIBVER_LATEST, | |
| 'v108': h5f.LIBVER_V18, 'v110': h5f.LIBVER_V110} | |
| libver_dict_r = dict((y, x) for x, y in libver_dict.items()) | |
| if hdf5_version >= (1, 11, 4): | |
| libver_dict.update({'v112': h5f.LIBVER_V112}) | |
| libver_dict_r.update({h5f.LIBVER_V112: 'v112'}) | |
| if hdf5_version >= (1, 13, 0): | |
| libver_dict.update({'v114': h5f.LIBVER_V114}) | |
| libver_dict_r.update({h5f.LIBVER_V114: 'v114'}) | |
| def _set_fapl_mpio(plist, **kwargs): | |
| """Set file access property list for mpio driver""" | |
| if not mpi: | |
| raise ValueError("h5py was built without MPI support, can't use mpio driver") | |
| import mpi4py.MPI | |
| kwargs.setdefault('info', mpi4py.MPI.Info()) | |
| plist.set_fapl_mpio(**kwargs) | |
| def _set_fapl_fileobj(plist, **kwargs): | |
| """Set the Python file object driver in a file access property list""" | |
| plist.set_fileobj_driver(h5fd.fileobj_driver, kwargs.get('fileobj')) | |
| _drivers = { | |
| 'sec2': lambda plist, **kwargs: plist.set_fapl_sec2(**kwargs), | |
| 'stdio': lambda plist, **kwargs: plist.set_fapl_stdio(**kwargs), | |
| 'core': lambda plist, **kwargs: plist.set_fapl_core(**kwargs), | |
| 'family': lambda plist, **kwargs: plist.set_fapl_family( | |
| memb_fapl=plist.copy(), | |
| **kwargs | |
| ), | |
| 'mpio': _set_fapl_mpio, | |
| 'fileobj': _set_fapl_fileobj, | |
| 'split': lambda plist, **kwargs: plist.set_fapl_split(**kwargs), | |
| } | |
| if ros3: | |
| _drivers['ros3'] = lambda plist, **kwargs: plist.set_fapl_ros3(**kwargs) | |
| if direct_vfd: | |
| _drivers['direct'] = lambda plist, **kwargs: plist.set_fapl_direct(**kwargs) # noqa | |
| def register_driver(name, set_fapl): | |
| """Register a custom driver. | |
| Parameters | |
| ---------- | |
| name : str | |
| The name of the driver. | |
| set_fapl : callable[PropFAID, **kwargs] -> NoneType | |
| The function to set the fapl to use your custom driver. | |
| """ | |
| _drivers[name] = set_fapl | |
| def unregister_driver(name): | |
| """Unregister a custom driver. | |
| Parameters | |
| ---------- | |
| name : str | |
| The name of the driver. | |
| """ | |
| del _drivers[name] | |
| def registered_drivers(): | |
| """Return a frozenset of the names of all of the registered drivers. | |
| """ | |
| return frozenset(_drivers) | |
| def make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0, locking, | |
| page_buf_size, min_meta_keep, min_raw_keep, | |
| alignment_threshold, alignment_interval, meta_block_size, | |
| **kwds): | |
| """ Set up a file access property list """ | |
| plist = h5p.create(h5p.FILE_ACCESS) | |
| if libver is not None: | |
| if libver in libver_dict: | |
| low = libver_dict[libver] | |
| high = h5f.LIBVER_LATEST | |
| else: | |
| low, high = (libver_dict[x] for x in libver) | |
| else: | |
| # we default to earliest | |
| low, high = h5f.LIBVER_EARLIEST, h5f.LIBVER_LATEST | |
| plist.set_libver_bounds(low, high) | |
| plist.set_alignment(alignment_threshold, alignment_interval) | |
| cache_settings = list(plist.get_cache()) | |
| if rdcc_nslots is not None: | |
| cache_settings[1] = rdcc_nslots | |
| if rdcc_nbytes is not None: | |
| cache_settings[2] = rdcc_nbytes | |
| if rdcc_w0 is not None: | |
| cache_settings[3] = rdcc_w0 | |
| plist.set_cache(*cache_settings) | |
| if page_buf_size: | |
| plist.set_page_buffer_size(int(page_buf_size), int(min_meta_keep), | |
| int(min_raw_keep)) | |
| if meta_block_size is not None: | |
| plist.set_meta_block_size(int(meta_block_size)) | |
| if locking is not None: | |
| if hdf5_version < (1, 12, 1) and (hdf5_version[:2] != (1, 10) or hdf5_version[2] < 7): | |
| raise ValueError( | |
| "HDF5 version >= 1.12.1 or 1.10.x >= 1.10.7 required for file locking.") | |
| if locking in ("false", False): | |
| plist.set_file_locking(False, ignore_when_disabled=False) | |
| elif locking in ("true", True): | |
| plist.set_file_locking(True, ignore_when_disabled=False) | |
| elif locking == "best-effort": | |
| plist.set_file_locking(True, ignore_when_disabled=True) | |
| else: | |
| raise ValueError(f"Unsupported locking value: {locking}") | |
| if driver is None or (driver == 'windows' and sys.platform == 'win32'): | |
| # Prevent swallowing unused key arguments | |
| if kwds: | |
| msg = "'{key}' is an invalid keyword argument for this function" \ | |
| .format(key=next(iter(kwds))) | |
| raise TypeError(msg) | |
| return plist | |
| try: | |
| set_fapl = _drivers[driver] | |
| except KeyError: | |
| raise ValueError('Unknown driver type "%s"' % driver) | |
| else: | |
| if driver == 'ros3': | |
| token = kwds.pop('session_token', None) | |
| set_fapl(plist, **kwds) | |
| if token: | |
| if hdf5_version < (1, 14, 2): | |
| raise ValueError('HDF5 >= 1.14.2 required for AWS session token') | |
| plist.set_fapl_ros3_token(token) | |
| else: | |
| set_fapl(plist, **kwds) | |
| return plist | |
| def make_fcpl(track_order=False, fs_strategy=None, fs_persist=False, | |
| fs_threshold=1, fs_page_size=None): | |
| """ Set up a file creation property list """ | |
| if track_order or fs_strategy: | |
| plist = h5p.create(h5p.FILE_CREATE) | |
| if track_order: | |
| plist.set_link_creation_order( | |
| h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED) | |
| plist.set_attr_creation_order( | |
| h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED) | |
| if fs_strategy: | |
| strategies = { | |
| 'fsm': h5f.FSPACE_STRATEGY_FSM_AGGR, | |
| 'page': h5f.FSPACE_STRATEGY_PAGE, | |
| 'aggregate': h5f.FSPACE_STRATEGY_AGGR, | |
| 'none': h5f.FSPACE_STRATEGY_NONE | |
| } | |
| fs_strat_num = strategies.get(fs_strategy, -1) | |
| if fs_strat_num == -1: | |
| raise ValueError("Invalid file space strategy type") | |
| plist.set_file_space_strategy(fs_strat_num, fs_persist, fs_threshold) | |
| if fs_page_size and fs_strategy == 'page': | |
| plist.set_file_space_page_size(int(fs_page_size)) | |
| else: | |
| plist = None | |
| return plist | |
| def make_fid(name, mode, userblock_size, fapl, fcpl=None, swmr=False): | |
| """ Get a new FileID by opening or creating a file. | |
| Also validates mode argument.""" | |
| if userblock_size is not None: | |
| if mode in ('r', 'r+'): | |
| raise ValueError("User block may only be specified " | |
| "when creating a file") | |
| try: | |
| userblock_size = int(userblock_size) | |
| except (TypeError, ValueError): | |
| raise ValueError("User block size must be an integer") | |
| if fcpl is None: | |
| fcpl = h5p.create(h5p.FILE_CREATE) | |
| fcpl.set_userblock(userblock_size) | |
| if mode == 'r': | |
| flags = h5f.ACC_RDONLY | |
| if swmr and swmr_support: | |
| flags |= h5f.ACC_SWMR_READ | |
| fid = h5f.open(name, flags, fapl=fapl) | |
| elif mode == 'r+': | |
| fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl) | |
| elif mode in ['w-', 'x']: | |
| fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl) | |
| elif mode == 'w': | |
| fid = h5f.create(name, h5f.ACC_TRUNC, fapl=fapl, fcpl=fcpl) | |
| elif mode == 'a': | |
| # Open in append mode (read/write). | |
| # If that fails, create a new file only if it won't clobber an | |
| # existing one (ACC_EXCL) | |
| try: | |
| fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl) | |
| # Not all drivers raise FileNotFoundError (commented those that do not) | |
| except FileNotFoundError if fapl.get_driver() in ( | |
| h5fd.SEC2, | |
| h5fd.DIRECT if direct_vfd else -1, | |
| # h5fd.STDIO, | |
| # h5fd.CORE, | |
| h5fd.FAMILY, | |
| h5fd.WINDOWS, | |
| # h5fd.MPIO, | |
| # h5fd.MPIPOSIX, | |
| h5fd.fileobj_driver, | |
| h5fd.ROS3D if ros3 else -1, | |
| ) else OSError: | |
| fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl) | |
| else: | |
| raise ValueError("Invalid mode; must be one of r, r+, w, w-, x, a") | |
| try: | |
| if userblock_size is not None: | |
| existing_fcpl = fid.get_create_plist() | |
| if existing_fcpl.get_userblock() != userblock_size: | |
| raise ValueError("Requested userblock size (%d) does not match that of existing file (%d)" % (userblock_size, existing_fcpl.get_userblock())) | |
| except Exception as e: | |
| fid.close() | |
| raise e | |
| return fid | |
| class File(Group): | |
| """ | |
| Represents an HDF5 file. | |
| """ | |
| def attrs(self): | |
| """ Attributes attached to this object """ | |
| # hdf5 complains that a file identifier is an invalid location for an | |
| # attribute. Instead of self, pass the root group to AttributeManager: | |
| from . import attrs | |
| with phil: | |
| return attrs.AttributeManager(self['/']) | |
| def filename(self): | |
| """File name on disk""" | |
| return filename_decode(h5f.get_name(self.id)) | |
| def driver(self): | |
| """Low-level HDF5 file driver used to open file""" | |
| drivers = {h5fd.SEC2: 'sec2', | |
| h5fd.STDIO: 'stdio', | |
| h5fd.CORE: 'core', | |
| h5fd.FAMILY: 'family', | |
| h5fd.WINDOWS: 'windows', | |
| h5fd.MPIO: 'mpio', | |
| h5fd.MPIPOSIX: 'mpiposix', | |
| h5fd.fileobj_driver: 'fileobj'} | |
| if ros3: | |
| drivers[h5fd.ROS3D] = 'ros3' | |
| if direct_vfd: | |
| drivers[h5fd.DIRECT] = 'direct' | |
| return drivers.get(self.id.get_access_plist().get_driver(), 'unknown') | |
| def mode(self): | |
| """ Python mode used to open file """ | |
| write_intent = h5f.ACC_RDWR | |
| if swmr_support: | |
| write_intent |= h5f.ACC_SWMR_WRITE | |
| return 'r+' if self.id.get_intent() & write_intent else 'r' | |
| def libver(self): | |
| """File format version bounds (2-tuple: low, high)""" | |
| bounds = self.id.get_access_plist().get_libver_bounds() | |
| return tuple(libver_dict_r[x] for x in bounds) | |
| def userblock_size(self): | |
| """ User block size (in bytes) """ | |
| fcpl = self.id.get_create_plist() | |
| return fcpl.get_userblock() | |
| def meta_block_size(self): | |
| """ Meta block size (in bytes) """ | |
| fapl = self.id.get_access_plist() | |
| return fapl.get_meta_block_size() | |
| if mpi: | |
| def atomic(self): | |
| """ Set/get MPI-IO atomic mode | |
| """ | |
| return self.id.get_mpi_atomicity() | |
| def atomic(self, value): | |
| # pylint: disable=missing-docstring | |
| self.id.set_mpi_atomicity(value) | |
| def swmr_mode(self): | |
| """ Controls single-writer multiple-reader mode """ | |
| return swmr_support and bool(self.id.get_intent() & (h5f.ACC_SWMR_READ | h5f.ACC_SWMR_WRITE)) | |
| def swmr_mode(self, value): | |
| # pylint: disable=missing-docstring | |
| if value: | |
| self.id.start_swmr_write() | |
| else: | |
| raise ValueError("It is not possible to forcibly switch SWMR mode off.") | |
| def __init__(self, name, mode='r', driver=None, libver=None, userblock_size=None, swmr=False, | |
| rdcc_nslots=None, rdcc_nbytes=None, rdcc_w0=None, track_order=None, | |
| fs_strategy=None, fs_persist=False, fs_threshold=1, fs_page_size=None, | |
| page_buf_size=None, min_meta_keep=0, min_raw_keep=0, locking=None, | |
| alignment_threshold=1, alignment_interval=1, meta_block_size=None, **kwds): | |
| """Create a new file object. | |
| See the h5py user guide for a detailed explanation of the options. | |
| name | |
| Name of the file on disk, or file-like object. Note: for files | |
| created with the 'core' driver, HDF5 still requires this be | |
| non-empty. | |
| mode | |
| r Readonly, file must exist (default) | |
| r+ Read/write, file must exist | |
| w Create file, truncate if exists | |
| w- or x Create file, fail if exists | |
| a Read/write if exists, create otherwise | |
| driver | |
| Name of the driver to use. Legal values are None (default, | |
| recommended), 'core', 'sec2', 'direct', 'stdio', 'mpio', 'ros3'. | |
| libver | |
| Library version bounds. Supported values: 'earliest', 'v108', | |
| 'v110', 'v112' and 'latest'. The 'v108', 'v110' and 'v112' | |
| options can only be specified with the HDF5 1.10.2 library or later. | |
| userblock_size | |
| Desired size of user block. Only allowed when creating a new | |
| file (mode w, w- or x). | |
| swmr | |
| Open the file in SWMR read mode. Only used when mode = 'r'. | |
| rdcc_nbytes | |
| Total size of the dataset chunk cache in bytes. The default size | |
| is 1024**2 (1 MiB) per dataset. Applies to all datasets unless individually changed. | |
| rdcc_w0 | |
| The chunk preemption policy for all datasets. This must be | |
| between 0 and 1 inclusive and indicates the weighting according to | |
| which chunks which have been fully read or written are penalized | |
| when determining which chunks to flush from cache. A value of 0 | |
| means fully read or written chunks are treated no differently than | |
| other chunks (the preemption is strictly LRU) while a value of 1 | |
| means fully read or written chunks are always preempted before | |
| other chunks. If your application only reads or writes data once, | |
| this can be safely set to 1. Otherwise, this should be set lower | |
| depending on how often you re-read or re-write the same data. The | |
| default value is 0.75. Applies to all datasets unless individually changed. | |
| rdcc_nslots | |
| The number of chunk slots in the raw data chunk cache for this | |
| file. Increasing this value reduces the number of cache collisions, | |
| but slightly increases the memory used. Due to the hashing | |
| strategy, this value should ideally be a prime number. As a rule of | |
| thumb, this value should be at least 10 times the number of chunks | |
| that can fit in rdcc_nbytes bytes. For maximum performance, this | |
| value should be set approximately 100 times that number of | |
| chunks. The default value is 521. Applies to all datasets unless individually changed. | |
| track_order | |
| Track dataset/group/attribute creation order under root group | |
| if True. If None use global default h5.get_config().track_order. | |
| fs_strategy | |
| The file space handling strategy to be used. Only allowed when | |
| creating a new file (mode w, w- or x). Defined as: | |
| "fsm" FSM, Aggregators, VFD | |
| "page" Paged FSM, VFD | |
| "aggregate" Aggregators, VFD | |
| "none" VFD | |
| If None use HDF5 defaults. | |
| fs_page_size | |
| File space page size in bytes. Only used when fs_strategy="page". If | |
| None use the HDF5 default (4096 bytes). | |
| fs_persist | |
| A boolean value to indicate whether free space should be persistent | |
| or not. Only allowed when creating a new file. The default value | |
| is False. | |
| fs_threshold | |
| The smallest free-space section size that the free space manager | |
| will track. Only allowed when creating a new file. The default | |
| value is 1. | |
| page_buf_size | |
| Page buffer size in bytes. Only allowed for HDF5 files created with | |
| fs_strategy="page". Must be a power of two value and greater or | |
| equal than the file space page size when creating the file. It is | |
| not used by default. | |
| min_meta_keep | |
| Minimum percentage of metadata to keep in the page buffer before | |
| allowing pages containing metadata to be evicted. Applicable only if | |
| page_buf_size is set. Default value is zero. | |
| min_raw_keep | |
| Minimum percentage of raw data to keep in the page buffer before | |
| allowing pages containing raw data to be evicted. Applicable only if | |
| page_buf_size is set. Default value is zero. | |
| locking | |
| The file locking behavior. Defined as: | |
| - False (or "false") -- Disable file locking | |
| - True (or "true") -- Enable file locking | |
| - "best-effort" -- Enable file locking but ignore some errors | |
| - None -- Use HDF5 defaults | |
| .. warning:: | |
| The HDF5_USE_FILE_LOCKING environment variable can override | |
| this parameter. | |
| Only available with HDF5 >= 1.12.1 or 1.10.x >= 1.10.7. | |
| alignment_threshold | |
| Together with ``alignment_interval``, this property ensures that | |
| any file object greater than or equal in size to the alignment | |
| threshold (in bytes) will be aligned on an address which is a | |
| multiple of alignment interval. | |
| alignment_interval | |
| This property should be used in conjunction with | |
| ``alignment_threshold``. See the description above. For more | |
| details, see | |
| https://portal.hdfgroup.org/display/HDF5/H5P_SET_ALIGNMENT | |
| meta_block_size | |
| Set the current minimum size, in bytes, of new metadata block allocations. | |
| See https://portal.hdfgroup.org/display/HDF5/H5P_SET_META_BLOCK_SIZE | |
| Additional keywords | |
| Passed on to the selected file driver. | |
| """ | |
| if driver == 'ros3': | |
| if ros3: | |
| from urllib.parse import urlparse | |
| url = urlparse(name) | |
| if url.scheme == 's3': | |
| aws_region = kwds.get('aws_region', b'').decode('ascii') | |
| if len(aws_region) == 0: | |
| raise ValueError('AWS region required for s3:// location') | |
| name = f'https://s3.{aws_region}.amazonaws.com/{url.netloc}{url.path}' | |
| elif url.scheme not in ('https', 'http'): | |
| raise ValueError(f'{name}: S3 location must begin with ' | |
| 'either "https://", "http://", or "s3://"') | |
| else: | |
| raise ValueError( | |
| "h5py was built without ROS3 support, can't use ros3 driver") | |
| if locking is not None and hdf5_version < (1, 12, 1) and ( | |
| hdf5_version[:2] != (1, 10) or hdf5_version[2] < 7): | |
| raise ValueError("HDF5 version >= 1.12.1 or 1.10.x >= 1.10.7 required for file locking options.") | |
| if isinstance(name, _objects.ObjectID): | |
| if fs_strategy: | |
| raise ValueError("Unable to set file space strategy of an existing file") | |
| with phil: | |
| fid = h5i.get_file_id(name) | |
| else: | |
| if hasattr(name, 'read') and hasattr(name, 'seek'): | |
| if driver not in (None, 'fileobj'): | |
| raise ValueError("Driver must be 'fileobj' for file-like object if specified.") | |
| driver = 'fileobj' | |
| if kwds.get('fileobj', name) != name: | |
| raise ValueError("Invalid value of 'fileobj' argument; " | |
| "must equal to file-like object if specified.") | |
| kwds.update(fileobj=name) | |
| name = repr(name).encode('ASCII', 'replace') | |
| else: | |
| name = filename_encode(name) | |
| if track_order is None: | |
| track_order = h5.get_config().track_order | |
| if fs_strategy and mode not in ('w', 'w-', 'x'): | |
| raise ValueError("Unable to set file space strategy of an existing file") | |
| if swmr and mode != 'r': | |
| warn( | |
| "swmr=True only affects read ('r') mode. For swmr write " | |
| "mode, set f.swmr_mode = True after opening the file.", | |
| stacklevel=2, | |
| ) | |
| with phil: | |
| fapl = make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0, | |
| locking, page_buf_size, min_meta_keep, min_raw_keep, | |
| alignment_threshold=alignment_threshold, | |
| alignment_interval=alignment_interval, | |
| meta_block_size=meta_block_size, | |
| **kwds) | |
| fcpl = make_fcpl(track_order=track_order, fs_strategy=fs_strategy, | |
| fs_persist=fs_persist, fs_threshold=fs_threshold, | |
| fs_page_size=fs_page_size) | |
| fid = make_fid(name, mode, userblock_size, fapl, fcpl, swmr=swmr) | |
| if isinstance(libver, tuple): | |
| self._libver = libver | |
| else: | |
| self._libver = (libver, 'latest') | |
| super().__init__(fid) | |
| def close(self): | |
| """ Close the file. All open objects become invalid """ | |
| with phil: | |
| # Check that the file is still open, otherwise skip | |
| if self.id.valid: | |
| # We have to explicitly murder all open objects related to the file | |
| # Close file-resident objects first, then the files. | |
| # Otherwise we get errors in MPI mode. | |
| self.id._close_open_objects(h5f.OBJ_LOCAL | ~h5f.OBJ_FILE) | |
| self.id._close_open_objects(h5f.OBJ_LOCAL | h5f.OBJ_FILE) | |
| self.id.close() | |
| _objects.nonlocal_close() | |
| def flush(self): | |
| """ Tell the HDF5 library to flush its buffers. | |
| """ | |
| with phil: | |
| h5f.flush(self.id) | |
| def __enter__(self): | |
| return self | |
| def __exit__(self, *args): | |
| if self.id: | |
| self.close() | |
| def __repr__(self): | |
| if not self.id: | |
| r = '<Closed HDF5 file>' | |
| else: | |
| # Filename has to be forced to Unicode if it comes back bytes | |
| # Mode is always a "native" string | |
| filename = self.filename | |
| if isinstance(filename, bytes): # Can't decode fname | |
| filename = filename.decode('utf8', 'replace') | |
| r = f'<HDF5 file "{os.path.basename(filename)}" (mode {self.mode})>' | |
| return r | |