Source code for txpipe.utils.hdf_tools

import h5py
import subprocess
import shutil

def repack(filename):
    """
    In-place HDF5 repack operation on file.

    Runs the external ``h5repack`` command-line tool to write a repacked
    copy of the file to a temporary path next to it, then moves that copy
    over the original.

    Parameters
    ----------
    filename: str
        Path to the HDF5 file to repack.  It is replaced by the repacked
        copy only if ``h5repack`` exits successfully.

    Raises
    ------
    subprocess.CalledProcessError
        If ``h5repack`` exits with a non-zero status.  The original file
        is not replaced in that case.
    """
    tmp_name = f'{filename}.tmp_325467847'
    # Pass the arguments as a list and do not go through a shell, so that
    # paths containing spaces or shell metacharacters reach h5repack intact
    # and are never interpreted as shell syntax.
    subprocess.check_call(['h5repack', filename, tmp_name])
    shutil.move(tmp_name, filename)
def create_dataset_early_allocated(group, name, size, dtype):
    """
    Make a 1D HDF5 dataset whose storage is allocated at creation time.

    The dataset is built through the low-level h5py interface with
    early allocation and with fill-value writing turned off, so its
    contents are not pre-filled.  Allocating everything up front
    can make incremental writes from multiple processes faster, and
    skipping the fill saves further time.

    Parameters
    ----------
    group: h5py.Group
        Parent group in which the dataset is created.

    name: str
        Name of the new dataset.  It is encoded as ASCII.

    size: int
        Length of the new dataset, which is always one-dimensional.

    dtype: str
        Type code for the data: one of f4, f8, i4, i8.

    Returns
    -------
    h5py.Dataset
        High-level wrapper around the newly created dataset.

    Raises
    ------
    KeyError
        If ``dtype`` is not one of the supported type codes.
    """
    # Short type codes accepted by this function -> native HDF5 types
    native_types = {
        'f8': h5py.h5t.NATIVE_DOUBLE,
        'f4': h5py.h5t.NATIVE_FLOAT,
        'i4': h5py.h5t.NATIVE_INT32,
        'i8': h5py.h5t.NATIVE_INT64,
    }

    # The data-space describes the shape of the dataset: a single axis
    dataspace = h5py.h5s.create_simple((size,))

    # Dataset-creation properties: never write fill values, and
    # allocate all of the storage as soon as the dataset is created.
    creation_props = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    creation_props.set_fill_time(h5py.h5d.FILL_TIME_NEVER)
    creation_props.set_alloc_time(h5py.h5d.ALLOC_TIME_EARLY)

    h5_type = native_types[dtype]

    # The low-level create call wants the parent identifier and a
    # bytes name rather than the high-level objects.
    parent_id = group.id
    raw_name = name.encode('ascii')
    dataset_id = h5py.h5d.create(
        parent_id, raw_name, h5_type, dataspace, creation_props
    )

    return h5py.Dataset(dataset_id)