# Source code for SciFiReaders.readers.SID.Nsid_reader

# -*- coding: utf-8 -*-
"""
Reader capable of reading one or all NSID datasets present in a given HDF5 file

Created on Fri May 22 16:29:25 2020

@author: Gerd Duscher, Suhas Somnath, Maxim Ziatdinov
"""
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import sys

import h5py
import sidpy

try:
    from pyNSID.io.hdf_utils import check_if_main, get_all_main, \
        read_h5py_dataset
except ModuleNotFoundError:
    check_if_main = get_all_main = read_h5py_dataset = None


class NSIDReader(sidpy.Reader):
    """
    Reads HDF5 datasets formatted according to NSID into sidpy.Dataset
    objects.

    The HDF5 file is opened in ``__init__`` and deliberately kept open for
    the lifetime of the Reader; call ``close()`` once the datasets that were
    read are no longer needed.
    """

    def __init__(self, file_path):
        """
        Creates an instance of NSIDReader which can read one or more HDF5
        datasets formatted according to NSID into sidpy.Dataset objects

        Parameters
        ----------
        file_path : str
            Path to a HDF5 file.
            NOTE(review): earlier documentation also advertised h5py.File
            and h5py.Group handles, but the value is handed directly to
            ``h5py.File(...)`` below - confirm before relying on that.

        Raises
        ------
        ModuleNotFoundError
            If pyNSID could not be imported.

        Notes
        -----
        Please consider using the ``self._h5_file`` object to get handles to
        specific datasets or sub-trees that need to be read instead of opening
        the file again outside the context of this Reader.
        """
        super(NSIDReader, self).__init__(file_path)

        # The pyNSID helpers are set to None when the import at the top of
        # this module fails.
        if not check_if_main:
            raise ModuleNotFoundError('Please install pyNSID to use this Reader')

        # Let h5py raise an OS error if a non-HDF5 file was provided
        # NOTE(review): 'r+' needs a writable file - confirm whether plain
        # read-only access ('r') would be sufficient for a Reader.
        self._h5_file = h5py.File(file_path, mode='r+')

        # Cache of every NSID main dataset found anywhere in the file
        self._main_dsets = get_all_main(self._h5_file, verbose=False)

        # DO NOT close HDF5 file. Dask array will fail if you do so.

    def close(self):
        """
        Closes the HDF5 file that was opened by this Reader.

        Notes
        -----
        Only call this once the objects returned by ``read`` / ``read_all``
        are no longer needed: the file is intentionally left open otherwise
        because the Dask arrays backing those objects fail once it is closed.
        """
        self._h5_file.close()

    def can_read(self):
        """
        Checks whether or not this Reader can read the provided file

        Returns
        -------
        bool :
            True if this Reader can read the provided file and if this file
            contains at least one NSID-formatted main dataset. Else, False
        """
        return len(self._main_dsets) > 0

    def read(self, h5_object=None):
        """
        Reads all available NSID main datasets or the specified h5_object

        Parameters
        ----------
        h5_object : h5py.Dataset or h5py.Group, optional
            HDF5 Dataset to read or the HDF5 group under which to read all
            datasets. By default, all main datasets in the file are read.

        Returns
        -------
        sidpy.Dataset or list of sidpy.Dataset objects
            The result of reading ``h5_object`` when it is a h5py.Dataset.
            Otherwise, a list of the datasets read from the file or from
            under the provided group.

        Raises
        ------
        TypeError
            If ``h5_object`` is neither a h5py.Dataset nor a h5py.Group.
        OSError
            If ``h5_object`` belongs to a different HDF5 file.
        """
        if h5_object is None:
            return self.read_all(recursive=True)

        if not isinstance(h5_object, (h5py.Group, h5py.Dataset)):
            raise TypeError('Provided h5_object was not a h5py.Dataset or '
                            'h5py.Group object but was of type: {}'
                            ''.format(type(h5_object)))

        self.__validate_obj_in_same_file(h5_object)

        if isinstance(h5_object, h5py.Dataset):
            return read_h5py_dataset(h5_object)
        return self.read_all(parent=h5_object)

    def __validate_obj_in_same_file(self, h5_object):
        """
        Internal function that ensures that the provided HDF5 object is within
        the same file as that provided in __init__

        Parameters
        ----------
        h5_object : h5py.Dataset, h5py.Group
            HDF5 object

        Raises
        ------
        OSError - if the provided object is in a different HDF5 file.
        """
        if h5_object.file != self._h5_file:
            raise OSError('The file containing the provided h5_object: {} is '
                          'not the same as provided HDF5 file when '
                          'instantiating this object: {}'
                          ''.format(h5_object.file.filename,
                                    self._h5_file.filename))

    def read_all(self, recursive=True, parent=None):
        """
        Reads all HDF5 datasets formatted according to NSID specifications.

        Parameters
        ----------
        recursive : bool, default = True
            If True, main datasets at any depth below ``parent`` are read.
            If False, only main datasets that are immediate children of
            ``parent`` are read.
        parent : h5py.Group, Default = None
            HDF5 group under which to read all available datasets.
            By default, all datasets within the HDF5 file are read.

        Returns
        -------
        list of sidpy.Dataset objects
            Datasets present under ``parent`` (or in the whole file)

        Raises
        ------
        TypeError
            If ``parent`` is provided but is not a h5py.Group.
        OSError
            If ``parent`` belongs to a different HDF5 file.
        """
        if parent is None:
            h5_group = self._h5_file
        else:
            if not isinstance(parent, h5py.Group):
                raise TypeError('parent should be a h5py.Group object')
            self.__validate_obj_in_same_file(parent)
            h5_group = parent

        if recursive:
            if parent is None:
                # Whole file requested: reuse the list built in __init__
                list_of_main = self._main_dsets
            else:
                # Restrict the search to the requested group. Using the
                # file-wide cache here would return datasets that live
                # outside of ``parent``.
                list_of_main = get_all_main(h5_group, verbose=False)
        else:
            # Only inspect the immediate children of the group
            list_of_main = []
            for key in h5_group:
                h5_obj = h5_group[key]
                if isinstance(h5_obj, h5py.Dataset) and check_if_main(h5_obj):
                    list_of_main.append(h5_obj)

        # Convert each identified main dataset into a sidpy.Dataset
        return [read_h5py_dataset(dset) for dset in list_of_main]