Source code for BGlib.gmode.analysis.giv_bayesian

# -*- coding: utf-8 -*-
"""
:class:`~BGlib.gmode.analysis.giv_bayesian.GIVBayesian` - A class that performs Bayesian inference to decouple spurious
hysteresis from current-voltage spectroscopy signals in General mode I-V data

Created on Thu Nov 02 11:48:53 2017

@author: Suhas Somnath

"""

from __future__ import division, print_function, absolute_import, unicode_literals

import numpy as np
from pyUSID.processing.comp_utils import parallel_compute
from sidpy.hdf.dtype_utils import stack_real_to_compound
from sidpy.hdf.hdf_utils import write_simple_attrs, print_tree, get_attributes
from pyUSID import Dimension
from pyUSID.processing.process import Process
from pyUSID.io.hdf_utils import write_main_dataset, create_results_group, \
    create_empty_dataset
from pyUSID import USIDataset
from .utils.giv_utils import do_bayesian_inference, bayesian_inference_on_period

cap_dtype = np.dtype({'names': ['Forward', 'Reverse'],
                      'formats': [np.float32, np.float32]})
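# Illustrative note (editorial, not part of the original module): each pixel's forward and
# reverse capacitance estimates are packed into this compound type before being written to
# HDF5. For example, sidpy's stack_real_to_compound converts a (num_pixels, 2) float32
# array into a 1D array of cap_dtype records:
#     cap_compound = stack_real_to_compound(np.array([[1.2, 1.3]], dtype=np.float32),
#                                           cap_dtype)
#     cap_compound['Forward']  # -> array([1.2], dtype=float32)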
# TODO : Take lesser used bayesian inference params from kwargs if provided


class GIVBayesian(Process):
    """
    A class that performs Bayesian inference to decouple spurious hysteresis signals from
    current-voltage spectroscopy signals in General mode I-V data
    """
    def __init__(self, h5_main, ex_freq, gain, num_x_steps=250, r_extra=110,
                 **kwargs):
        """
        Applies Bayesian Inference to General Mode IV (G-IV) data to extract the true current

        Parameters
        ----------
        h5_main : h5py.Dataset object
            Dataset to process
        ex_freq : float
            Frequency of the excitation waveform
        gain : uint
            Gain setting on current amplifier (typically 7-9)
        num_x_steps : uint (Optional, default = 250)
            Number of steps for the inferred results. Note: this may end up being slightly
            different from the specified value.
        r_extra : float (Optional, default = 110 [Ohms])
            Extra resistance in the RC circuit that will provide correct current and resistance values
        h5_target_group : h5py.Group, optional. Default = None
            Location where to look for existing results and to place newly computed results.
            Use this kwarg if the results need to be written to a different HDF5 file.
            By default, this value is set to the parent group containing `h5_main`
        kwargs : dict
            Other parameters specific to the Process class and nuanced bayesian_inference parameters
        """
        super(GIVBayesian, self).__init__(h5_main, 'Bayesian_Inference',
                                          **kwargs)
        self.gain = gain
        self.ex_freq = ex_freq
        self.r_extra = r_extra
        self.num_x_steps = int(num_x_steps)
        if self.num_x_steps % 4 == 0:
            self.num_x_steps = ((self.num_x_steps // 2) + 1) * 2
        if self.verbose and self.mpi_rank == 0:
            print('ensuring that the number of half steps is odd; num_x_steps is now', self.num_x_steps)

        self.h5_main = USIDataset(self.h5_main)

        # take these from kwargs
        bayesian_parms = {'gam': 0.03, 'e': 10.0, 'sigma': 10.0, 'sigmaC': 1.0,
                          'num_samples': 2E3}

        self.parms_dict = {'freq': self.ex_freq,
                           'num_x_steps': self.num_x_steps,
                           'r_extra': self.r_extra}
        self.parms_dict.update(bayesian_parms)

        self.duplicate_h5_groups, self.partial_h5_groups = self._check_for_duplicates()

        # Should not be extracting excitation this way!
        h5_spec_vals = self.h5_main.h5_spec_vals[0]
        self.single_ao = np.squeeze(h5_spec_vals[()])

        roll_cyc_fract = -0.25
        self.roll_pts = int(self.single_ao.size * roll_cyc_fract)
        self.rolled_bias = np.roll(self.single_ao, self.roll_pts)

        dt = 1 / (ex_freq * self.single_ao.size)
        self.dvdt = np.diff(self.single_ao) / dt
        self.dvdt = np.append(self.dvdt, self.dvdt[-1])

        self.reverse_results = None
        self.forward_results = None
        self._bayes_parms = None

        self.__first_batch = True
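    # Illustrative usage sketch (editorial, not part of the original class). Assumes an
    # already-open HDF5 file whose main dataset holds G-mode I-V data; the group path and
    # parameter values below are hypothetical:
    #     proc = GIVBayesian(h5_f['Measurement_000/Channel_000/Raw_Data'],
    #                        ex_freq=200, gain=9, num_x_steps=250, r_extra=110,
    #                        verbose=True)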
    def test(self, pix_ind=None, show_plots=True):
        """
        Tests the inference on a single pixel (randomly chosen unless manually specified) worth of data.

        Parameters
        ----------
        pix_ind : int, optional. default = random
            Index of the pixel whose data will be used for inference
        show_plots : bool, optional. default = True
            Whether or not to show plots

        Returns
        -------
        fig, axes
        """
        if self.mpi_rank > 0:
            return
        if pix_ind is None:
            pix_ind = np.random.randint(0, high=self.h5_main.shape[0])
        other_params = self.parms_dict.copy()
        # removing duplicates:
        _ = other_params.pop('freq')

        return bayesian_inference_on_period(self.h5_main[pix_ind],
                                            self.single_ao,
                                            self.parms_dict['freq'],
                                            show_plots=show_plots,
                                            **other_params)
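    # Illustrative usage sketch (editorial, not part of the original class): preview the
    # inference on a single, arbitrarily chosen pixel before committing to compute():
    #     fig, axes = proc.test(pix_ind=42, show_plots=True)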
    def _set_memory_and_cores(self, cores=None, man_mem_limit=None,
                              mem_multiplier=1.0):
        """
        Checks hardware limitations such as memory and number of CPU cores, then sets the recommended
        data chunk sizes and the number of cores to be used by analysis methods.

        Parameters
        ----------
        cores : uint, optional. Default = None
            How many cores to use for the computation
        man_mem_limit : uint, optional. Default = None
            The amount of memory in MB to use in the computation
        mem_multiplier : float, optional. Default = 1.0
            Factor by which the estimated memory footprint per position is scaled
        """
        super(GIVBayesian, self)._set_memory_and_cores(cores=cores,
                                                       mem_multiplier=mem_multiplier,
                                                       man_mem_limit=man_mem_limit)
        # Remember that the default number of pixels corresponds to only the raw data that can be held in memory
        # In the case of simplified Bayesian inference, four (roughly) equally sized datasets need to be held in
        # memory: raw, compensated current, resistance, variance
        self._max_pos_per_read = self._max_pos_per_read // 4  # Integer division
        # Since these computations take far longer than functional fitting, do in smaller batches:
        self._max_pos_per_read = min(1000, self._max_pos_per_read)

        if self.verbose and self.mpi_rank == 0:
            print('Max positions per read set to {}'.format(self._max_pos_per_read))

    def _create_results_datasets(self):
        """
        Creates hdf5 datasets and datagroups to hold the results
        """
        # create all h5 datasets here:
        num_pos = self.h5_main.shape[0]

        if self.verbose and self.mpi_rank == 0:
            print('Now creating the datasets')

        self.h5_results_grp = create_results_group(self.h5_main,
                                                   self.process_name,
                                                   h5_parent_group=self._h5_target_group)

        write_simple_attrs(self.h5_results_grp, {'algorithm_author': 'Kody J. Law', 'last_pixel': 0})
        write_simple_attrs(self.h5_results_grp, self.parms_dict)

        if self.verbose and self.mpi_rank == 0:
            print('created group: {} with attributes:'.format(self.h5_results_grp.name))
            print(get_attributes(self.h5_results_grp))

        # One of those rare instances when the result is exactly the same as the source
        self.h5_i_corrected = create_empty_dataset(self.h5_main, np.float32,
                                                   'Corrected_Current',
                                                   h5_group=self.h5_results_grp)

        if self.verbose and self.mpi_rank == 0:
            print('Created I Corrected')
            # print_tree(self.h5_results_grp)

        # For some reason, we cannot specify chunks or compression!
        # The resistance dataset requires the creation of a new spectroscopic dimension
        self.h5_resistance = write_main_dataset(self.h5_results_grp,
                                                (num_pos, self.num_x_steps),
                                                'Resistance', 'Resistance',
                                                'GOhms', None,
                                                Dimension('Bias', 'V', self.num_x_steps),
                                                dtype=np.float32,
                                                # chunks=(1, self.num_x_steps),
                                                # compression='gzip',
                                                h5_pos_inds=self.h5_main.h5_pos_inds,
                                                h5_pos_vals=self.h5_main.h5_pos_vals)

        if self.verbose and self.mpi_rank == 0:
            print('Created Resistance')
            # print_tree(self.h5_results_grp)

        assert isinstance(self.h5_resistance, USIDataset)  # only here for PyCharm
        self.h5_new_spec_vals = self.h5_resistance.h5_spec_vals

        # The variance is identical to the resistance dataset
        self.h5_variance = create_empty_dataset(self.h5_resistance, np.float32, 'R_variance')

        if self.verbose and self.mpi_rank == 0:
            print('Created Variance')
            # print_tree(self.h5_results_grp)

        # The capacitance dataset requires new spectroscopic dimensions as well
        self.h5_cap = write_main_dataset(self.h5_results_grp, (num_pos, 1),
                                         'Capacitance', 'Capacitance', 'pF',
                                         None,
                                         Dimension('Direction', '', [1]),
                                         h5_pos_inds=self.h5_main.h5_pos_inds,
                                         h5_pos_vals=self.h5_main.h5_pos_vals,
                                         dtype=cap_dtype,
                                         # compression='gzip',
                                         aux_spec_prefix='Cap_Spec_')

        if self.verbose and self.mpi_rank == 0:
            print('Created Capacitance')
            # print_tree(self.h5_results_grp)
            print('Done creating all results datasets!')

        if self.mpi_size > 1:
            self.mpi_comm.Barrier()
        self.h5_main.file.flush()

    def _get_existing_datasets(self):
        """
        Extracts references to the existing datasets that hold the results
        """
        self.h5_new_spec_vals = self.h5_results_grp['Spectroscopic_Values']
        self.h5_cap = self.h5_results_grp['Capacitance']
        self.h5_variance = self.h5_results_grp['R_variance']
        self.h5_resistance = self.h5_results_grp['Resistance']
        self.h5_i_corrected = self.h5_results_grp['Corrected_Current']

    def _write_results_chunk(self):
        """
        Writes data chunks back to the h5 file
        """
        if self.verbose:
            print('Rank {} - Started accumulating results for this chunk'.format(self.mpi_rank))

        num_pixels = len(self.forward_results)
        cap_mat = np.zeros((num_pixels, 2), dtype=np.float32)
        r_inf_mat = np.zeros((num_pixels, self.num_x_steps), dtype=np.float32)
        r_var_mat = np.zeros((num_pixels, self.num_x_steps), dtype=np.float32)
        i_cor_sin_mat = np.zeros((num_pixels, self.single_ao.size), dtype=np.float32)

        for pix_ind, i_meas, forw_results, rev_results in zip(range(num_pixels),
                                                              self.data,
                                                              self.forward_results,
                                                              self.reverse_results):
            full_results = dict()
            for item in ['cValue']:
                full_results[item] = np.hstack((forw_results[item], rev_results[item]))
                # print(item, full_results[item].shape)

            # Capacitance is always doubled - halve it now (locally):
            # full_results['cValue'] *= 0.5
            cap_val = np.mean(full_results['cValue']) * 0.5

            # Compensating the resistance..
            """
            omega = 2 * np.pi * self.ex_freq
            i_cap = cap_val * omega * self.rolled_bias
            """
            i_cap = cap_val * self.dvdt
            i_extra = self.r_extra * 2 * cap_val * self.single_ao
            i_corr_sine = i_meas - i_cap - i_extra

            # Equivalent to flipping the X:
            rev_results['x'] *= -1

            # Stacking the results - no flipping required for reverse:
            for item in ['x', 'mR', 'vR']:
                full_results[item] = np.hstack((forw_results[item], rev_results[item]))

            i_cor_sin_mat[pix_ind] = i_corr_sine
            cap_mat[pix_ind] = full_results['cValue'] * 1000  # convert from nF to pF
            r_inf_mat[pix_ind] = full_results['mR']
            r_var_mat[pix_ind] = full_results['vR']

        # Now write to h5 files:
        if self.verbose:
            print('Rank {} - Finished accumulating results. Writing results of chunk to h5'.format(self.mpi_rank))

        if self.__first_batch:
            self.h5_new_spec_vals[0, :] = full_results['x']  # Technically this needs to only be done once
            self.__first_batch = False

        # Get access to the private variable:
        pos_in_batch = self._get_pixels_in_current_batch()

        self.h5_cap[pos_in_batch, :] = np.atleast_2d(stack_real_to_compound(cap_mat, cap_dtype)).T
        self.h5_variance[pos_in_batch, :] = r_var_mat
        self.h5_resistance[pos_in_batch, :] = r_inf_mat
        self.h5_i_corrected[pos_in_batch, :] = i_cor_sin_mat

    def _unit_computation(self, *args, **kwargs):
        """
        Processing per chunk of the dataset

        Parameters
        ----------
        args : list
            Not used
        kwargs : dictionary
            Not used
        """
        half_v_steps = self.single_ao.size // 2

        # first roll the data
        rolled_raw_data = np.roll(self.data, self.roll_pts, axis=1)
        # Ensure that the bias has a positive slope. Multiply current by -1 accordingly
        if self.verbose:
            print('Rank {} beginning parallel compute for Forward'.format(self.mpi_rank))
        self.reverse_results = parallel_compute(rolled_raw_data[:, :half_v_steps] * -1,
                                                do_bayesian_inference,
                                                cores=self._cores,
                                                func_args=[self.rolled_bias[:half_v_steps] * -1,
                                                           self.ex_freq],
                                                func_kwargs=self._bayes_parms,
                                                lengthy_computation=False,
                                                verbose=self.verbose)

        if self.verbose:
            print('Rank {} finished processing forward sections. Now working on reverse sections'.format(self.mpi_rank))

        self.forward_results = parallel_compute(rolled_raw_data[:, half_v_steps:],
                                                do_bayesian_inference,
                                                cores=self._cores,
                                                func_args=[self.rolled_bias[half_v_steps:],
                                                           self.ex_freq],
                                                func_kwargs=self._bayes_parms,
                                                lengthy_computation=False,
                                                verbose=self.verbose)

        if self.verbose:
            print('Rank {} Finished processing reverse loops (and this chunk)'.format(self.mpi_rank))
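    # Editorial note (not part of the original class): _write_results_chunk corrects the
    # measured current per pixel as
    #     i_corr_sine = i_meas - i_cap - i_extra
    # where i_cap = C * dV/dt is the capacitive displacement current (using the mean
    # inferred capacitance C) and i_extra = 2 * C * r_extra * V is the contribution tied
    # to the extra resistance in the RC circuit; the remainder is stored as
    # Corrected_Current.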
    def compute(self, override=False, *args, **kwargs):
        """
        Creates placeholders for the results, applies the inference to the data, and writes the output to the file.
        Consider calling test() before this function to make sure that the parameters are appropriate.

        Parameters
        ----------
        override : bool, optional. default = False
            By default, compute will simply return duplicate results to avoid recomputing or resume computation on
            a group with partial results. Set to True to force fresh computation.
        args : list
            Not used
        kwargs : dictionary
            Not used

        Returns
        -------
        h5_results_grp : h5py.Datagroup object
            Datagroup containing all the results
        """
        # remove additional parm and halve the x points
        self._bayes_parms = self.parms_dict.copy()
        self._bayes_parms['num_x_steps'] = self.num_x_steps // 2
        self._bayes_parms['econ'] = True
        del self._bayes_parms['freq']

        return super(GIVBayesian, self).compute(override=override, *args, **kwargs)
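
# A minimal end-to-end sketch of how this class is typically driven (editorial addition;
# the file name, group path, and parameter values below are hypothetical placeholders,
# not part of the BGlib API):
if __name__ == '__main__':
    import h5py

    with h5py.File('gmode_iv_raw.h5', mode='r+') as h5_f:
        h5_raw = USIDataset(h5_f['Measurement_000/Channel_000/Raw_Data'])
        proc = GIVBayesian(h5_raw, ex_freq=200, gain=9, num_x_steps=250,
                           r_extra=110, verbose=True)
        proc.test()                      # sanity-check the inference on one pixel
        h5_results_grp = proc.compute()  # run inference on every pixel and write results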