{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Write/read SIDpy Dataset via pyNSID\n", "\n", "*Author: Maxim Ziatdinov*\n", "\n", "*Date: September 2020*\n", "\n", "update: \n", "- *Gerd Duscher 01/2021 (compatibility to pyNSID version 0.0.2)*" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A quick introduction to writing sidpy Datasets to NSID-formatted HDF5 files and reading them back" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Start with standard imports:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Ensure python 3 compatibility:\n", "from __future__ import (absolute_import, division, print_function,\n", "                        unicode_literals)\n", "\n", "import warnings\n", "\n", "import h5py\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "# sidpy is required; install it into the kernel's environment if it is missing.\n", "# %pip (unlike !pip3) always targets the interpreter this kernel is running.\n", "try:\n", "    import sidpy\n", "except ModuleNotFoundError:\n", "    %pip install sidpy\n", "    import sidpy\n", "\n", "import pyNSID\n", "\n", "warnings.filterwarnings(\"ignore\", module=\"numpy.core.fromnumeric\")\n", "warnings.filterwarnings(\"ignore\", module=\"pyNSID.io.nsi_reader\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Creating sidpy.Dataset object(s)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's create a simple sidpy Dataset from a numpy array:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Array Chunk
Bytes 1.60 kB 1.60 kB
Shape (4, 5, 10) (4, 5, 10)
Count 1 Tasks 1 Chunks
Type float64 numpy.ndarray
\n", "
\n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " 10\n", " 5\n", " 4\n", "\n", "
" ], "text/plain": [ "sidpy.Dataset of type UNKNOWN with:\n", " dask.array\n", " data contains: generic (generic)\n", " and Dimensions: \n", "a: generic (generic) of size (4,)\n", "b: generic (generic) of size (5,)\n", "c: generic (generic) of size (10,)" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset = sidpy.Dataset.from_array(np.random.random([4, 5, 10]), name='new')\n", "dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's also define the dataset attributes..." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "dataset.data_type = 'SPECTRAL_IMAGE'\n", "dataset.units = 'nA'\n", "dataset.quantity = 'Current'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "... and set individual dimensions. In the case of spectroscopic datasets, the first two dimensions are typically spatial units (e.g. nm) and the third one can be energy (e.g. $meV$ or $nm^{-1}$)." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "dataset.set_dimension(0, sidpy.Dimension(np.arange(dataset.shape[0]), 'x',\n", " units='nm', quantity='Length',\n", " dimension_type='spatial'))\n", "dataset.set_dimension(1, sidpy.Dimension(np.linspace(-2, 2, num=dataset.shape[1], endpoint=True), 'y', \n", " units='nm', quantity='Length',\n", " dimension_type='spatial'))\n", "dataset.set_dimension(2, sidpy.Dimension(np.sin(np.linspace(0, 2 * np.pi, num=dataset.shape[2])), 'bias',\n", " units='mV', quantity='Voltage',\n", " dimension_type='spectral'))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "x: Length (nm) of size (4,)\n", "y: Length (nm) of size (5,)\n", "bias: Voltage (mV) of size (10,)\n" ] } ], "source": [ "print(dataset.dim_0)\n", "print(dataset.dim_1)\n", "print(dataset.dim_2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 
Writing sidpy.Dataset object(s) to HDF5 files" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Open (or create, if it does not exist yet) the HDF5 file that will hold the NSID-formatted data:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "hf = h5py.File(\"test.hf5\", 'a')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's create a new channel where we are going to save our sidpy dataset. Because the file is opened in append mode, we use `require_group`, which returns the group if it is already present instead of raising an error when this cell is re-run:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Channel_000\n" ] } ], "source": [ "hf.require_group('Measurement_000/Channel_000')\n", "print(*hf[\"Measurement_000\"].keys())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now let's write our sidpy dataset into the newly created channel:" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\gduscher\\Anaconda3\\lib\\site-packages\\pyNSID\\io\\hdf_utils.py:351: FutureWarning: validate_h5_dimension may be removed in a future version\n", " warn('validate_h5_dimension may be removed in a future version',\n" ] } ], "source": [ "pyNSID.hdf_io.write_nsid_dataset(dataset, hf['Measurement_000/Channel_000'], main_data_name=\"new_spectrum\");" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Close h5 file:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "hf.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reading sidpy.Dataset object(s)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load back the file:" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Channel_000\n" ] } ], "source": [ "hf = h5py.File(\"test.hf5\", 'r+')\n", "print(*hf[\"Measurement_000\"].keys())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Find our dataset:" ] }, { 
"cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset_hdf5 = pyNSID.io.hdf_utils.find_dataset(hf,'new_spectrum')[0]\n", "dataset_hdf5\n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "hf.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Read the dataset stored in HDF5 format as a sidpy object (Dataset) using NSIDReader:" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "dr = pyNSID.NSIDReader('test.hf5')\n", "dataset_sid = dr.read()[0]\n", "assert isinstance(dataset_sid, sidpy.Dataset)\n", "dataset_sid.plot()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "sidpy.Dataset of type SPECTRAL_IMAGE with:\n", " dask.array\n", " data contains: Current (nA)\n", " and Dimensions: \n", "x: Length (nm) of size (4,)\n", "y: Length (nm) of size (5,)\n", "bias: Voltage (mV) of size (10,)\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Array Chunk
Bytes 1.60 kB 1.60 kB
Shape (4, 5, 10) (4, 5, 10)
Count 1 Tasks 1 Chunks
Type float64 numpy.ndarray
\n", "
\n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " 10\n", " 5\n", " 4\n", "\n", "
" ], "text/plain": [ "sidpy.Dataset of type SPECTRAL_IMAGE with:\n", " dask.array\n", " data contains: Current (nA)\n", " and Dimensions: \n", "x: Length (nm) of size (4,)\n", "y: Length (nm) of size (5,)\n", "bias: Voltage (mV) of size (10,)" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(dataset_sid)\n", "dataset_sid" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Close File" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "dataset_sid.h5_dataset.file.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }