From 18be7018f7d797293f58d1383760949175343b81 Mon Sep 17 00:00:00 2001
From: Bjoern Ludwig <bjoern.ludwig@ptb.de>
Date: Mon, 16 Jan 2023 18:22:24 +0100
Subject: [PATCH] docs(notebook): adapt read_dataset.ipynb to new
 implementation without manual cache location or hash spec, dropping
 the exploratory data-extraction cells

---
 .../examples/read_dataset.ipynb               | 167 +-----------------
 1 file changed, 4 insertions(+), 163 deletions(-)

diff --git a/src/zema_emc_annotated/examples/read_dataset.ipynb b/src/zema_emc_annotated/examples/read_dataset.ipynb
index 8561a47..847a21f 100644
--- a/src/zema_emc_annotated/examples/read_dataset.ipynb
+++ b/src/zema_emc_annotated/examples/read_dataset.ipynb
@@ -9,44 +9,29 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "pycharm": {
-     "is_executing": true
-    }
-   },
+   "execution_count": 13,
+   "metadata": {},
    "outputs": [],
    "source": [
     "import json\n",
-    "import os\n",
-    "from os.path import dirname\n",
-    "from pathlib import Path\n",
     "\n",
     "import h5py\n",
-    "import numpy as np\n",
-    "from h5py import Dataset, File, Group\n",
-    "from numpy import ndarray\n",
     "from pooch import retrieve"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
     "def local_path_to_dataset_after_download_if_required():\n",
-    "    LOCAL_ZEMA_DATASET_PATH = Path(os.path.abspath(\"\")).parent.joinpath(\"datasets\")\n",
-    "    ZEMA_DATASET_HASH = (\n",
-    "        \"sha256:fb0e80de4e8928ae8b859ad9668a1b6ea6310028a6690bb8d4c1abee31cb8833\"\n",
-    "    )\n",
     "    ZEMA_DATASET_URL = (\n",
     "        \"https://zenodo.org/record/5185953/files/axis11_2kHz_ZeMA_PTB_SI.h5\"\n",
     "    )\n",
     "    return retrieve(\n",
     "        url=ZEMA_DATASET_URL,\n",
-    "        known_hash=ZEMA_DATASET_HASH,\n",
-    "        path=LOCAL_ZEMA_DATASET_PATH,\n",
+    "        known_hash=None,\n",
     "        progressbar=True,\n",
     "    )"
    ]
@@ -140,150 +125,6 @@
     "    print(my_uncertainty)\n",
     "    print(list(h5f[\"PTB_SUU\"]))"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def extract_data(n_samples: int, verbose: bool = False) -> ndarray:\n",
-    "    extracted_data = np.empty((n_samples, 0))\n",
-    "    indices = np.s_[0:5, 0]\n",
-    "    with h5py.File(local_path_to_dataset_after_download_if_required(), \"r\") as h5f:\n",
-    "        daq_identifier = \"ZeMA_DAQ\"\n",
-    "        if verbose:\n",
-    "            print(\n",
-    "                f\"\\nShow data for sensor set {daq_identifier}:\\n{'-'*(26 + len(daq_identifier))}\"\n",
-    "            )\n",
-    "        for quantity in conditional_first_level_element(h5f, daq_identifier):\n",
-    "            if verbose:\n",
-    "                print(\n",
-    "                    f\"\\n    Show data for quantity {quantity}:\\n    {'-'*(24 + len(quantity))}\"\n",
-    "                )\n",
-    "            for dataset in hdf5_part(h5f, (daq_identifier, quantity)):\n",
-    "                if verbose:\n",
-    "                    print(f\"    {hdf5_part(h5f, (daq_identifier, quantity, dataset))}\")\n",
-    "                if (\n",
-    "                    \"qudt:standardUncertainty\"\n",
-    "                    in hdf5_part(h5f, (daq_identifier, quantity, dataset)).name\n",
-    "                ):\n",
-    "                    if (\n",
-    "                        len(hdf5_part(h5f, (daq_identifier, quantity, dataset)).shape)\n",
-    "                        == 3\n",
-    "                    ):\n",
-    "                        for sensor in hdf5_part(\n",
-    "                            h5f, (daq_identifier, quantity, dataset)\n",
-    "                        ):\n",
-    "                            extracted_data = append_to_extraction(\n",
-    "                                extracted_data,\n",
-    "                                extract_sample_from_dataset(sensor, indices),\n",
-    "                            )\n",
-    "                    else:\n",
-    "                        extracted_data = append_to_extraction(\n",
-    "                            extracted_data,\n",
-    "                            extract_sample_from_dataset(\n",
-    "                                hdf5_part(h5f, (daq_identifier, quantity, dataset)),\n",
-    "                                indices,\n",
-    "                            ),\n",
-    "                        )\n",
-    "    return extracted_data\n",
-    "\n",
-    "\n",
-    "def conditional_first_level_element(hdf5_file: File, identifier: str) -> Group:\n",
-    "    for sensor_set_descriptor in hdf5_file:\n",
-    "        if identifier in sensor_set_descriptor:\n",
-    "            return hdf5_file[sensor_set_descriptor]\n",
-    "\n",
-    "\n",
-    "def hdf5_part(hdf5_file: File, keys: tuple[str, ...]) -> Group | Dataset:\n",
-    "    part = hdf5_file\n",
-    "    for key in keys:\n",
-    "        part = part[key]\n",
-    "    return part\n",
-    "\n",
-    "\n",
-    "def extract_sample_from_dataset(\n",
-    "    data_set: Dataset, ns_samples: tuple[int | slice]\n",
-    ") -> ndarray[float]:\n",
-    "    return np.expand_dims(np.array(data_set[ns_samples]), 1)\n",
-    "\n",
-    "\n",
-    "def append_to_extraction(append_to: ndarray, appendix: ndarray) -> ndarray:\n",
-    "    return np.append(append_to, appendix, axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Show data for sensor set ZeMA_DAQ:\n",
-      "----------------------------------\n",
-      "\n",
-      "    Show data for quantity Acceleration:\n",
-      "    ------------------------------------\n",
-      "    <HDF5 dataset \"qudt:standardUncertainty\": shape (3, 2000, 4766), type \"<f8\">\n",
-      "    <HDF5 dataset \"qudt:value\": shape (3, 2000, 4766), type \"<f8\">\n",
-      "\n",
-      "    Show data for quantity Active_Current:\n",
-      "    --------------------------------------\n",
-      "    <HDF5 dataset \"qudt:standardUncertainty\": shape (2000, 4766), type \"<f8\">\n",
-      "    <HDF5 dataset \"qudt:value\": shape (2000, 4766), type \"<f8\">\n",
-      "\n",
-      "    Show data for quantity Force:\n",
-      "    -----------------------------\n",
-      "    <HDF5 dataset \"qudt:standardUncertainty\": shape (2000, 4766), type \"<f8\">\n",
-      "    <HDF5 dataset \"qudt:value\": shape (2000, 4766), type \"<f8\">\n",
-      "\n",
-      "    Show data for quantity Motor_Current:\n",
-      "    -------------------------------------\n",
-      "    <HDF5 dataset \"qudt:standardUncertainty\": shape (3, 2000, 4766), type \"<f8\">\n",
-      "    <HDF5 dataset \"qudt:value\": shape (3, 2000, 4766), type \"<f8\">\n",
-      "\n",
-      "    Show data for quantity Pressure:\n",
-      "    --------------------------------\n",
-      "    <HDF5 dataset \"qudt:standardUncertainty\": shape (2000, 4766), type \"<f8\">\n",
-      "    <HDF5 dataset \"qudt:value\": shape (2000, 4766), type \"<f8\">\n",
-      "\n",
-      "    Show data for quantity Sound_Pressure:\n",
-      "    --------------------------------------\n",
-      "    <HDF5 dataset \"qudt:standardUncertainty\": shape (2000, 4766), type \"<f8\">\n",
-      "    <HDF5 dataset \"qudt:value\": shape (2000, 4766), type \"<f8\">\n",
-      "\n",
-      "    Show data for quantity Velocity:\n",
-      "    --------------------------------\n",
-      "    <HDF5 dataset \"qudt:standardUncertainty\": shape (2000, 4766), type \"<f8\">\n",
-      "    <HDF5 dataset \"qudt:value\": shape (2000, 4766), type \"<f8\">\n",
-      "[[2.83190307e+00 2.83190307e+00 2.83190307e+00 1.64743668e-02\n",
-      "  1.24365050e-02 1.16511079e-02 2.13708300e-02 3.66123419e-02\n",
-      "  1.68325082e+04 2.78848019e-05 1.20545254e+00]\n",
-      " [2.83190307e+00 2.83190307e+00 2.83190307e+00 3.01910282e-02\n",
-      "  1.24365050e-02 5.74690879e-02 1.12427249e-02 6.80918703e-02\n",
-      "  1.68325082e+04 2.78848019e-05 2.17917358e+00]\n",
-      " [2.83190307e+00 2.83190307e+00 2.83190307e+00 2.61650718e-02\n",
-      "  1.24365050e-02 6.31271288e-02 4.34207110e-02 1.82414959e-02\n",
-      "  1.68325082e+04 2.78848019e-05 1.92350168e+00]\n",
-      " [2.83190307e+00 2.83190307e+00 2.83190307e+00 2.82367380e-02\n",
-      "  1.24365050e-02 3.59956144e-02 5.92208475e-02 2.20117766e-02\n",
-      "  1.68325082e+04 2.78848019e-05 2.06234912e+00]\n",
-      " [2.83190307e+00 2.83190307e+00 2.83190307e+00 2.92119176e-02\n",
-      "  1.24365050e-02 8.26010663e-03 4.94686133e-02 5.76812843e-02\n",
-      "  1.68325082e+04 2.78848019e-05 1.97239442e+00]]\n",
-      "(5, 11)\n"
-     ]
-    }
-   ],
-   "source": [
-    "uncertainties = extract_data(1, verbose=True)\n",
-    "print(uncertainties)\n",
-    "print(uncertainties.shape)"
-   ]
   }
  ],
  "metadata": {
-- 
GitLab