Skip to content
Snippets Groups Projects
Verified Commit 438bf321 authored by Björn Ludwig's avatar Björn Ludwig
Browse files

refactor(read_dataset): blacken

parent e55f5cdb
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
# Read ZeMA dataset and preprocess data
%% Cell type:code id: tags:
``` python
import json
import os
from os.path import dirname
from pathlib import Path
import h5py
import numpy as np
from h5py import Dataset, File, Group
from numpy import ndarray
from pooch import retrieve
```
%% Cell type:code id: tags:
``` python
def local_path_to_dataset_after_download_if_required() -> str:
    """Return the local path of the ZeMA dataset file, fetching it via ``pooch``.

    The file is requested with ``pooch.retrieve`` using a fixed Zenodo URL and a
    fixed SHA256 hash. It is stored in a ``datasets`` folder that is a sibling of
    the current working directory.

    Returns
    -------
    str
        Whatever ``pooch.retrieve`` returns for the requested file, i.e. the path
        of the local copy.
    """
    # ``os.path.abspath("")`` resolves to the current working directory, so the
    # target folder is ``<parent of cwd>/datasets``.
    LOCAL_ZEMA_DATASET_PATH = Path(os.path.abspath("")).parent.joinpath("datasets")
    ZEMA_DATASET_HASH = (
        "sha256:fb0e80de4e8928ae8b859ad9668a1b6ea6310028a6690bb8d4c1abee31cb8833"
    )
    ZEMA_DATASET_URL = (
        "https://zenodo.org/record/5185953/files/axis11_2kHz_ZeMA_PTB_SI.h5"
    )
    return retrieve(
        url=ZEMA_DATASET_URL,
        known_hash=ZEMA_DATASET_HASH,
        path=LOCAL_ZEMA_DATASET_PATH,
        progressbar=True,
    )
```
%% Cell type:code id: tags:
``` python
def print_attrs(h5py_dataset_or_group):
    """Print every attribute of an HDF5 object after decoding it from JSON.

    Each attribute name is printed on its own line. The attribute value is parsed
    with ``json.loads``; a decoded dictionary is printed entry by entry, anything
    else is printed as a single line.

    Parameters
    ----------
    h5py_dataset_or_group
        Object exposing an ``attrs`` mapping whose values are JSON strings.
    """
    attributes = h5py_dataset_or_group.attrs
    for name in attributes:
        print(name)
        decoded = json.loads(attributes[name])
        if not isinstance(decoded, dict):
            # Scalars and lists are shown as they are
            print(f" {decoded}")
            continue
        for field, content in decoded.items():
            print(f" {field} : {content}")
```
%% Cell type:code id: tags:
``` python
# Open the dataset file read-only and print the attributes attached to its root
with File(local_path_to_dataset_after_download_if_required(), mode="r") as h5f:
    print_attrs(h5f)
```
%% Output
Experiment
date : 2021-03-29/2021-04-15
DUT : Festo ESBF cylinder
identifier : axis11
label : Electromechanical cylinder no. 11
Person
dc:author : ['Tanja Dorst', 'Maximilian Gruber', 'Anupam Prasad Vedurmudi']
e-mail : ['t.dorst@zema.de', 'maximilian.gruber@ptb.de', 'anupam.vedurmudi@ptb.de']
affiliation : ['ZeMA gGmbH', 'Physikalisch-Technische Bundesanstalt', 'Physikalisch-Technische Bundesanstalt']
Project
fullTitle : Metrology for the Factory of the Future
acronym : Met4FoF
websiteLink : www.met4fof.eu
fundingSource : European Commission (EC)
fundingAdministrator : EURAMET
funding programme : EMPIR
fundingNumber : 17IND12
acknowledgementText : This work has received funding within the project 17IND12 Met4FoF from the EMPIR program co-financed by the Participating States and from the European Union's Horizon 2020 research and innovation program. The authors want to thank Clifford Brown, Daniel Hutzschenreuter, Holger Israel, Giacomo Lanza, Björn Ludwig, and Julia Neumann fromPhysikalisch-Technische Bundesanstalt (PTB) for their helpful suggestions and support.
Publication
dc:identifier : 10.5281/zenodo.5185953
dc:license : Creative Commons Attribution 4.0 International (CC-BY-4.0)
dc:title : Sensor data set of one electromechanical cylinder at ZeMA testbed (ZeMA DAQ and Smart-Up Unit)
dc:description : The data set was generated with two different measurement systems at the ZeMA testbed. The ZeMA DAQ unit consists of 11 sensors and the SmartUp-Unit has 13 differentsignals. A typical working cycle lasts 2.8s and consists of a forward stroke, a waiting time and a return stroke of the electromechanical cylinder. The data set does not consist of the entire working cycles. Only one second of the return stroke of every 100rd working cycle is included. The dataset consists of 4776 cycles. One row represents one second of the return stroke of one working cycle.
dc:subject : ['dynamic measurement', 'measurement uncertainty', 'sensor network', 'digital sensors', 'MEMS', 'machine learning', 'European Union (EU)', 'Horizon 2020', 'EMPIR']
dc:SizeOrDuration : 24 sensors, 4776 cycles and 2000 datapoints each
dc:type : Dataset
dc:issued : 2021-09-10
dc:bibliographicCitation : T. Dorst, M. Gruber and A. P. Vedurmudi : Sensor data set of one electromechanical cylinder at ZeMA testbed (ZeMA DAQ and Smart-Up Unit), Zenodo [data set], https://doi.org/10.5281/zenodo.5185953, 2021.
%% Cell type:code id: tags:
``` python
# Look at one uncertainty dataset inside the "PTB_SUU" group and list that
# group's members
with File(local_path_to_dataset_after_download_if_required(), mode="r") as h5f:
    # h5py resolves a "/"-separated path just like chained key lookups
    my_uncertainty = h5f["PTB_SUU/MPU_9250/Acceleration/qudt:standardUncertainty"]
    print("qudt:standardUncertainty" in my_uncertainty.name)
    print_attrs(my_uncertainty)
    print(my_uncertainty)
    print([*h5f["PTB_SUU"]])
```
%% Output
True
si:label
['X acceleration uncertainty', 'Y acceleration uncertainty', 'Z acceleration uncertainty']
<HDF5 dataset "qudt:standardUncertainty": shape (3, 1000, 4766), type "<f8">
['BMA_280', 'MPU_9250']
%% Cell type:code id: tags:
``` python
def extract_data(n_samples: int, verbose: bool = False) -> ndarray:
    """Collect uncertainty values of all quantities in the ``ZeMA_DAQ`` group.

    Every dataset below ``ZeMA_DAQ`` whose HDF5 name contains
    ``qudt:standardUncertainty`` contributes columns to the result: one column per
    entry along the first axis for three-dimensional datasets, a single column
    otherwise. Each column holds the first ``n_samples`` entries along the first
    axis taken at index 0 of the second axis of the respective two-dimensional
    data.

    Parameters
    ----------
    n_samples : int
        Number of leading entries to take from each dataset, which is also the
        number of rows of the result.
    verbose : bool, optional
        If ``True``, print the visited sensor set, quantities and datasets.

    Returns
    -------
    ndarray
        Array of shape ``(n_samples, number_of_columns)``.
    """
    extracted_data = np.empty((n_samples, 0))
    # The slice length has to match the number of rows of ``extracted_data``,
    # otherwise appending columns along axis 1 fails with a shape mismatch.
    indices = np.s_[0:n_samples, 0]
    with h5py.File(local_path_to_dataset_after_download_if_required(), "r") as h5f:
        daq_identifier = "ZeMA_DAQ"
        if verbose:
            print(
                f"\nShow data for sensor set {daq_identifier}:\n"
                f"{'-'*(26 + len(daq_identifier))}"
            )
        for quantity in conditional_first_level_element(h5f, daq_identifier):
            if verbose:
                print(
                    f"\n Show data for quantity {quantity}:\n "
                    f"{'-'*(24 + len(quantity))}"
                )
            for dataset in hdf5_part(h5f, (daq_identifier, quantity)):
                # Look the dataset up once instead of once per use
                part = hdf5_part(h5f, (daq_identifier, quantity, dataset))
                if verbose:
                    print(f" {part}")
                if "qudt:standardUncertainty" not in part.name:
                    continue
                if len(part.shape) == 3:
                    # Iterating a 3D dataset yields one 2D array per first-axis
                    # entry; each of them becomes its own column.
                    for sensor in part:
                        extracted_data = append_to_extraction(
                            extracted_data,
                            extract_sample_from_dataset(sensor, indices),
                        )
                else:
                    extracted_data = append_to_extraction(
                        extracted_data,
                        extract_sample_from_dataset(part, indices),
                    )
    return extracted_data
def conditional_first_level_element(hdf5_file: File, identifier: str) -> Group:
    """Return the first top-level member whose key contains ``identifier``.

    Parameters
    ----------
    hdf5_file : File
        Mapping-like object that is iterated for its top-level keys.
    identifier : str
        Text that has to be contained in the key.

    Returns
    -------
    Group
        ``hdf5_file[key]`` for the first matching key, or ``None`` if no key
        contains ``identifier``.
    """
    matches = (
        hdf5_file[descriptor] for descriptor in hdf5_file if identifier in descriptor
    )
    # The generator is lazy, so only the first matching member is ever looked up
    return next(matches, None)
def hdf5_part(hdf5_file: File, keys: tuple[str, ...]) -> Group | Dataset:
    """Descend into ``hdf5_file`` by applying ``keys`` one after another.

    Parameters
    ----------
    hdf5_file : File
        Object at which the lookup starts.
    keys : tuple[str, ...]
        Keys to follow, outermost first.

    Returns
    -------
    Group | Dataset
        The object reached after the last key; ``hdf5_file`` itself if ``keys``
        is empty.
    """
    node = hdf5_file
    for level_key in keys:
        node = node[level_key]
    return node
def extract_sample_from_dataset(
    data_set: Dataset, ns_samples: tuple[int | slice, ...]
) -> ndarray:
    """Read a selection from ``data_set`` and return it with an added second axis.

    Parameters
    ----------
    data_set : Dataset
        Object that is indexed with ``ns_samples``.
    ns_samples : tuple[int | slice, ...]
        Index expression applied to ``data_set``, e.g. built with ``np.s_``.

    Returns
    -------
    ndarray
        The selected values converted to an array, with a new axis of length one
        inserted at position 1, so a one-dimensional selection becomes a column.
    """
    return np.expand_dims(np.array(data_set[ns_samples]), 1)
def append_to_extraction(append_to: ndarray, appendix: ndarray) -> ndarray:
    """Return a new array with ``appendix`` attached to ``append_to`` as columns.

    Parameters
    ----------
    append_to : ndarray
        Array to extend along its second axis.
    appendix : ndarray
        Values to attach; must agree with ``append_to`` in all other dimensions.

    Returns
    -------
    ndarray
        The joined array; the inputs are left untouched.
    """
    base = np.asanyarray(append_to)
    return np.concatenate((base, appendix), axis=1)
```
%% Cell type:code id: tags:
``` python
# Run the extraction verbosely with ``n_samples=1`` and show the result followed
# by its shape
uncertainties = extract_data(n_samples=1, verbose=True)
print(uncertainties, uncertainties.shape, sep="\n")
```
%% Output
Show data for sensor set ZeMA_DAQ:
----------------------------------
Show data for quantity Acceleration:
------------------------------------
<HDF5 dataset "qudt:standardUncertainty": shape (3, 2000, 4766), type "<f8">
<HDF5 dataset "qudt:value": shape (3, 2000, 4766), type "<f8">
Show data for quantity Active_Current:
--------------------------------------
<HDF5 dataset "qudt:standardUncertainty": shape (2000, 4766), type "<f8">
<HDF5 dataset "qudt:value": shape (2000, 4766), type "<f8">
Show data for quantity Force:
-----------------------------
<HDF5 dataset "qudt:standardUncertainty": shape (2000, 4766), type "<f8">
<HDF5 dataset "qudt:value": shape (2000, 4766), type "<f8">
Show data for quantity Motor_Current:
-------------------------------------
<HDF5 dataset "qudt:standardUncertainty": shape (3, 2000, 4766), type "<f8">
<HDF5 dataset "qudt:value": shape (3, 2000, 4766), type "<f8">
Show data for quantity Pressure:
--------------------------------
<HDF5 dataset "qudt:standardUncertainty": shape (2000, 4766), type "<f8">
<HDF5 dataset "qudt:value": shape (2000, 4766), type "<f8">
Show data for quantity Sound_Pressure:
--------------------------------------
<HDF5 dataset "qudt:standardUncertainty": shape (2000, 4766), type "<f8">
<HDF5 dataset "qudt:value": shape (2000, 4766), type "<f8">
Show data for quantity Velocity:
--------------------------------
<HDF5 dataset "qudt:standardUncertainty": shape (2000, 4766), type "<f8">
<HDF5 dataset "qudt:value": shape (2000, 4766), type "<f8">
[[2.83190307e+00 2.83190307e+00 2.83190307e+00 1.64743668e-02
1.24365050e-02 1.16511079e-02 2.13708300e-02 3.66123419e-02
1.68325082e+04 2.78848019e-05 1.20545254e+00]
[2.83190307e+00 2.83190307e+00 2.83190307e+00 3.01910282e-02
1.24365050e-02 5.74690879e-02 1.12427249e-02 6.80918703e-02
1.68325082e+04 2.78848019e-05 2.17917358e+00]
[2.83190307e+00 2.83190307e+00 2.83190307e+00 2.61650718e-02
1.24365050e-02 6.31271288e-02 4.34207110e-02 1.82414959e-02
1.68325082e+04 2.78848019e-05 1.92350168e+00]
[2.83190307e+00 2.83190307e+00 2.83190307e+00 2.82367380e-02
1.24365050e-02 3.59956144e-02 5.92208475e-02 2.20117766e-02
1.68325082e+04 2.78848019e-05 2.06234912e+00]
[2.83190307e+00 2.83190307e+00 2.83190307e+00 2.92119176e-02
1.24365050e-02 8.26010663e-03 4.94686133e-02 5.76812843e-02
1.68325082e+04 2.78848019e-05 1.97239442e+00]]
(5, 11)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment