diff --git a/src/zema_emc_annotated/data_types.py b/src/zema_emc_annotated/data_types.py
index 97288b36e967aecd8257b8a7aabef6dba4feac80..a529d8c55e194674a62c6a117f484ad112f186f5 100644
--- a/src/zema_emc_annotated/data_types.py
+++ b/src/zema_emc_annotated/data_types.py
@@ -1,6 +1,6 @@
 """This module contains type aliases and data types for type hints"""
 
-__all__ = ["RealMatrix", "RealVector", "UncertainArray"]
+__all__ = ["RealMatrix", "RealVector", "SampleSize", "UncertainArray"]
 
 from typing import NamedTuple, TypeAlias
 
@@ -20,3 +20,25 @@ class UncertainArray(NamedTuple):
     """the corresponding values"""
     uncertainties: RealMatrix | RealVector
     """... and their associated uncertainties"""
+
+
+class SampleSize(NamedTuple):
+    """A tuple to specify which cycles and how many datapoints per cycle to extract"""
+
+    idx_first_cycle: int = 0
+    """index of the first cycle (sample) to be extracted
+
+    defaults to 0 and must be between 0 and 4765
+    """
+    n_cycles: int = 1
+    """number of cycles extracted from the dataset
+
+    each extracted cycle contains the first :attr:`datapoints_per_cycle` readings of
+    each of the eleven sensors together with their associated standard uncertainties,
+    defaults to 1 and must be between 1 and 4766 - :attr:`idx_first_cycle`"""
+    datapoints_per_cycle: int = 1
+    """number of readings taken from each individual sensor per sample/cycle
+
+    defaults to 1 and should be between 1 and 2000, as there are only 2000 readings
+    per cycle; higher values will be clipped to 2000
+    """