diff --git a/EIVPackage/EIVData/linear.py b/EIVPackage/EIVData/linear.py
index b1825905acafc69332d1c01418eb7db00e0f28eb..9b69b6b310cf61382e8d9f5204f6ae18a2d1c2fc 100644
--- a/EIVPackage/EIVData/linear.py
+++ b/EIVPackage/EIVData/linear.py
@@ -1,6 +1,6 @@
 import torch
 import sys
-from torch.utils.data import TensorDataset, random_split
+from torch.utils.data import TensorDataset
 
 total_number_of_datapoints = 2000
 input_range = [-1,1]
@@ -31,11 +31,13 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
     :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
     also be returned. Defaults to False.
     :returns: linear_trainset, linear_testset if return_ground_truth is False,
-    else linear_trainset, linear_testset, (true_x, true_y)
+    else linear_trainset, linear_testset,  true_linear_trainset,
+    true_linear_testset. The later two return **four tensors**: The true x,y and
+    their noisy counterparts.
     """
     random_generator = torch.Generator().manual_seed(seed)
     # draw different seeds for noise and splitting
-    seeds = torch.randint(0,sys.maxsize,(4,), generator=random_generator)
+    seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator)
     # create new generators from tensor seeds
     create_generator = lambda tensor_seed:\
             torch.Generator().manual_seed(tensor_seed.item())
@@ -55,14 +57,22 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
         true_x = (true_x-normalization_x[0])/normalization_x[1]
         noisy_y = (noisy_y-normalization_y[0])/normalization_y[1]
         true_y = (true_y-normalization_y[0])/normalization_y[1]
-    linear_dataset = TensorDataset(noisy_x, noisy_y)
-    dataset_len = len(linear_dataset)
+    dataset_len = noisy_x.shape[0]
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
-    linear_trainset, linear_testset = random_split(linear_dataset,
-            lengths=[train_len, test_len],
-            generator=create_generator(seeds[3]))
+    true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
+    true_train_y, true_test_y = torch.split(true_y, [train_len, test_len])
+    noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len])
+    noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
+    linear_trainset = TensorDataset(noisy_train_x, noisy_train_y)
+    linear_testset = TensorDataset(noisy_test_x, noisy_test_y)
+    true_linear_trainset = TensorDataset(true_train_x, true_train_y,
+            noisy_train_x, noisy_train_y)
+    true_linear_testset = TensorDataset(true_test_x, true_test_y,
+            noisy_test_x, noisy_test_y)
     if not return_ground_truth:
         return linear_trainset, linear_testset
     else:
-        return linear_trainset, linear_testset, (true_x, true_y)
+        return linear_trainset, linear_testset, true_linear_trainset,\
+            true_linear_testset
+
diff --git a/EIVPackage/EIVData/quadratic.py b/EIVPackage/EIVData/quadratic.py
index 9b817a5445b4790417e4dd07a2cc15d44950eb94..aa4f4605094822116d36e12f64e854c38849a406 100644
--- a/EIVPackage/EIVData/quadratic.py
+++ b/EIVPackage/EIVData/quadratic.py
@@ -1,6 +1,6 @@
 import torch
 import sys
-from torch.utils.data import TensorDataset, random_split
+from torch.utils.data import TensorDataset
 
 total_number_of_datapoints = 2000
 input_range = [-1,1]
@@ -11,7 +11,8 @@ y_noise_strength = 0.1
 
 def get_normalization(*args):
     """
-    Returns the mean and standard deviations (in tuples) of the tensors in *args.
+    Returns the mean and standard deviations (in tuples) of the tensors in
+    *args.
     """
     normalization_collection = []
     for t in args:
@@ -30,12 +31,14 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
     :param normalize: Whether to normalize the data, defaults to True.
     :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
     also be returned. Defaults to False.
-    :returns: linear_trainset, linear_testset if return_ground_truth is False,
-    else linear_trainset, linear_testset, (true_x, true_y)
+    :returns: quadratic_trainset, quadratic_testset if return_ground_truth is False,
+    else quadratic_trainset, quadratic_testset,  true_quadratic_trainset,
+    true_quadratic_testset. The later two return **four tensors**: The true x,y and
+    their noisy counterparts.
     """
     random_generator = torch.Generator().manual_seed(seed)
     # draw different seeds for noise and splitting
-    seeds = torch.randint(0,sys.maxsize,(4,), generator=random_generator)
+    seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator)
     # create new generators from tensor seeds
     create_generator = lambda tensor_seed:\
             torch.Generator().manual_seed(tensor_seed.item())
@@ -55,14 +58,22 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
         true_x = (true_x-normalization_x[0])/normalization_x[1]
         noisy_y = (noisy_y-normalization_y[0])/normalization_y[1]
         true_y = (true_y-normalization_y[0])/normalization_y[1]
-    linear_dataset = TensorDataset(noisy_x, noisy_y)
-    dataset_len = len(linear_dataset)
+    dataset_len = noisy_x.shape[0]
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
-    linear_trainset, linear_testset = random_split(linear_dataset,
-            lengths=[train_len, test_len],
-            generator=create_generator(seeds[3]))
+    true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
+    true_train_y, true_test_y = torch.split(true_y, [train_len, test_len])
+    noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len])
+    noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
+    quadratic_trainset = TensorDataset(noisy_train_x, noisy_train_y)
+    quadratic_testset = TensorDataset(noisy_test_x, noisy_test_y)
+    true_quadratic_trainset = TensorDataset(true_train_x, true_train_y,
+            noisy_train_x, noisy_train_y)
+    true_quadratic_testset = TensorDataset(true_test_x, true_test_y,
+            noisy_test_x, noisy_test_y)
     if not return_ground_truth:
-        return linear_trainset, linear_testset
+        return quadratic_trainset, quadratic_testset
     else:
-        return linear_trainset, linear_testset, (true_x, true_y)
+        return quadratic_trainset, quadratic_testset, true_quadratic_trainset,\
+            true_quadratic_testset
+
diff --git a/EIVPackage/EIVGeneral/coverage_metrics.py b/EIVPackage/EIVGeneral/coverage_metrics.py
index 3bafa96fcf3cb72e17ac075dff300309759dd9e9..7515497349f2e9636bd44b9c8955a9937a9e2224 100644
--- a/EIVPackage/EIVGeneral/coverage_metrics.py
+++ b/EIVPackage/EIVGeneral/coverage_metrics.py
@@ -24,7 +24,7 @@ def multivariate_interval_length(dim, q=0.95):
     Returns the half side length of multivariate cube, symmetrically centered
     around 0 such that its measure under a standard normal distribution equals
     `q`.
-    :param dim: A non-negative integer, the dimension.
+    :param dim: A positive integer, the dimension.
     :param q: Float, should be between 0 and 1. Defaults to 0.95.
     """
     # use independence of components to reduce to a univariate quantile
@@ -32,21 +32,24 @@ def multivariate_interval_length(dim, q=0.95):
     return scipy.stats.norm.ppf(univariate_quantile)
 
 
-def epistemic_coverage(not_averaged_predictions,  y, q=0.95, normalize_errors=False):
+def epistemic_coverage(not_averaged_predictions,  y, q=0.95,
+        normalize_errors=False,
+        noisy_y=True):
     """
     Returns the average coverage of `y` by the interval 
-    "prefactor * (predictions + q-Interval)",
-    where 
+    "predictions + prefactor * q-Interval", where 
     - "q-Interval" is the interval of measure `q` under the standard normal, 
-    where
     - "predictions" are the entries of the first component of the tuple
-      `not_averaged_predictions`, 
+      `not_averaged_predictions` averaged over their second dimension.
     - "prefactor either equals the epistemic uncertainty, computed from the
       first component of `not_averaged_predictions`,if
     `normalize_errors` is set to False, or 1 if it is true. 
     The coverage is returned as given by the `y` and as a theoretical_coverage
-    computed from the epistemic uncertainty  and the aleatoric uncertainty
+    computed from the epistemic uncertainty and the aleatoric uncertainty
     (second component of `not_averaged_predictions`).
+    ** Note **: If `noisy_y` is True, the `y` will be treated as the unnoisy
+    ground truth. If it is False (default) it will be treated as noisy. For
+    real data only use the later. 
     :param not_averaged_predictions: A tuple of tensors as in the output of
     `FNNEIV.predict` with `take_average_of_prediction` set to `False`, i.e.:
     the predictions of the neural net not averaged over the first dimension 
@@ -58,43 +61,48 @@ def epistemic_coverage(not_averaged_predictions,  y, q=0.95, normalize_errors=Fa
     :param normalize_errors: If True, the deviations between predictions and
     `y` are normalized by the total uncertainty, computed from the aleatoric
     and epistemic uncertainty and the coverage w.r.t. q-interval is computed.
+    :param noisy_y: Boolean. If True (the default), `y` is treated as noisy and
+    the total uncertainty is considered. If False, `y` is treated as the
+    unnoisy ground truth.
     :returns: numerical_coverage, theoretical_coverage
     """
     out, sigmas = not_averaged_predictions
-    y = y[:,None,...]
-    sigmas = sigmas[:,None,...]
     # add an output axis if necessary
-    if len(y.shape) <= 2:
+    if len(y.shape) <= 1:
         y = y[...,None]
-    if len(sigmas.shape) <= 2:
+    if len(sigmas.shape) <= 1:
         sigmas = sigmas[...,None]
     # squeeze last dimensions into one
-    y = y.view((*y.shape[:2], -1))
-    sigmas = sigmas.view((*sigmas.shape[:2], -1))
+    y = y.view((y.shape[0], -1))
+    sigmas = sigmas.view((sigmas.shape[0], -1))
     out = out.view((*out.shape[:2], -1))
-    # check if dimensions consistent
-    assert y.shape == sigmas.shape
-    assert y.shape[0] == out.shape[0]
-    assert y.shape[2] == out.shape[2]
+    # check if dimensions are consistent
     # compute epistemic uncertainty
-    epis_unc = torch.std(out, dim=1, keepdim=True)
-    # compute total uncertainty
+    epis_unc = torch.std(out, dim=1)
+    out = torch.mean(out, dim=1)
+    assert y.shape == sigmas.shape
+    assert y.shape == out.shape
     assert epis_unc.shape == sigmas.shape
-    total_unc = torch.sqrt(epis_unc**2 + sigmas **2)
+    # compute total uncertainty
+    if noisy_y:
+        total_unc = torch.sqrt(epis_unc**2 + sigmas **2)
+    else:
+        # for unnoisy y, the aleatoric uncertainty is treated as 0
+        total_unc = epis_unc
     # fix interval based on epis_unc
+    out_dim = y.shape[1]
     if not normalize_errors:
-        interval_length = multivariate_interval_length(dim=y.shape[1], q=q) \
+        interval_length = multivariate_interval_length(dim=out_dim, q=q) \
                 * epis_unc
     else:
-        interval_length = multivariate_interval_length(dim=y.shape[1], q=q) 
+        interval_length = multivariate_interval_length(dim=out_dim, q=q) 
     # numerical computation
     errors = out - y
     if normalize_errors:
-        assert errors.shape[0] == total_unc.shape[0]
-        assert errors.shape[2] == total_unc.shape[2]
+        assert errors.shape == total_unc.shape
         errors /= total_unc
     check_if_in_interval = logical_and_along_dimension(
-            torch.abs(errors) <= interval_length, dim=2)
+            torch.abs(errors) <= interval_length, dim=1)
     numerical_coverage = torch.mean(
             check_if_in_interval.to(torch.float32)
             ).cpu().detach().item()
@@ -103,11 +111,10 @@ def epistemic_coverage(not_averaged_predictions,  y, q=0.95, normalize_errors=Fa
         cdf_args = (interval_length/total_unc).detach().cpu().numpy()
         cdf_values = scipy.stats.norm.cdf(cdf_args)
         prob_values = 2*cdf_values -1
-        assert len(cdf_values.shape) == 3
-        assert cdf_values.shape[1] == 1
+        assert len(cdf_values.shape) == 2
         # take product over feature dimension 
         # and average over batch dimension
-        theoretical_coverage = np.mean(np.prod(prob_values, axis=2)).item()
+        theoretical_coverage = np.mean(np.prod(prob_values, axis=1)).item()
     else:
         theoretical_coverage = q
     return numerical_coverage, theoretical_coverage
diff --git a/Experiments/create_tabular.py b/Experiments/create_tabular.py
index bd7f32370c5e625caaa36bd206efe82c6ca9961a..15e4f826baa8bfc61520267190f46f63426d6c32 100644
--- a/Experiments/create_tabular.py
+++ b/Experiments/create_tabular.py
@@ -1,9 +1,10 @@
 import os
 import glob
+import argparse
 import json
 
-metrics_to_display = ['rmse','logdens','bias','coverage_normalized']
-
+metrics_to_display = ['rmse','logdens','bias','true_coverage_numerical']
+show_incomplete = True
 
 list_of_result_files = glob.glob(os.path.join('results','*.json'))
 results = {}
@@ -12,6 +13,16 @@ for filename in list_of_result_files:
     with open(filename,'r') as f:
        results[data] = json.load(f)
 
+def save_readout(dictionary, key):
+    """
+    Returns the value of the `dictionary` for `key`, unless
+    the later doesn't exist, in which case (None,None) is returned.
+    """
+    try:
+        return dictionary[key]
+    except KeyError:
+        return (None,None)
+
 ## header
 header_string = 'DATA                '
 offset = 20
@@ -21,19 +32,25 @@ print(header_string)
 print(offset * '_' + 70 * '_')
 ## results
 for data in results.keys():
-    noneiv_results = [results[data]['noneiv'][metric]
+    noneiv_results = [save_readout(results[data]['noneiv'], metric)
             for metric in metrics_to_display]
     noneiv_row_name  = f'{data} - nonEiV:'
     noneiv_results_string = noneiv_row_name + (offset - len(noneiv_row_name)) * ' '
     for [metric_mean, metric_std] in noneiv_results:
-        noneiv_results_string += f'  {metric_mean:.3f} ({metric_std:.3f})'
+        if metric_mean is None:
+            noneiv_results_string += '  None (None)'
+        else:
+            noneiv_results_string += f'  {metric_mean:.3f} ({metric_std:.3f})'
     print(noneiv_results_string)
-    eiv_results = [results[data]['eiv'][metric]
+    eiv_results = [save_readout(results[data]['eiv'],metric)
             for metric in metrics_to_display]
     eiv_row_name  = f'{data} - EiV:'
     eiv_results_string = eiv_row_name + (offset - len(eiv_row_name)) * ' '
     for [metric_mean, metric_std] in eiv_results:
-        eiv_results_string += f'  {metric_mean:.3f} ({metric_std:.3f})'
+        if metric_mean is None:
+            eiv_results_string += '  None (None)'
+        else:
+            eiv_results_string += f'  {metric_mean:.3f} ({metric_std:.3f})'
     print(eiv_results_string)
     print(offset * '_' + 70 * '_')
 
diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py
index d694558ba35333831660c26edab47bc0d3d014f1..e72dc1180fd7bcd041a58ac5ff895524e7198457 100644
--- a/Experiments/evaluate_metrics.py
+++ b/Experiments/evaluate_metrics.py
@@ -59,15 +59,17 @@ except KeyError:
     device = torch.device('cpu')
 
 
-def collect_metrics(x,y, seed=0,
+def collect_metrics(x_y_pairs, seed=0,
     noneiv_number_of_draws=100, eiv_number_of_draws=[100,5],
     decouple_dimensions=False, device=device,
     scale_outputs=scale_outputs):
     """
     Compute various metrics for EiV and non-EiV. Will be returned as
     dictionaries.
-    :param x: A torch.tensor, taken as input
-    :param y: A torch.tensor, taken as output
+    :param x_y_pairs: A tuple of either the shape (None,None,x,y) or 
+    (x_true,y_true,x,y) containing torch.tensor or None. x and y are
+    considered as input and corresponding label. If the first two components
+    are not None, they are considered to be the unnoisy counterparts.
     :param seed: Integer. The seed used for loading, defaults to 0.
     :param noneiv_number_of_draws: Number of draws for non-EiV model
     for sampling from the posterior predictive. Defaults to 100.
@@ -82,8 +84,13 @@ def collect_metrics(x,y, seed=0,
     the log-dens to make them comparable with the literature.
     :returns: Dictionaries noneiv_metrics, eiv_metrics
     """
+    true_x, true_y, x, y = x_y_pairs
     x,y = x.to(device), y.to(device)
-
+    if true_x is not None:
+        assert true_y is not None
+        true_x,true_y = true_x.to(device), true_y.to(device)
+    else:
+        assert true_y is None
 
     # non-EiV
     noneiv_metrics = {}
@@ -124,7 +131,15 @@ def collect_metrics(x,y, seed=0,
     noneiv_metrics['coverage_normalized'],_ =\
             epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
     noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y)
-    
+
+    # metrics that need a ground truth
+    if true_x is not None:
+        noneiv_metrics['true_coverage_numerical'],\
+                noneiv_metrics['true_coverage_theory'] =\
+                epistemic_coverage(not_averaged_predictions, true_y,
+                        normalize_errors=False, noisy_y=False)
+        true_res = true_y - noneiv_mean
+        noneiv_metrics['true_rmse'] = np.sqrt(np.mean(scaled_res**2))
 
 
     # NLL
@@ -186,6 +201,15 @@ def collect_metrics(x,y, seed=0,
             epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
     eiv_metrics['res_std' ]= normalized_std(not_averaged_predictions, y)
 
+    # metrics that need a ground truth
+    if true_x is not None:
+        eiv_metrics['true_coverage_numerical'],\
+                eiv_metrics['true_coverage_theory'] =\
+                epistemic_coverage(not_averaged_predictions, true_y,
+                        normalize_errors=False, noisy_y=False)
+
+        true_res = true_y - eiv_mean
+        eiv_metrics['true_rmse'] = np.sqrt(np.mean(scaled_res**2))
 
     # NLL
     if scale_outputs:
@@ -208,29 +232,47 @@ def collect_metrics(x,y, seed=0,
     return noneiv_metrics, eiv_metrics
 
 
-collection_keys = ['rmse','logdens','bias','coverage_numerical',
-        'coverage_theory','coverage_normalized','res_std']
 noneiv_metrics_collection = {}
 eiv_metrics_collection = {}
-for key in collection_keys:
-    noneiv_metrics_collection[key] = []
-    eiv_metrics_collection[key] = []
+collection_keys = []
 num_test_epochs = 10
 assert noneiv_conf_dict["seed_range"] == eiv_conf_dict["seed_range"]
 seed_list = range(noneiv_conf_dict["seed_range"][0],
         noneiv_conf_dict["seed_range"][1])
 max_batch_number = 2
 for seed in tqdm(seed_list):
-    train_data, test_data = load_data(seed=seed)
-    test_dataloader = DataLoader(test_data,
+    try:
+        train_data, test_data, true_train_data, true_test_data \
+                = load_data(seed=seed, return_ground_truth=True)
+    except TypeError:
+        train_data, test_data = load_data(seed=seed)
+        true_train_data, true_test_data = None, None
+    if true_test_data is None:
+        test_dataloader = DataLoader(test_data,
             batch_size=int(np.min((len(test_data),
         800))), shuffle=True)
+    else:
+        test_dataloader = DataLoader(true_test_data,
+                batch_size=int(np.min((len(true_test_data), 800))), shuffle=True)
     for i in tqdm(range(num_test_epochs)):
-        for j, (x,y) in enumerate(test_dataloader):
+        for j, x_y_pairs in enumerate(test_dataloader):
             if j > max_batch_number:
                 break
-
-            noneiv_metrics, eiv_metrics = collect_metrics(x,y, seed=seed)
+            # fill in ground truth with None, if not existent
+            if true_test_data is None:
+                x_y_pairs = (None, None, *x_y_pairs)
+            # should contain (true_x,true_y,x,y) or (None,None,x,y)
+            assert len(x_y_pairs) == 4
+            noneiv_metrics, eiv_metrics = collect_metrics(x_y_pairs,
+                    seed=seed)
+            if i==0 and j==0:
+                # fill collection keys
+                assert eiv_metrics.keys() == noneiv_metrics.keys()
+                collection_keys = list(eiv_metrics.keys())
+                for key in collection_keys:
+                    noneiv_metrics_collection[key] = []
+                    eiv_metrics_collection[key] = []
+            # collect results
             for key in collection_keys:
                 noneiv_metrics_collection[key].append(noneiv_metrics[key])
                 eiv_metrics_collection[key].append(eiv_metrics[key])
diff --git a/Experiments/train_eiv.py b/Experiments/train_eiv.py
index 68229469c167fc934a06bbf61e7c5673a3005b12..116408f467339a8bcef4e1b63c8b7896f693c87b 100644
--- a/Experiments/train_eiv.py
+++ b/Experiments/train_eiv.py
@@ -19,7 +19,7 @@ from EIVTrainingRoutines import train_and_store, loss_functions
 
 # read in data via --data option
 parser = argparse.ArgumentParser()
-parser.add_argument("--data", help="Loads data", default='california')
+parser.add_argument("--data", help="Loads data", default='linear')
 parser.add_argument("--no-autoindent", help="",
         action="store_true") # to avoid conflics in IPython
 args = parser.parse_args()
diff --git a/Experiments/train_noneiv.py b/Experiments/train_noneiv.py
index ee5687b268d762aeadac294ebdd6e43c37a25149..a585ce62db5287d8972210fc7604656678138440 100644
--- a/Experiments/train_noneiv.py
+++ b/Experiments/train_noneiv.py
@@ -19,7 +19,7 @@ from EIVTrainingRoutines import train_and_store, loss_functions
 
 # read in data via --data option
 parser = argparse.ArgumentParser()
-parser.add_argument("--data", help="Loads data", default='california')
+parser.add_argument("--data", help="Loads data", default='linear')
 parser.add_argument("--no-autoindent", help="",
         action="store_true") # to avoid conflics in IPython
 args = parser.parse_args()