diff --git a/EIVPackage/EIVGeneral/coverage_metrics.py b/EIVPackage/EIVGeneral/coverage_metrics.py
index 9fd3ff5dd92a580c976e6cbe658f0f21655b39b1..7515497349f2e9636bd44b9c8955a9937a9e2224 100644
--- a/EIVPackage/EIVGeneral/coverage_metrics.py
+++ b/EIVPackage/EIVGeneral/coverage_metrics.py
@@ -34,15 +34,13 @@ def multivariate_interval_length(dim, q=0.95):
 
 def epistemic_coverage(not_averaged_predictions,  y, q=0.95,
         normalize_errors=False,
-        average_predictions=True,
         noisy_y=True):
     """
     Returns the average coverage of `y` by the interval 
     "predictions + prefactor * q-Interval", where 
     - "q-Interval" is the interval of measure `q` under the standard normal, 
-    where
     - "predictions" are the entries of the first component of the tuple
-      `not_averaged_predictions`, averaged if `average_predictions` is True.
+      `not_averaged_predictions`, averaged over their second dimension.
     - "prefactor either equals the epistemic uncertainty, computed from the
       first component of `not_averaged_predictions`,if
     `normalize_errors` is set to False, or 1 if it is true. 
@@ -63,34 +61,27 @@ def epistemic_coverage(not_averaged_predictions,  y, q=0.95,
     :param normalize_errors: If True, the deviations between predictions and
     `y` are normalized by the total uncertainty, computed from the aleatoric
     and epistemic uncertainty and the coverage w.r.t. q-interval is computed.
-    :param average_predictions: If True, average the predictions before
-    computing the coverage. Defaults to False.
     :param noisy_y: Boolean. If True (the default), `y` is treated as noisy and
     the total uncertainty is considered. If False, `y` is treated as the
     unnoisy ground truth.
     :returns: numerical_coverage, theoretical_coverage
     """
     out, sigmas = not_averaged_predictions
-    # add repetition axis
-    y = y[:,None,...]
-    sigmas = sigmas[:,None,...]
     # add an output axis if necessary
-    if len(y.shape) <= 2:
+    if len(y.shape) <= 1:
         y = y[...,None]
-    if len(sigmas.shape) <= 2:
+    if len(sigmas.shape) <= 1:
         sigmas = sigmas[...,None]
     # squeeze last dimensions into one
-    y = y.view((*y.shape[:2], -1))
-    sigmas = sigmas.view((*sigmas.shape[:2], -1))
+    y = y.view((y.shape[0], -1))
+    sigmas = sigmas.view((sigmas.shape[0], -1))
     out = out.view((*out.shape[:2], -1))
     # check if dimensions are consistent
-    assert y.shape == sigmas.shape
-    assert y.shape[0] == out.shape[0]
-    assert y.shape[2] == out.shape[2]
     # compute epistemic uncertainty
-    epis_unc = torch.std(out, dim=1, keepdim=True)
-    if average_predictions:
-        out = torch.mean(out, dim=1, keepdim=True)
+    epis_unc = torch.std(out, dim=1)
+    out = torch.mean(out, dim=1)
+    assert y.shape == sigmas.shape
+    assert y.shape == out.shape
     assert epis_unc.shape == sigmas.shape
     # compute total uncertainty
     if noisy_y:
@@ -99,7 +90,7 @@ def epistemic_coverage(not_averaged_predictions,  y, q=0.95,
         # for unnoisy y, the aleatoric uncertainty is treated as 0
         total_unc = epis_unc
     # fix interval based on epis_unc
-    out_dim = y.shape[2]
+    out_dim = y.shape[1]
     if not normalize_errors:
         interval_length = multivariate_interval_length(dim=out_dim, q=q) \
                 * epis_unc
@@ -108,11 +99,10 @@ def epistemic_coverage(not_averaged_predictions,  y, q=0.95,
     # numerical computation
     errors = out - y
     if normalize_errors:
-        assert errors.shape[0] == total_unc.shape[0]
-        assert errors.shape[2] == total_unc.shape[2]
+        assert errors.shape == total_unc.shape
         errors /= total_unc
     check_if_in_interval = logical_and_along_dimension(
-            torch.abs(errors) <= interval_length, dim=2)
+            torch.abs(errors) <= interval_length, dim=1)
     numerical_coverage = torch.mean(
             check_if_in_interval.to(torch.float32)
             ).cpu().detach().item()
@@ -121,11 +111,10 @@ def epistemic_coverage(not_averaged_predictions,  y, q=0.95,
         cdf_args = (interval_length/total_unc).detach().cpu().numpy()
         cdf_values = scipy.stats.norm.cdf(cdf_args)
         prob_values = 2*cdf_values -1
-        assert len(cdf_values.shape) == 3
-        assert cdf_values.shape[1] == 1
+        assert len(cdf_values.shape) == 2
         # take product over feature dimension 
         # and average over batch dimension
-        theoretical_coverage = np.mean(np.prod(prob_values, axis=2)).item()
+        theoretical_coverage = np.mean(np.prod(prob_values, axis=1)).item()
     else:
         theoretical_coverage = q
     return numerical_coverage, theoretical_coverage
diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py
index b415792857ac162211827b13f5e7a567a08f8fc6..e72dc1180fd7bcd041a58ac5ff895524e7198457 100644
--- a/Experiments/evaluate_metrics.py
+++ b/Experiments/evaluate_metrics.py
@@ -19,7 +19,7 @@ from EIVGeneral.coverage_metrics import epistemic_coverage, normalized_std
 
 # read in data via --data option
 parser = argparse.ArgumentParser()
-parser.add_argument("--data", help="Loads data", default='quadratic')
+parser.add_argument("--data", help="Loads data", default='linear')
 parser.add_argument("--no-autoindent", help="",
         action="store_true") # to avoid conflics in IPython
 args = parser.parse_args()
@@ -127,7 +127,7 @@ def collect_metrics(x_y_pairs, seed=0,
     noneiv_metrics['rmse'] = np.sqrt(np.mean(scaled_res**2))
     noneiv_metrics['bias'] = np.mean(scaled_res)
     noneiv_metrics['coverage_numerical'], noneiv_metrics['coverage_theory'] =\
-            epistemic_coverage(not_averaged_predictions, y, normalize_errors=False, average_predictions=True)
+            epistemic_coverage(not_averaged_predictions, y, normalize_errors=False)
     noneiv_metrics['coverage_normalized'],_ =\
             epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
     noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y)
@@ -137,10 +137,9 @@ def collect_metrics(x_y_pairs, seed=0,
         noneiv_metrics['true_coverage_numerical'],\
                 noneiv_metrics['true_coverage_theory'] =\
                 epistemic_coverage(not_averaged_predictions, true_y,
-                        average_predictions=True,
                         normalize_errors=False, noisy_y=False)
-    true_res = true_y - noneiv_mean
-    noneiv_metrics['true_rmse'] = np.sqrt(np.mean(scaled_res**2))
+        true_res = true_y - noneiv_mean
+        noneiv_metrics['true_rmse'] = np.sqrt(np.mean(scaled_res**2))
 
 
     # NLL
@@ -197,7 +196,7 @@ def collect_metrics(x_y_pairs, seed=0,
     eiv_metrics['rmse' ]= np.sqrt(np.mean(scaled_res**2))
     eiv_metrics['bias' ]= np.mean(scaled_res)
     eiv_metrics['coverage_numerical'], eiv_metrics['coverage_theory'] =\
-            epistemic_coverage(not_averaged_predictions, y, normalize_errors=False, average_predictions=True)
+            epistemic_coverage(not_averaged_predictions, y, normalize_errors=False)
     eiv_metrics['coverage_normalized'],_ =\
             epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
     eiv_metrics['res_std' ]= normalized_std(not_averaged_predictions, y)
@@ -207,11 +206,10 @@ def collect_metrics(x_y_pairs, seed=0,
         eiv_metrics['true_coverage_numerical'],\
                 eiv_metrics['true_coverage_theory'] =\
                 epistemic_coverage(not_averaged_predictions, true_y,
-                        average_predictions=True,
                         normalize_errors=False, noisy_y=False)
 
-    true_res = true_y - eiv_mean
-    eiv_metrics['true_rmse'] = np.sqrt(np.mean(scaled_res**2))
+        true_res = true_y - eiv_mean
+        eiv_metrics['true_rmse'] = np.sqrt(np.mean(scaled_res**2))
 
     # NLL
     if scale_outputs: