diff --git a/EIVPackage/EIVGeneral/coverage_metrics.py b/EIVPackage/EIVGeneral/coverage_metrics.py
index ee1b6515c6713db1342208091f6ca0afd20ec017..9fd3ff5dd92a580c976e6cbe658f0f21655b39b1 100644
--- a/EIVPackage/EIVGeneral/coverage_metrics.py
+++ b/EIVPackage/EIVGeneral/coverage_metrics.py
@@ -34,6 +34,7 @@ def multivariate_interval_length(dim, q=0.95):
 
 def epistemic_coverage(not_averaged_predictions,  y, q=0.95,
         normalize_errors=False,
+        average_predictions=True,
         noisy_y=True):
     """
     Returns the average coverage of `y` by the interval 
@@ -41,7 +42,7 @@ def epistemic_coverage(not_averaged_predictions,  y, q=0.95,
     - "q-Interval" is the interval of measure `q` under the standard normal, 
     where
     - "predictions" are the entries of the first component of the tuple
-      `not_averaged_predictions`, 
+      `not_averaged_predictions`, averaged if `average_predictions` is True.
     - "prefactor either equals the epistemic uncertainty, computed from the
       first component of `not_averaged_predictions`,if
     `normalize_errors` is set to False, or 1 if it is true. 
@@ -62,12 +63,15 @@ def epistemic_coverage(not_averaged_predictions,  y, q=0.95,
     :param normalize_errors: If True, the deviations between predictions and
     `y` are normalized by the total uncertainty (computed from the aleatoric
     and epistemic uncertainty) and the coverage w.r.t. the q-interval is computed.
+    :param average_predictions: If True, average the predictions before
+    computing the coverage. Defaults to True.
     :param noisy_y: Boolean. If True (the default), `y` is treated as noisy and
     the total uncertainty is considered. If False, `y` is treated as the
     unnoisy ground truth.
     :returns: numerical_coverage, theoretical_coverage
     """
     out, sigmas = not_averaged_predictions
+    # add repetition axis
     y = y[:,None,...]
     sigmas = sigmas[:,None,...]
     # add an output axis if necessary
@@ -85,6 +89,9 @@ def epistemic_coverage(not_averaged_predictions,  y, q=0.95,
     assert y.shape[2] == out.shape[2]
     # compute epistemic uncertainty
     epis_unc = torch.std(out, dim=1, keepdim=True)
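+    # average over the repetition axis (note: epis_unc is computed first)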
+    if average_predictions:
+        out = torch.mean(out, dim=1, keepdim=True)
     assert epis_unc.shape == sigmas.shape
     # compute total uncertainty
     if noisy_y:
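
For context, a minimal sketch of the shape bookkeeping behind the new
`average_predictions` flag (tensor names and shapes are assumptions inferred
from the hunks above, not taken from the full source):

    import torch

    # assumed shapes, inferred from the diff:
    # out: (n_samples, n_repetitions, out_dim), y: (n_samples, out_dim)
    out = torch.randn(8, 5, 1)
    y = torch.randn(8, 1)

    y = y[:, None, ...]                             # add repetition axis
    epis_unc = torch.std(out, dim=1, keepdim=True)  # epistemic uncertainty
    # with average_predictions=True, residuals use the mean prediction
    out = torch.mean(out, dim=1, keepdim=True)
    residuals = y - out                             # broadcasts over axis 1
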
diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py
index db8d25d525bb2ccf019beb4e3f7e72409b57205f..599dd94c61d34f273d4403c76d4dbe6b04ebdc5c 100644
--- a/Experiments/evaluate_metrics.py
+++ b/Experiments/evaluate_metrics.py
@@ -19,7 +19,7 @@ from EIVGeneral.coverage_metrics import epistemic_coverage, normalized_std
 
 # read in data via --data option
 parser = argparse.ArgumentParser()
-parser.add_argument("--data", help="Loads data", default='linear')
+parser.add_argument("--data", help="Loads data", default='quadratic')
 parser.add_argument("--no-autoindent", help="",
         action="store_true") # to avoid conflics in IPython
 args = parser.parse_args()
@@ -127,7 +127,7 @@ def collect_metrics(x_y_pairs, seed=0,
     noneiv_metrics['rmse'] = np.sqrt(np.mean(scaled_res**2))
     noneiv_metrics['bias'] = np.mean(scaled_res)
     noneiv_metrics['coverage_numerical'], noneiv_metrics['coverage_theory'] =\
-            epistemic_coverage(not_averaged_predictions, y, normalize_errors=False)
+            epistemic_coverage(not_averaged_predictions, y, normalize_errors=False, average_predictions=True)
     noneiv_metrics['coverage_normalized'],_ =\
             epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
     noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y)
@@ -137,6 +137,7 @@ def collect_metrics(x_y_pairs, seed=0,
         noneiv_metrics['true_coverage_numerical'],\
                 noneiv_metrics['true_coverage_theory'] =\
                 epistemic_coverage(not_averaged_predictions, true_y,
+                        average_predictions=True,
                         normalize_errors=False, noisy_y=False)
 
 
@@ -194,7 +195,7 @@ def collect_metrics(x_y_pairs, seed=0,
     eiv_metrics['rmse' ]= np.sqrt(np.mean(scaled_res**2))
     eiv_metrics['bias' ]= np.mean(scaled_res)
     eiv_metrics['coverage_numerical'], eiv_metrics['coverage_theory'] =\
-            epistemic_coverage(not_averaged_predictions, y, normalize_errors=False)
+            epistemic_coverage(not_averaged_predictions, y, normalize_errors=False, average_predictions=True)
     eiv_metrics['coverage_normalized'],_ =\
             epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
     eiv_metrics['res_std' ]= normalized_std(not_averaged_predictions, y)
@@ -204,6 +205,7 @@ def collect_metrics(x_y_pairs, seed=0,
         eiv_metrics['true_coverage_numerical'],\
                 eiv_metrics['true_coverage_theory'] =\
                 epistemic_coverage(not_averaged_predictions, true_y,
+                        average_predictions=True,
                         normalize_errors=False, noisy_y=False)
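
As a quick illustration of what passing `average_predictions=True` changes in
these calls (a hedged sketch; `coverage_fraction` is a hypothetical stand-in
that only mimics the interval count, not the full metric):

    import torch

    def coverage_fraction(pred, y, unc, q_factor=1.96):
        # fraction of points falling inside pred +/- q_factor * unc
        return ((y - pred).abs() <= q_factor * unc).float().mean().item()

    out = torch.randn(1000, 5, 1)           # (samples, repetitions, out_dim)
    y = torch.zeros(1000, 1, 1)
    epis_unc = torch.std(out, dim=1, keepdim=True)

    per_repetition = coverage_fraction(out, y, epis_unc)   # old behaviour
    averaged = coverage_fraction(                          # new default here
            out.mean(dim=1, keepdim=True), y, epis_unc)
    print(per_repetition, averaged)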