diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py
index e72dc1180fd7bcd041a58ac5ff895524e7198457..2f8f6c849c90ea279fd0e90984dea3188715e1f9 100644
--- a/Experiments/evaluate_metrics.py
+++ b/Experiments/evaluate_metrics.py
@@ -111,7 +111,8 @@ def collect_metrics(x_y_pairs, seed=0,
     # RMSE
     training_state = net.training
     net.train()
-    not_averaged_predictions = net.predict(x, number_of_draws=noneiv_number_of_draws, 
+    not_averaged_predictions = net.predict(x,\
+            number_of_draws=noneiv_number_of_draws, 
             take_average_of_prediction=False)
     noneiv_mean = torch.mean(not_averaged_predictions[0], dim=1)
     if len(y.shape) <= 1:
@@ -127,9 +128,11 @@ def collect_metrics(x_y_pairs, seed=0,
     noneiv_metrics['rmse'] = np.sqrt(np.mean(scaled_res**2))
     noneiv_metrics['bias'] = np.mean(scaled_res)
     noneiv_metrics['coverage_numerical'], noneiv_metrics['coverage_theory'] =\
-            epistemic_coverage(not_averaged_predictions, y, normalize_errors=False)
+            epistemic_coverage(not_averaged_predictions, y,\
+            normalize_errors=False)
     noneiv_metrics['coverage_normalized'],_ =\
-            epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
+            epistemic_coverage(not_averaged_predictions, y,\
+            normalize_errors=True)
     noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y)
 
     # metrics that need a ground truth
@@ -138,8 +141,8 @@ def collect_metrics(x_y_pairs, seed=0,
                 noneiv_metrics['true_coverage_theory'] =\
                 epistemic_coverage(not_averaged_predictions, true_y,
                         normalize_errors=False, noisy_y=False)
-        true_res = true_y - noneiv_mean
-        noneiv_metrics['true_rmse'] = np.sqrt(np.mean(scaled_res**2))
+        true_res = (true_y - noneiv_mean).detach().cpu().numpy().flatten()
+        noneiv_metrics['true_rmse'] = np.sqrt(np.mean(true_res**2))
 
 
     # NLL
@@ -208,8 +211,8 @@ def collect_metrics(x_y_pairs, seed=0,
                 epistemic_coverage(not_averaged_predictions, true_y,
                         normalize_errors=False, noisy_y=False)
 
-        true_res = true_y - eiv_mean
-        eiv_metrics['true_rmse'] = np.sqrt(np.mean(scaled_res**2))
+        true_res = (true_y - eiv_mean).detach().cpu().numpy().flatten()
+        eiv_metrics['true_rmse'] = np.sqrt(np.mean(true_res**2))
 
     # NLL
     if scale_outputs: