diff --git a/EIVPackage/EIVArchitectures/Networks.py b/EIVPackage/EIVArchitectures/Networks.py index 56c76336dacc479e25a4915e941fc336f77282ee..ccae84842e6a29979c0a72156606ddcfaf8ca3e0 100644 --- a/EIVPackage/EIVArchitectures/Networks.py +++ b/EIVPackage/EIVArchitectures/Networks.py @@ -244,6 +244,11 @@ class FNNEIV(nn.Module): :param average_batch_dimension: Boolean. If True (default) the values will be averaged over the batch dimension. If False, the batch dimension will be left untouched and all values will be returned. +:param scale_labels: If not None (the default), scale labels in evaluation to + make results comparable with the literature. +:param decouple_dimensions: If True, treat dimensions separately and finally + average, to make results comparable with the literature. Defaults to + False. """ out, sigmas = self.predict(x, number_of_draws=number_of_draws, number_of_parameter_chunks=number_of_parameter_chunks, @@ -437,6 +442,11 @@ class FNNBer(nn.Module): :param average_batch_dimension: Boolean. If True (default) the values will be averaged over the batch dimension. If False, the batch dimension will be left untouched and all values will be returned. +:param scale_labels: If not None (the default), scale labels in evaluation to + make results comparable with the literature. +:param decouple_dimensions: If True, treat dimensions separately and finally + average, to make results comparable with the literature. Defaults to + False. 
""" out, sigmas = self.predict(x, number_of_draws=number_of_draws, take_average_of_prediction=False, remove_graph=remove_graph) diff --git a/Experiments/evaluate_tabular.py b/Experiments/evaluate_tabular.py index cfa6664eed259441c865c36612d00063f9d0098f..6a738bdaae782541924d8c474efb90c7eac6b8b0 100644 --- a/Experiments/evaluate_tabular.py +++ b/Experiments/evaluate_tabular.py @@ -14,6 +14,7 @@ from EIVGeneral.coverage_metrices import epistemic_coverage, normalized_std long_dataname = 'energy_efficiency' short_dataname = 'energy' +scale_outputs = False load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data train_noneiv = importlib.import_module(f'train_noneiv_{short_dataname}') train_eiv = importlib.import_module(f'train_eiv_{short_dataname}') @@ -24,7 +25,8 @@ output_dim = train_data[0][1].numel() def collect_metrics(x,y, seed=0, noneiv_number_of_draws=100, eiv_number_of_draws=[100,5], - decouple_dimensions=False, device=torch.device('cuda:1')): + decouple_dimensions=False, device=torch.device('cuda:1'), + scale_outputs=scale_outputs): """ Compute various metrics for EiV and non-EiV. Will be returned as dictionaries. @@ -39,6 +41,8 @@ def collect_metrics(x,y, seed=0, of Gal et al. is followed where, in the evaluation of the log-posterior-predictive, each dimension is treated independently and then averaged. If False (default), a multivariate distribution is used. +:param scale_outputs: Boolean, scale the outputs for the RMSE, the bias and + the log-dens to make them comparable with the literature. 
:returns: Dictionaries noneiv_metrics, eiv_metrics """ x,y = x.to(device), y.to(device) @@ -70,8 +74,11 @@ def collect_metrics(x,y, seed=0, y = y.view((-1,1)) assert y.shape == prediction_triple[0].shape res = y-prediction_triple[0] - scale = train_data.dataset.std_labels.to(device) - scaled_res = res * scale.view((1,-1)) + if scale_outputs: + scale = train_data.dataset.std_labels.to(device) + scaled_res = res * scale.view((1,-1)) + else: + scaled_res = res scaled_res = scaled_res.detach().cpu().numpy().flatten() noneiv_metrics['rmse'] = np.sqrt(np.mean(scaled_res**2)) noneiv_metrics['bias'] = np.mean(scaled_res) @@ -84,11 +91,14 @@ def collect_metrics(x,y, seed=0, # NLL - noneiv_metrics['logdens' ]= net.predictive_logdensity(x, y, number_of_draws=100, + if scale_outputs: + scale_labels = train_data.dataset.std_labels.view((-1,)).to(device) + else: + scale_labels = None + noneiv_metrics['logdens' ]= net.predictive_logdensity(x, y, + number_of_draws=100, decouple_dimensions=decouple_dimensions, - scale_labels=\ - train_data.dataset.std_labels.view((-1,)).to(device)\ - ).mean().detach().cpu().numpy() + scale_labels=scale_labels).mean().detach().cpu().numpy() if training_state: net.train() else: @@ -124,7 +134,11 @@ def collect_metrics(x,y, seed=0, assert y.shape == prediction_triple[0].shape res = y-prediction_triple[0] scale = train_data.dataset.std_labels.to(device) - scaled_res = res * scale.view((1,-1)) + if scale_outputs: + # scale is already computed above; do not recompute it here + scaled_res = res * scale.view((1,-1)) + else: + scaled_res = res scaled_res = scaled_res.detach().cpu().numpy().flatten() eiv_metrics['rmse' ]= np.sqrt(np.mean(scaled_res**2)) eiv_metrics['bias' ]= np.mean(scaled_res) @@ -136,12 +150,14 @@ def collect_metrics(x,y, seed=0, # NLL + if scale_outputs: + scale_labels = train_data.dataset.std_labels.view((-1,)).to(device) + else: + scale_labels = None eiv_metrics['logdens' ]= net.predictive_logdensity(x, y, number_of_draws=eiv_number_of_draws, 
decouple_dimensions=decouple_dimensions, - scale_labels=\ - train_data.dataset.std_labels.view((-1,)).to(device)\ - ).mean().detach().cpu().numpy() + scale_labels=scale_labels).mean().detach().cpu().numpy() if training_state: net.train() else: @@ -153,7 +169,8 @@ def collect_metrics(x,y, seed=0, return noneiv_metrics, eiv_metrics -collection_keys = ['rmse','logdens','bias','coverage_numerical','coverage_theory','coverage_normalized','res_std'] +collection_keys = ['rmse','logdens','bias','coverage_numerical', + 'coverage_theory','coverage_normalized','res_std'] noneiv_metrics_collection = {} eiv_metrics_collection = {} for key in collection_keys: