Skip to content
Snippets Groups Projects
Commit f58054ef authored by Jörg Martin's avatar Jörg Martin
Browse files

Do not scale labels in evaluate_tabular by default

parent b2eb16c6
No related merge requests found
......@@ -244,6 +244,11 @@ class FNNEIV(nn.Module):
:param average_batch_dimension: Boolean. If True (default) the values
will be averaged over the batch dimension. If False, the batch
dimension will be left untouched and all values will be returned.
:param scale_labels: If not None (None is the default), scale labels in
evaluation to make results comparable with the literature.
:param decouple_dimensions: If True, treat the dimensions separately and
average over them at the end, to make results comparable with the
literature. Defaults to False.
"""
out, sigmas = self.predict(x, number_of_draws=number_of_draws,
number_of_parameter_chunks=number_of_parameter_chunks,
......@@ -437,6 +442,11 @@ class FNNBer(nn.Module):
:param average_batch_dimension: Boolean. If True (default) the values
will be averaged over the batch dimension. If False, the batch
dimension will be left untouched and all values will be returned.
:param scale_labels: If not None (None is the default), scale labels in
evaluation to make results comparable with the literature.
:param decouple_dimensions: If True, treat the dimensions separately and
average over them at the end, to make results comparable with the
literature. Defaults to False.
"""
out, sigmas = self.predict(x, number_of_draws=number_of_draws,
take_average_of_prediction=False, remove_graph=remove_graph)
......
......@@ -14,6 +14,7 @@ from EIVGeneral.coverage_metrices import epistemic_coverage, normalized_std
long_dataname = 'energy_efficiency'
short_dataname = 'energy'
scale_outputs = False
load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
train_noneiv = importlib.import_module(f'train_noneiv_{short_dataname}')
train_eiv = importlib.import_module(f'train_eiv_{short_dataname}')
......@@ -24,7 +25,8 @@ output_dim = train_data[0][1].numel()
def collect_metrics(x,y, seed=0,
noneiv_number_of_draws=100, eiv_number_of_draws=[100,5],
decouple_dimensions=False, device=torch.device('cuda:1')):
decouple_dimensions=False, device=torch.device('cuda:1'),
scale_outputs=scale_outputs):
"""
Compute various metrics for EiV and non-EiV. Will be returned as
dictionaries.
......@@ -39,6 +41,8 @@ def collect_metrics(x,y, seed=0,
of Gal et al. is followed where, in the evaluation of the
log-posterior-predictive, each dimension is treated independently and then
averaged. If False (default), a multivariate distribution is used.
:param scale_outputs: Boolean. If True, scale the outputs for the RMSE, the
bias and the log-density to make them comparable with the literature.
:returns: Dictionaries noneiv_metrics, eiv_metrics
"""
x,y = x.to(device), y.to(device)
......@@ -70,8 +74,11 @@ def collect_metrics(x,y, seed=0,
y = y.view((-1,1))
assert y.shape == prediction_triple[0].shape
res = y-prediction_triple[0]
scale = train_data.dataset.std_labels.to(device)
scaled_res = res * scale.view((1,-1))
if scale_outputs:
scale = train_data.dataset.std_labels.to(device)
scaled_res = res * scale.view((1,-1))
else:
scaled_res = res
scaled_res = scaled_res.detach().cpu().numpy().flatten()
noneiv_metrics['rmse'] = np.sqrt(np.mean(scaled_res**2))
noneiv_metrics['bias'] = np.mean(scaled_res)
......@@ -84,11 +91,14 @@ def collect_metrics(x,y, seed=0,
# NLL
noneiv_metrics['logdens' ]= net.predictive_logdensity(x, y, number_of_draws=100,
if scale_outputs:
scale_labels = train_data.dataset.std_labels.view((-1,)).to(device)
else:
scale_labels = None
noneiv_metrics['logdens' ]= net.predictive_logdensity(x, y,
number_of_draws=100,
decouple_dimensions=decouple_dimensions,
scale_labels=\
train_data.dataset.std_labels.view((-1,)).to(device)\
).mean().detach().cpu().numpy()
scale_labels=scale_labels).mean().detach().cpu().numpy()
if training_state:
net.train()
else:
......@@ -124,7 +134,11 @@ def collect_metrics(x,y, seed=0,
assert y.shape == prediction_triple[0].shape
res = y-prediction_triple[0]
scale = train_data.dataset.std_labels.to(device)
scaled_res = res * scale.view((1,-1))
if scale_outputs:
scale = train_data.dataset.std_labels.to(device)
scaled_res = res * scale.view((1,-1))
else:
scaled_res = res
scaled_res = scaled_res.detach().cpu().numpy().flatten()
eiv_metrics['rmse' ]= np.sqrt(np.mean(scaled_res**2))
eiv_metrics['bias' ]= np.mean(scaled_res)
......@@ -136,12 +150,14 @@ def collect_metrics(x,y, seed=0,
# NLL
if scale_outputs:
scale_labels = train_data.dataset.std_labels.view((-1,)).to(device)
else:
scale_labels = None
eiv_metrics['logdens' ]= net.predictive_logdensity(x, y,
number_of_draws=eiv_number_of_draws,
decouple_dimensions=decouple_dimensions,
scale_labels=\
train_data.dataset.std_labels.view((-1,)).to(device)\
).mean().detach().cpu().numpy()
scale_labels=scale_labels).mean().detach().cpu().numpy()
if training_state:
net.train()
else:
......@@ -153,7 +169,8 @@ def collect_metrics(x,y, seed=0,
return noneiv_metrics, eiv_metrics
collection_keys = ['rmse','logdens','bias','coverage_numerical','coverage_theory','coverage_normalized','res_std']
collection_keys = ['rmse','logdens','bias','coverage_numerical',
'coverage_theory','coverage_normalized','res_std']
noneiv_metrics_collection = {}
eiv_metrics_collection = {}
for key in collection_keys:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment