Commit 0a001305 authored by Jörg Martin

Included true_y in evaluate_metrics

parent a4f80144
import torch
import sys
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import TensorDataset
total_number_of_datapoints = 2000
input_range = [-1,1]
@@ -31,8 +31,9 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False.
:returns: linear_trainset, linear_testset if return_ground_truth is False,
else linear_trainset, linear_testset, (true_train_x, true_train_y),
(true_test_x, true_test_y)
else linear_trainset, linear_testset, true_linear_trainset,
true_linear_testset. The latter two contain **four tensors** each: the
true x, y and their noisy counterparts.
"""
random_generator = torch.Generator().manual_seed(seed)
# draw different seeds for noise and splitting
@@ -65,9 +66,13 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
linear_trainset = TensorDataset(noisy_train_x, noisy_train_y)
linear_testset = TensorDataset(noisy_test_x, noisy_test_y)
true_linear_trainset = TensorDataset(true_train_x, true_train_y,
noisy_train_x, noisy_train_y)
true_linear_testset = TensorDataset(true_test_x, true_test_y,
noisy_test_x, noisy_test_y)
if not return_ground_truth:
return linear_trainset, linear_testset
else:
return linear_trainset, linear_testset, (true_train_x, true_train_y),\
(true_test_x, true_test_y)
return linear_trainset, linear_testset, true_linear_trainset,\
true_linear_testset
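For orientation, here is a minimal sketch of the four-tensor layout that true_linear_trainset / true_linear_testset now carry; the tensors and noise strengths below are placeholders, not the module's actual values.

import torch
from torch.utils.data import TensorDataset

true_x = torch.linspace(-1, 1, 5).unsqueeze(1)
true_y = 2 * true_x + 1                              # placeholder ground truth
noisy_x = true_x + 0.05 * torch.randn_like(true_x)   # placeholder noise strengths
noisy_y = true_y + 0.1 * torch.randn_like(true_y)

true_linear_trainset = TensorDataset(true_x, true_y, noisy_x, noisy_y)
# each element is a 4-tuple (true_x, true_y, noisy_x, noisy_y)
sample = true_linear_trainset[0]
assert len(sample) == 4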
import torch
import sys
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import TensorDataset
total_number_of_datapoints = 2000
input_range = [-1,1]
@@ -11,7 +11,8 @@ y_noise_strength = 0.1
def get_normalization(*args):
"""
Returns the mean and standard deviations (in tuples) of the tensors in *args.
Returns the mean and standard deviations (in tuples) of the tensors in
*args.
"""
normalization_collection = []
for t in args:
@@ -30,9 +31,10 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
:param normalize: Whether to normalize the data, defaults to True.
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False.
:returns: linear_trainset, linear_testset if return_ground_truth is False,
else linear_trainset, linear_testset, (true_train_x, true_train_y),
(true_test_x, true_test_y)
:returns: quadratic_trainset, quadratic_testset if return_ground_truth is False,
else quadratic_trainset, quadratic_testset, true_quadratic_trainset,
true_quadratic_testset. The latter two contain **four tensors** each: the
true x, y and their noisy counterparts.
"""
random_generator = torch.Generator().manual_seed(seed)
# draw different seeds for noise and splitting
@@ -63,11 +65,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
true_train_y, true_test_y = torch.split(true_y, [train_len, test_len])
noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len])
noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
linear_trainset = TensorDataset(noisy_train_x, noisy_train_y)
linear_testset = TensorDataset(noisy_test_x, noisy_test_y)
quadratic_trainset = TensorDataset(noisy_train_x, noisy_train_y)
quadratic_testset = TensorDataset(noisy_test_x, noisy_test_y)
true_quadratic_trainset = TensorDataset(true_train_x, true_train_y,
noisy_train_x, noisy_train_y)
true_quadratic_testset = TensorDataset(true_test_x, true_test_y,
noisy_test_x, noisy_test_y)
if not return_ground_truth:
return linear_trainset, linear_testset
return quadratic_trainset, quadratic_testset
else:
return linear_trainset, linear_testset, (true_train_x, true_train_y),\
(true_test_x, true_test_y)
return quadratic_trainset, quadratic_testset, true_quadratic_trainset,\
true_quadratic_testset
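Analogously for the quadratic data: wrapping the ground-truth dataset in a DataLoader yields four tensors per batch, which is what the adapted evaluation loop below relies on. The dummy tensors here only illustrate the unpacking.

import torch
from torch.utils.data import DataLoader, TensorDataset

# stand-in for true_quadratic_testset: (true_x, true_y, noisy_x, noisy_y)
dummy = TensorDataset(torch.zeros(10, 1), torch.zeros(10, 1),
                      torch.zeros(10, 1), torch.zeros(10, 1))
loader = DataLoader(dummy, batch_size=4, shuffle=True)
for true_x, true_y, noisy_x, noisy_y in loader:
    print(true_x.shape, noisy_y.shape)  # each batch unpacks into four tensors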
@@ -24,7 +24,7 @@ def multivariate_interval_length(dim, q=0.95):
Returns the half side length of multivariate cube, symmetrically centered
around 0 such that its measure under a standard normal distribution equals
`q`.
:param dim: A non-negative integer, the dimension.
:param dim: A positive integer, the dimension.
:param q: Float, should be between 0 and 1. Defaults to 0.95.
"""
# use independence of components to reduce to a univariate quantile
@@ -32,11 +32,12 @@ def multivariate_interval_length(dim, q=0.95):
return scipy.stats.norm.ppf(univariate_quantile)
def epistemic_coverage(not_averaged_predictions, y, q=0.95, normalize_errors=False):
def epistemic_coverage(not_averaged_predictions, y, q=0.95,
normalize_errors=False,
noisy_y=True):
"""
Returns the average coverage of `y` by the interval
"prefactor * (predictions + q-Interval)",
where
"predictions + prefactor * q-Interval", where
- "q-Interval" is the interval of measure `q` under the standard normal,
where
- "predictions" are the entries of the first component of the tuple
@@ -45,8 +46,11 @@ def epistemic_coverage(not_averaged_predictions, y, q=0.95, normalize_errors=Fa
first component of `not_averaged_predictions`, if
`normalize_errors` is set to False, or 1 if it is True.
The coverage is returned as given by the `y` and as a theoretical_coverage
computed from the epistemic uncertainty and the aleatoric uncertainty
(second component of `not_averaged_predictions`).
**Note**: If `noisy_y` is True (the default), `y` will be treated as noisy;
if it is False, `y` will be treated as the unnoisy ground truth. For real
data, only use `noisy_y=True`.
:param not_averaged_predictions: A tuple of tensors as in the output of
`FNNEIV.predict` with `take_average_of_prediction` set to `False`, i.e.:
the predictions of the neural net not averaged over the first dimension
@@ -58,6 +62,9 @@ def epistemic_coverage(not_averaged_predictions, y, q=0.95, normalize_errors=Fa
:param normalize_errors: If True, the deviations between predictions and
`y` are normalized by the total uncertainty (computed from the aleatoric
and epistemic uncertainty) and the coverage w.r.t. the q-interval is computed.
:param noisy_y: Boolean. If True (the default), `y` is treated as noisy and
the total uncertainty is considered. If False, `y` is treated as the
unnoisy ground truth.
:returns: numerical_coverage, theoretical_coverage
"""
out, sigmas = not_averaged_predictions
@@ -72,21 +79,26 @@ def epistemic_coverage(not_averaged_predictions, y, q=0.95, normalize_errors=Fa
y = y.view((*y.shape[:2], -1))
sigmas = sigmas.view((*sigmas.shape[:2], -1))
out = out.view((*out.shape[:2], -1))
# check if dimensions consistent
# check if dimensions are consistent
assert y.shape == sigmas.shape
assert y.shape[0] == out.shape[0]
assert y.shape[2] == out.shape[2]
# compute epistemic uncertainty
epis_unc = torch.std(out, dim=1, keepdim=True)
# compute total uncertainty
assert epis_unc.shape == sigmas.shape
total_unc = torch.sqrt(epis_unc**2 + sigmas **2)
# compute total uncertainty
if noisy_y:
total_unc = torch.sqrt(epis_unc**2 + sigmas **2)
else:
# for unnoisy y, the aleatoric uncertainty is treated as 0
total_unc = epis_unc
# fix interval based on epis_unc
out_dim = y.shape[2]
if not normalize_errors:
interval_length = multivariate_interval_length(dim=y.shape[1], q=q) \
interval_length = multivariate_interval_length(dim=out_dim, q=q) \
* epis_unc
else:
interval_length = multivariate_interval_length(dim=y.shape[1], q=q)
interval_length = multivariate_interval_length(dim=out_dim, q=q)
# numerical computation
errors = out - y
if normalize_errors:
......
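As a reading aid for the hunks above: `multivariate_interval_length` reduces the cube quantile to a univariate one by using the independence of the components, P(|Z_i| <= a for all i) = (2*Phi(a) - 1)**dim = q, hence a = Phi^{-1}((1 + q**(1/dim)) / 2). The computation of `univariate_quantile` is elided in this hunk but presumably follows this identity; a minimal sketch:

import scipy.stats

def interval_half_length(dim, q=0.95):
    # reduce to a univariate quantile via independence of the components:
    # (2 * Phi(a) - 1)**dim = q  =>  Phi(a) = (1 + q**(1/dim)) / 2
    univariate_quantile = 0.5 * (1 + q**(1 / dim))
    return scipy.stats.norm.ppf(univariate_quantile)

# e.g. interval_half_length(1, q=0.95) is roughly 1.96

In `epistemic_coverage`, this half length is then scaled by the epistemic uncertainty (unless `normalize_errors` is set), and with `noisy_y=False` the total uncertainty reduces to the epistemic part, matching the new branch above.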
@@ -59,15 +59,17 @@ except KeyError:
device = torch.device('cpu')
def collect_metrics(x,y, seed=0,
def collect_metrics(x_y_pairs, seed=0,
noneiv_number_of_draws=100, eiv_number_of_draws=[100,5],
decouple_dimensions=False, device=device,
scale_outputs=scale_outputs):
"""
Compute various metrics for EiV and non-EiV. Will be returned as
dictionaries.
:param x: A torch.tensor, taken as input
:param y: A torch.tensor, taken as output
:param x_y_pairs: A tuple of the shape (None, None, x, y) or
(x_true, y_true, x, y), containing torch.Tensors or None. x and y are
taken as input and the corresponding labels. If the first two components
are not None, they are taken to be the unnoisy counterparts of x and y.
:param seed: Integer. The seed used for loading, defaults to 0.
:param noneiv_number_of_draws: Number of draws for non-EiV model
for sampling from the posterior predictive. Defaults to 100.
@@ -82,8 +84,13 @@ def collect_metrics(x,y, seed=0,
the log-dens to make them comparable with the literature.
:returns: Dictionaries noneiv_metrics, eiv_metrics
"""
true_x, true_y, x, y = x_y_pairs
x,y = x.to(device), y.to(device)
if true_x is not None:
assert true_y is not None
true_x,true_y = true_x.to(device), true_y.to(device)
else:
assert true_y is None
# non-EiV
noneiv_metrics = {}
@@ -126,6 +133,11 @@ def collect_metrics(x,y, seed=0,
noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y)
# metrics that need a ground truth
if true_x is not None:
noneiv_metrics['true_coverage_numerical'],\
noneiv_metrics['true_coverage_theory'] =\
epistemic_coverage(not_averaged_predictions, true_y,
normalize_errors=False, noisy_y=False)
# NLL
@@ -187,6 +199,14 @@ def collect_metrics(x,y, seed=0,
epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
eiv_metrics['res_std' ]= normalized_std(not_averaged_predictions, y)
# metrics that need a ground truth
if true_x is not None:
eiv_metrics['true_coverage_numerical'],\
eiv_metrics['true_coverage_theory'] =\
epistemic_coverage(not_averaged_predictions, true_y,
normalize_errors=False, noisy_y=False)
# NLL
if scale_outputs:
@@ -209,13 +229,9 @@ def collect_metrics(x,y, seed=0,
return noneiv_metrics, eiv_metrics
collection_keys = ['rmse','logdens','bias','coverage_numerical',
'coverage_theory','coverage_normalized','res_std']
noneiv_metrics_collection = {}
eiv_metrics_collection = {}
for key in collection_keys:
noneiv_metrics_collection[key] = []
eiv_metrics_collection[key] = []
collection_keys = []
num_test_epochs = 10
assert noneiv_conf_dict["seed_range"] == eiv_conf_dict["seed_range"]
seed_list = range(noneiv_conf_dict["seed_range"][0],
@@ -223,22 +239,37 @@ seed_list = range(noneiv_conf_dict["seed_range"][0],
max_batch_number = 2
for seed in tqdm(seed_list):
try:
train_data, test_data, (true_train_x, true_train_y),\
(true_test_x, true_test_y) \
train_data, test_data, true_train_data, true_test_data \
= load_data(seed=seed, return_ground_truth=True)
except TypeError:
train_data, test_data = load_data(seed=seed)
(true_train_x, true_train_y), (true_test_x, true_test_y)\
= (None,None), (None,None)
test_dataloader = DataLoader(test_data,
true_train_data, true_test_data = None, None
if true_test_data is None:
test_dataloader = DataLoader(test_data,
batch_size=int(np.min((len(test_data),
800))), shuffle=True)
else:
test_dataloader = DataLoader(true_test_data,
batch_size=int(np.min((len(true_test_data), 800))), shuffle=True)
for i in tqdm(range(num_test_epochs)):
for j, (x,y) in enumerate(test_dataloader):
for j, x_y_pairs in enumerate(test_dataloader):
if j > max_batch_number:
break
noneiv_metrics, eiv_metrics = collect_metrics(x,y, seed=seed)
# fill in ground truth with None if it does not exist
if true_test_data is None:
x_y_pairs = (None, None, *x_y_pairs)
# should contain (true_x,true_y,x,y) or (None,None,x,y)
assert len(x_y_pairs) == 4
noneiv_metrics, eiv_metrics = collect_metrics(x_y_pairs,
seed=seed)
if i==0 and j==0:
# fill collection keys
assert eiv_metrics.keys() == noneiv_metrics.keys()
collection_keys = list(eiv_metrics.keys())
for key in collection_keys:
noneiv_metrics_collection[key] = []
eiv_metrics_collection[key] = []
# collect results
for key in collection_keys:
noneiv_metrics_collection[key].append(noneiv_metrics[key])
eiv_metrics_collection[key].append(eiv_metrics[key])
......
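A minimal sketch of the new `collect_metrics` calling convention introduced in this commit; the tensors below are placeholders, and a real call also needs the trained networks that `collect_metrics` loads internally, which is why the call itself is left commented out.

import torch

x, y = torch.randn(8, 1), torch.randn(8, 1)            # noisy batch from the test loader
true_x, true_y = torch.randn(8, 1), torch.randn(8, 1)  # unnoisy ground truth, if available

x_y_pairs_without_gt = (None, None, x, y)    # datasets without ground truth
x_y_pairs_with_gt = (true_x, true_y, x, y)   # datasets with ground truth

# noneiv_metrics, eiv_metrics = collect_metrics(x_y_pairs_with_gt, seed=0)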