diff --git a/EIVPackage/EIVData/linear.py b/EIVPackage/EIVData/linear.py
index 89dd7a18a26f72293de31b39b1eb5f01ae88d0fe..9b69b6b310cf61382e8d9f5204f6ae18a2d1c2fc 100644
--- a/EIVPackage/EIVData/linear.py
+++ b/EIVPackage/EIVData/linear.py
@@ -1,6 +1,6 @@
 import torch
 import sys
-from torch.utils.data import TensorDataset, random_split
+from torch.utils.data import TensorDataset
 
 total_number_of_datapoints = 2000
 input_range = [-1,1]
@@ -31,8 +31,9 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
     :param return_ground_truth: Boolean. If True, the unnoisy ground truth
     will also be returned. Defaults to False.
     :returns: linear_trainset, linear_testset if return_ground_truth is False,
-    else linear_trainset, linear_testset, (true_train_x, true_train_y),
-    (true_test_x, true_test_y)
+    else linear_trainset, linear_testset, true_linear_trainset,
+    true_linear_testset. The latter two return **four tensors**: the true x, y
+    and their noisy counterparts.
     """
     random_generator = torch.Generator().manual_seed(seed)
     # draw different seeds for noise and splitting
@@ -65,9 +66,13 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
     noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
     linear_trainset = TensorDataset(noisy_train_x, noisy_train_y)
     linear_testset = TensorDataset(noisy_test_x, noisy_test_y)
+    true_linear_trainset = TensorDataset(true_train_x, true_train_y,
+            noisy_train_x, noisy_train_y)
+    true_linear_testset = TensorDataset(true_test_x, true_test_y,
+            noisy_test_x, noisy_test_y)
     if not return_ground_truth:
         return linear_trainset, linear_testset
     else:
-        return linear_trainset, linear_testset, (true_train_x, true_train_y),\
-                (true_test_x, true_test_y)
+        return linear_trainset, linear_testset, true_linear_trainset,\
+                true_linear_testset
 
diff --git a/EIVPackage/EIVData/quadratic.py b/EIVPackage/EIVData/quadratic.py
index eab8b6c6810371573d7885f13561d5c74e84fddb..aa4f4605094822116d36e12f64e854c38849a406 100644
--- a/EIVPackage/EIVData/quadratic.py
+++ b/EIVPackage/EIVData/quadratic.py
@@ -1,6 +1,6 @@
 import torch
 import sys
-from torch.utils.data import TensorDataset, random_split
+from torch.utils.data import TensorDataset
 
 total_number_of_datapoints = 2000
 input_range = [-1,1]
@@ -11,7 +11,8 @@ y_noise_strength = 0.1
 
 def get_normalization(*args):
     """
-    Returns the mean and standard deviations (in tuples) of the tensors in *args.
+    Returns the mean and standard deviations (in tuples) of the tensors in
+    *args.
     """
     normalization_collection = []
     for t in args:
@@ -30,9 +31,10 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
     :param normalize: Whether to normalize the data, defaults to True.
     :param return_ground_truth: Boolean. If True, the unnoisy ground truth
     will also be returned. Defaults to False.
-    :returns: linear_trainset, linear_testset if return_ground_truth is False,
-    else linear_trainset, linear_testset, (true_train_x, true_train_y),
-    (true_test_x, true_test_y)
+    :returns: quadratic_trainset, quadratic_testset if return_ground_truth is
+    False, else quadratic_trainset, quadratic_testset, true_quadratic_trainset,
+    true_quadratic_testset. The latter two return **four tensors**: the true
+    x, y and their noisy counterparts.
""" random_generator = torch.Generator().manual_seed(seed) # draw different seeds for noise and splitting @@ -63,11 +65,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, true_train_y, true_test_y = torch.split(true_y, [train_len, test_len]) noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len]) noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len]) - linear_trainset = TensorDataset(noisy_train_x, noisy_train_y) - linear_testset = TensorDataset(noisy_test_x, noisy_test_y) + quadratic_trainset = TensorDataset(noisy_train_x, noisy_train_y) + quadratic_testset = TensorDataset(noisy_test_x, noisy_test_y) + true_quadratic_trainset = TensorDataset(true_train_x, true_train_y, + noisy_train_x, noisy_train_y) + true_quadratic_testset = TensorDataset(true_test_x, true_test_y, + noisy_test_x, noisy_test_y) if not return_ground_truth: - return linear_trainset, linear_testset + return quadratic_trainset, quadratic_testset else: - return linear_trainset, linear_testset, (true_train_x, true_train_y),\ - (true_test_x, true_test_y) + return quadratic_trainset, quadratic_testset, true_quadratic_trainset,\ + true_quadratic_testset diff --git a/EIVPackage/EIVGeneral/coverage_metrics.py b/EIVPackage/EIVGeneral/coverage_metrics.py index 3bafa96fcf3cb72e17ac075dff300309759dd9e9..ee1b6515c6713db1342208091f6ca0afd20ec017 100644 --- a/EIVPackage/EIVGeneral/coverage_metrics.py +++ b/EIVPackage/EIVGeneral/coverage_metrics.py @@ -24,7 +24,7 @@ def multivariate_interval_length(dim, q=0.95): Returns the half side length of multivariate cube, symmetrically centered around 0 such that its measure under a standard normal distribution equals `q`. - :param dim: A non-negative integer, the dimension. + :param dim: A positive integer, the dimension. :param q: Float, should be between 0 and 1. Defaults to 0.95. """ # use independence of components to reduce to a univariate quantile @@ -32,11 +32,12 @@ def multivariate_interval_length(dim, q=0.95): return scipy.stats.norm.ppf(univariate_quantile) -def epistemic_coverage(not_averaged_predictions, y, q=0.95, normalize_errors=False): +def epistemic_coverage(not_averaged_predictions, y, q=0.95, + normalize_errors=False, + noisy_y=True): """ Returns the average coverage of `y` by the interval - "prefactor * (predictions + q-Interval)", - where + "predictions + prefactor * q-Interval", where - "q-Interval" is the interval of measure `q` under the standard normal, where - "predictions" are the entries of the first component of the tuple @@ -45,8 +46,11 @@ def epistemic_coverage(not_averaged_predictions, y, q=0.95, normalize_errors=Fa first component of `not_averaged_predictions`,if `normalize_errors` is set to False, or 1 if it is true. The coverage is returned as given by the `y` and as a theoretical_coverage - computed from the epistemic uncertainty and the aleatoric uncertainty + computed from the epistemic uncertainty and the aleatoric uncertainty (second component of `not_averaged_predictions`). + ** Note **: If `noisy_y` is True, the `y` will be treated as the unnoisy + ground truth. If it is False (default) it will be treated as noisy. For + real data only use the later. 
     :param not_averaged_predictions: A tuple of tensors as in the output of
     `FNNEIV.predict` with `take_average_of_prediction` set to `False`, i.e.:
     the predictions of the neural net not averaged over the first dimension
@@ -58,6 +62,9 @@
     :param normalize_errors: If True, the deviations between predictions and
     `y` are normalized by the total uncertainty, computed from the aleatoric
     and epistemic uncertainty and the coverage w.r.t. q-interval is computed.
+    :param noisy_y: Boolean. If True (the default), `y` is treated as noisy and
+    the total uncertainty is considered. If False, `y` is treated as the
+    unnoisy ground truth.
     :returns: numerical_coverage, theoretical_coverage
     """
     out, sigmas = not_averaged_predictions
@@ -72,21 +79,26 @@
     y = y.view((*y.shape[:2], -1))
     sigmas = sigmas.view((*sigmas.shape[:2], -1))
     out = out.view((*out.shape[:2], -1))
-    # check if dimensions consistent
+    # check if dimensions are consistent
     assert y.shape == sigmas.shape
     assert y.shape[0] == out.shape[0]
     assert y.shape[2] == out.shape[2]
     # compute epistemic uncertainty
     epis_unc = torch.std(out, dim=1, keepdim=True)
-    # compute total uncertainty
     assert epis_unc.shape == sigmas.shape
-    total_unc = torch.sqrt(epis_unc**2 + sigmas **2)
+    # compute total uncertainty
+    if noisy_y:
+        total_unc = torch.sqrt(epis_unc**2 + sigmas **2)
+    else:
+        # for unnoisy y, the aleatoric uncertainty is treated as 0
+        total_unc = epis_unc
     # fix interval based on epis_unc
+    out_dim = y.shape[2]
     if not normalize_errors:
-        interval_length = multivariate_interval_length(dim=y.shape[1], q=q) \
+        interval_length = multivariate_interval_length(dim=out_dim, q=q) \
                 * epis_unc
     else:
-        interval_length = multivariate_interval_length(dim=y.shape[1], q=q)
+        interval_length = multivariate_interval_length(dim=out_dim, q=q)
     # numerical computation
     errors = out - y
     if normalize_errors:
diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py
index 31af058a19075c616231689acacdba39c053f14c..db8d25d525bb2ccf019beb4e3f7e72409b57205f 100644
--- a/Experiments/evaluate_metrics.py
+++ b/Experiments/evaluate_metrics.py
@@ -59,15 +59,17 @@ except KeyError:
     device = torch.device('cpu')
 
 
-def collect_metrics(x,y, seed=0,
+def collect_metrics(x_y_pairs, seed=0,
         noneiv_number_of_draws=100, eiv_number_of_draws=[100,5],
         decouple_dimensions=False, device=device,
         scale_outputs=scale_outputs):
     """
     Compute various metrics for EiV and non-EiV. Will be returned as
     dictionaries.
-    :param x: A torch.tensor, taken as input
-    :param y: A torch.tensor, taken as output
+    :param x_y_pairs: A tuple, either of the form (None,None,x,y) or
+    (x_true,y_true,x,y), containing torch.tensors or None. x and y are
+    taken as input and corresponding label. If the first two components
+    are not None, they are taken to be the unnoisy counterparts.
     :param seed: Integer. The seed used for loading, defaults to 0.
     :param noneiv_number_of_draws: Number of draws for non-EiV model for
     sampling from the posterior predictive. Defaults to 100.
@@ -82,8 +84,13 @@ def collect_metrics(x,y, seed=0,
     the log-dens to make them comparable with the literature.
     :returns: Dictionaries noneiv_metrics, eiv_metrics
     """
+    true_x, true_y, x, y = x_y_pairs
     x,y = x.to(device), y.to(device)
-
+    if true_x is not None:
+        assert true_y is not None
+        true_x,true_y = true_x.to(device), true_y.to(device)
+    else:
+        assert true_y is None
 
     # non-EiV
     noneiv_metrics = {}
@@ -126,6 +133,11 @@
     noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y)
 
     # metrics that need a ground truth
+    if true_x is not None:
+        noneiv_metrics['true_coverage_numerical'],\
+                noneiv_metrics['true_coverage_theory'] =\
+                epistemic_coverage(not_averaged_predictions, true_y,
+                        normalize_errors=False, noisy_y=False)
 
 
     # NLL
@@ -187,6 +199,14 @@
                 epistemic_coverage(not_averaged_predictions, y,
                         normalize_errors=True)
     eiv_metrics['res_std' ]= normalized_std(not_averaged_predictions, y)
+    # metrics that need a ground truth
+    if true_x is not None:
+        eiv_metrics['true_coverage_numerical'],\
+                eiv_metrics['true_coverage_theory'] =\
+                epistemic_coverage(not_averaged_predictions, true_y,
+                        normalize_errors=False, noisy_y=False)
+
+
 
     # NLL
     if scale_outputs:
@@ -209,13 +229,9 @@
     return noneiv_metrics, eiv_metrics
 
 
-collection_keys = ['rmse','logdens','bias','coverage_numerical',
-        'coverage_theory','coverage_normalized','res_std']
 noneiv_metrics_collection = {}
 eiv_metrics_collection = {}
-for key in collection_keys:
-    noneiv_metrics_collection[key] = []
-    eiv_metrics_collection[key] = []
+collection_keys = []
 num_test_epochs = 10
 assert noneiv_conf_dict["seed_range"] == eiv_conf_dict["seed_range"]
 seed_list = range(noneiv_conf_dict["seed_range"][0],
@@ -223,22 +239,37 @@ seed_list = range(noneiv_conf_dict["seed_range"][0],
 max_batch_number = 2
 for seed in tqdm(seed_list):
     try:
-        train_data, test_data, (true_train_x, true_train_y),\
-                (true_test_x, true_test_y) \
+        train_data, test_data, true_train_data, true_test_data \
                 = load_data(seed=seed, return_ground_truth=True)
     except TypeError:
         train_data, test_data = load_data(seed=seed)
-        (true_train_x, true_train_y), (true_test_x, true_test_y)\
-                = (None,None), (None,None)
-    test_dataloader = DataLoader(test_data,
+        true_train_data, true_test_data = None, None
+    if true_test_data is None:
+        test_dataloader = DataLoader(test_data,
             batch_size=int(np.min((len(test_data), 800))), shuffle=True)
+    else:
+        test_dataloader = DataLoader(true_test_data,
+            batch_size=int(np.min((len(true_test_data), 800))), shuffle=True)
     for i in tqdm(range(num_test_epochs)):
-        for j, (x,y) in enumerate(test_dataloader):
+        for j, x_y_pairs in enumerate(test_dataloader):
             if j > max_batch_number:
                 break
-
-
-            noneiv_metrics, eiv_metrics = collect_metrics(x,y, seed=seed)
+            # fill in ground truth with None, if not existent
+            if true_test_data is None:
+                x_y_pairs = (None, None, *x_y_pairs)
+            # should contain (true_x,true_y,x,y) or (None,None,x,y)
+            assert len(x_y_pairs) == 4
+            noneiv_metrics, eiv_metrics = collect_metrics(x_y_pairs,
+                    seed=seed)
+            if i==0 and j==0:
+                # fill collection keys
+                assert eiv_metrics.keys() == noneiv_metrics.keys()
+                collection_keys = list(eiv_metrics.keys())
+                for key in collection_keys:
+                    noneiv_metrics_collection[key] = []
+                    eiv_metrics_collection[key] = []
+
+            # collect results
             for key in collection_keys:
                 noneiv_metrics_collection[key].append(noneiv_metrics[key])
                 eiv_metrics_collection[key].append(eiv_metrics[key])
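
For reference, a minimal usage sketch (not part of the patch) of how the reworked interface fits together: loading the simulated data with ground truth and iterating over the four-tensor datasets introduced above. It assumes EIVPackage is installed so that EIVData.linear is importable; the batch size is illustrative only.

from torch.utils.data import DataLoader
from EIVData.linear import load_data

# with return_ground_truth=True, load_data now returns four datasets; the
# true_* ones yield (true_x, true_y, noisy_x, noisy_y) per item
trainset, testset, true_trainset, true_testset = load_data(
        seed=0, return_ground_truth=True)

test_dataloader = DataLoader(true_testset, batch_size=100, shuffle=True)
for x_y_pairs in test_dataloader:
    # default collation yields four batched tensors, i.e. the
    # (true_x, true_y, x, y) layout that the reworked collect_metrics in
    # Experiments/evaluate_metrics.py unpacks
    assert len(x_y_pairs) == 4
    true_x, true_y, x, y = x_y_pairs
    break

This is the layout that lets collect_metrics fill the new true_coverage_numerical / true_coverage_theory entries, for which epistemic_coverage is called with noisy_y=False so that the aleatoric part of the total uncertainty is dropped.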