diff --git a/EIVPackage/EIVData/linear.py b/EIVPackage/EIVData/linear.py
index b1825905acafc69332d1c01418eb7db00e0f28eb..9b69b6b310cf61382e8d9f5204f6ae18a2d1c2fc 100644
--- a/EIVPackage/EIVData/linear.py
+++ b/EIVPackage/EIVData/linear.py
@@ -1,6 +1,6 @@
 import torch
 import sys
-from torch.utils.data import TensorDataset, random_split
+from torch.utils.data import TensorDataset
 
 total_number_of_datapoints = 2000
 input_range = [-1,1]
@@ -31,11 +31,13 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
     :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
     also be returned. Defaults to False.
     :returns: linear_trainset, linear_testset if return_ground_truth is False,
-    else linear_trainset, linear_testset, (true_x, true_y)
+    else linear_trainset, linear_testset, true_linear_trainset,
+    true_linear_testset. The latter two return **four tensors**: the true
+    x, y and their noisy counterparts.
     """
     random_generator = torch.Generator().manual_seed(seed)
     # draw different seeds for noise and splitting
-    seeds = torch.randint(0,sys.maxsize,(4,), generator=random_generator)
+    seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator)
     # create new generators from tensor seeds
     create_generator = lambda tensor_seed:\
             torch.Generator().manual_seed(tensor_seed.item())
@@ -55,14 +57,22 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
         true_x = (true_x-normalization_x[0])/normalization_x[1]
         noisy_y = (noisy_y-normalization_y[0])/normalization_y[1]
         true_y = (true_y-normalization_y[0])/normalization_y[1]
-    linear_dataset = TensorDataset(noisy_x, noisy_y)
-    dataset_len = len(linear_dataset)
+    dataset_len = noisy_x.shape[0]
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
-    linear_trainset, linear_testset = random_split(linear_dataset,
-            lengths=[train_len, test_len],
-            generator=create_generator(seeds[3]))
+    true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
+    true_train_y, true_test_y = torch.split(true_y, [train_len, test_len])
+    noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len])
+    noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
+    linear_trainset = TensorDataset(noisy_train_x, noisy_train_y)
+    linear_testset = TensorDataset(noisy_test_x, noisy_test_y)
+    true_linear_trainset = TensorDataset(true_train_x, true_train_y,
+            noisy_train_x, noisy_train_y)
+    true_linear_testset = TensorDataset(true_test_x, true_test_y,
+            noisy_test_x, noisy_test_y)
     if not return_ground_truth:
         return linear_trainset, linear_testset
     else:
-        return linear_trainset, linear_testset, (true_x, true_y)
+        return linear_trainset, linear_testset, true_linear_trainset,\
+                true_linear_testset
+
diff --git a/EIVPackage/EIVData/quadratic.py b/EIVPackage/EIVData/quadratic.py
index 9b817a5445b4790417e4dd07a2cc15d44950eb94..aa4f4605094822116d36e12f64e854c38849a406 100644
--- a/EIVPackage/EIVData/quadratic.py
+++ b/EIVPackage/EIVData/quadratic.py
@@ -1,6 +1,6 @@
 import torch
 import sys
-from torch.utils.data import TensorDataset, random_split
+from torch.utils.data import TensorDataset
 
 total_number_of_datapoints = 2000
 input_range = [-1,1]
@@ -11,7 +11,8 @@ y_noise_strength = 0.1
 
 def get_normalization(*args):
     """
-    Returns the mean and standard deviations (in tuples) of the tensors in *args.
+    Returns the mean and standard deviations (in tuples) of the tensors in
+    *args.
""" normalization_collection = [] for t in args: @@ -30,12 +31,14 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, :param normalize: Whether to normalize the data, defaults to True. :param return_ground_truth: Boolean. If True, the unnoisy ground truth will also be returned. Defaults to False. - :returns: linear_trainset, linear_testset if return_ground_truth is False, - else linear_trainset, linear_testset, (true_x, true_y) + :returns: quadratic_trainset, quadratic_testset if return_ground_truth is False, + else quadratic_trainset, quadratic_testset, true_quadratic_trainset, + true_quadratic_testset. The later two return **four tensors**: The true x,y and + their noisy counterparts. """ random_generator = torch.Generator().manual_seed(seed) # draw different seeds for noise and splitting - seeds = torch.randint(0,sys.maxsize,(4,), generator=random_generator) + seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator) # create new generators from tensor seeds create_generator = lambda tensor_seed:\ torch.Generator().manual_seed(tensor_seed.item()) @@ -55,14 +58,22 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, true_x = (true_x-normalization_x[0])/normalization_x[1] noisy_y = (noisy_y-normalization_y[0])/normalization_y[1] true_y = (true_y-normalization_y[0])/normalization_y[1] - linear_dataset = TensorDataset(noisy_x, noisy_y) - dataset_len = len(linear_dataset) + dataset_len = noisy_x.shape[0] train_len = int(dataset_len*splitting_part) test_len = dataset_len - train_len - linear_trainset, linear_testset = random_split(linear_dataset, - lengths=[train_len, test_len], - generator=create_generator(seeds[3])) + true_train_x, true_test_x = torch.split(true_x, [train_len, test_len]) + true_train_y, true_test_y = torch.split(true_y, [train_len, test_len]) + noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len]) + noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len]) + quadratic_trainset = TensorDataset(noisy_train_x, noisy_train_y) + quadratic_testset = TensorDataset(noisy_test_x, noisy_test_y) + true_quadratic_trainset = TensorDataset(true_train_x, true_train_y, + noisy_train_x, noisy_train_y) + true_quadratic_testset = TensorDataset(true_test_x, true_test_y, + noisy_test_x, noisy_test_y) if not return_ground_truth: - return linear_trainset, linear_testset + return quadratic_trainset, quadratic_testset else: - return linear_trainset, linear_testset, (true_x, true_y) + return quadratic_trainset, quadratic_testset, true_quadratic_trainset,\ + true_quadratic_testset + diff --git a/EIVPackage/EIVGeneral/coverage_metrics.py b/EIVPackage/EIVGeneral/coverage_metrics.py index 3bafa96fcf3cb72e17ac075dff300309759dd9e9..7515497349f2e9636bd44b9c8955a9937a9e2224 100644 --- a/EIVPackage/EIVGeneral/coverage_metrics.py +++ b/EIVPackage/EIVGeneral/coverage_metrics.py @@ -24,7 +24,7 @@ def multivariate_interval_length(dim, q=0.95): Returns the half side length of multivariate cube, symmetrically centered around 0 such that its measure under a standard normal distribution equals `q`. - :param dim: A non-negative integer, the dimension. + :param dim: A positive integer, the dimension. :param q: Float, should be between 0 and 1. Defaults to 0.95. 
""" # use independence of components to reduce to a univariate quantile @@ -32,21 +32,24 @@ def multivariate_interval_length(dim, q=0.95): return scipy.stats.norm.ppf(univariate_quantile) -def epistemic_coverage(not_averaged_predictions, y, q=0.95, normalize_errors=False): +def epistemic_coverage(not_averaged_predictions, y, q=0.95, + normalize_errors=False, + noisy_y=True): """ Returns the average coverage of `y` by the interval - "prefactor * (predictions + q-Interval)", - where + "predictions + prefactor * q-Interval", where - "q-Interval" is the interval of measure `q` under the standard normal, - where - "predictions" are the entries of the first component of the tuple - `not_averaged_predictions`, + `not_averaged_predictions` averaged over their second dimension. - "prefactor either equals the epistemic uncertainty, computed from the first component of `not_averaged_predictions`,if `normalize_errors` is set to False, or 1 if it is true. The coverage is returned as given by the `y` and as a theoretical_coverage - computed from the epistemic uncertainty and the aleatoric uncertainty + computed from the epistemic uncertainty and the aleatoric uncertainty (second component of `not_averaged_predictions`). + ** Note **: If `noisy_y` is True, the `y` will be treated as the unnoisy + ground truth. If it is False (default) it will be treated as noisy. For + real data only use the later. :param not_averaged_predictions: A tuple of tensors as in the output of `FNNEIV.predict` with `take_average_of_prediction` set to `False`, i.e.: the predictions of the neural net not averaged over the first dimension @@ -58,43 +61,48 @@ def epistemic_coverage(not_averaged_predictions, y, q=0.95, normalize_errors=Fa :param normalize_errors: If True, the deviations between predictions and `y` are normalized by the total uncertainty, computed from the aleatoric and epistemic uncertainty and the coverage w.r.t. q-interval is computed. + :param noisy_y: Boolean. If True (the default), `y` is treated as noisy and + the total uncertainty is considered. If False, `y` is treated as the + unnoisy ground truth. :returns: numerical_coverage, theoretical_coverage """ out, sigmas = not_averaged_predictions - y = y[:,None,...] - sigmas = sigmas[:,None,...] 
     # add an output axis if necessary
-    if len(y.shape) <= 2:
+    if len(y.shape) <= 1:
         y = y[...,None]
-    if len(sigmas.shape) <= 2:
+    if len(sigmas.shape) <= 1:
         sigmas = sigmas[...,None]
     # squeeze last dimensions into one
-    y = y.view((*y.shape[:2], -1))
-    sigmas = sigmas.view((*sigmas.shape[:2], -1))
+    y = y.view((y.shape[0], -1))
+    sigmas = sigmas.view((sigmas.shape[0], -1))
     out = out.view((*out.shape[:2], -1))
-    # check if dimensions consistent
-    assert y.shape == sigmas.shape
-    assert y.shape[0] == out.shape[0]
-    assert y.shape[2] == out.shape[2]
+    # check if dimensions are consistent
     # compute epistemic uncertainty
-    epis_unc = torch.std(out, dim=1, keepdim=True)
-    # compute total uncertainty
+    epis_unc = torch.std(out, dim=1)
+    out = torch.mean(out, dim=1)
+    assert y.shape == sigmas.shape
+    assert y.shape == out.shape
     assert epis_unc.shape == sigmas.shape
-    total_unc = torch.sqrt(epis_unc**2 + sigmas **2)
+    # compute total uncertainty
+    if noisy_y:
+        total_unc = torch.sqrt(epis_unc**2 + sigmas **2)
+    else:
+        # for unnoisy y, the aleatoric uncertainty is treated as 0
+        total_unc = epis_unc
     # fix interval based on epis_unc
+    out_dim = y.shape[1]
     if not normalize_errors:
-        interval_length = multivariate_interval_length(dim=y.shape[1], q=q) \
+        interval_length = multivariate_interval_length(dim=out_dim, q=q) \
                 * epis_unc
     else:
-        interval_length = multivariate_interval_length(dim=y.shape[1], q=q)
+        interval_length = multivariate_interval_length(dim=out_dim, q=q)
     # numerical computation
     errors = out - y
     if normalize_errors:
-        assert errors.shape[0] == total_unc.shape[0]
-        assert errors.shape[2] == total_unc.shape[2]
+        assert errors.shape == total_unc.shape
         errors /= total_unc
     check_if_in_interval = logical_and_along_dimension(
-            torch.abs(errors) <= interval_length, dim=2)
+            torch.abs(errors) <= interval_length, dim=1)
     numerical_coverage = torch.mean(
             check_if_in_interval.to(torch.float32)
             ).cpu().detach().item()
@@ -103,11 +111,10 @@ def epistemic_coverage(not_averaged_predictions, y, q=0.95, normalize_errors=Fa
         cdf_args = (interval_length/total_unc).detach().cpu().numpy()
         cdf_values = scipy.stats.norm.cdf(cdf_args)
         prob_values = 2*cdf_values -1
-        assert len(cdf_values.shape) == 3
-        assert cdf_values.shape[1] == 1
+        assert len(cdf_values.shape) == 2
         # take product over feature dimension
         # and average over batch dimension
-        theoretical_coverage = np.mean(np.prod(prob_values, axis=2)).item()
+        theoretical_coverage = np.mean(np.prod(prob_values, axis=1)).item()
     else:
         theoretical_coverage = q
     return numerical_coverage, theoretical_coverage
diff --git a/Experiments/create_tabular.py b/Experiments/create_tabular.py
index bd7f32370c5e625caaa36bd206efe82c6ca9961a..15e4f826baa8bfc61520267190f46f63426d6c32 100644
--- a/Experiments/create_tabular.py
+++ b/Experiments/create_tabular.py
@@ -1,9 +1,10 @@
 import os
 import glob
+import argparse
 import json
 
-metrics_to_display = ['rmse','logdens','bias','coverage_normalized']
-
+metrics_to_display = ['rmse','logdens','bias','true_coverage_numerical']
+show_incomplete = True
 list_of_result_files = glob.glob(os.path.join('results','*.json'))
 results = {}
 
@@ -12,6 +13,16 @@ for filename in list_of_result_files:
     with open(filename,'r') as f:
         results[data] = json.load(f)
 
+def save_readout(dictionary, key):
+    """
+    Returns the value of `dictionary` for `key`, unless the latter doesn't
+    exist, in which case (None,None) is returned.
+ """ + try: + return dictionary[key] + except KeyError: + return (None,None) + ## header header_string = 'DATA ' offset = 20 @@ -21,19 +32,25 @@ print(header_string) print(offset * '_' + 70 * '_') ## results for data in results.keys(): - noneiv_results = [results[data]['noneiv'][metric] + noneiv_results = [save_readout(results[data]['noneiv'], metric) for metric in metrics_to_display] noneiv_row_name = f'{data} - nonEiV:' noneiv_results_string = noneiv_row_name + (offset - len(noneiv_row_name)) * ' ' for [metric_mean, metric_std] in noneiv_results: - noneiv_results_string += f' {metric_mean:.3f} ({metric_std:.3f})' + if metric_mean is None: + noneiv_results_string += ' None (None)' + else: + noneiv_results_string += f' {metric_mean:.3f} ({metric_std:.3f})' print(noneiv_results_string) - eiv_results = [results[data]['eiv'][metric] + eiv_results = [save_readout(results[data]['eiv'],metric) for metric in metrics_to_display] eiv_row_name = f'{data} - EiV:' eiv_results_string = eiv_row_name + (offset - len(eiv_row_name)) * ' ' for [metric_mean, metric_std] in eiv_results: - eiv_results_string += f' {metric_mean:.3f} ({metric_std:.3f})' + if metric_mean is None: + eiv_results_string += ' None (None)' + else: + eiv_results_string += f' {metric_mean:.3f} ({metric_std:.3f})' print(eiv_results_string) print(offset * '_' + 70 * '_') diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py index d694558ba35333831660c26edab47bc0d3d014f1..e72dc1180fd7bcd041a58ac5ff895524e7198457 100644 --- a/Experiments/evaluate_metrics.py +++ b/Experiments/evaluate_metrics.py @@ -59,15 +59,17 @@ except KeyError: device = torch.device('cpu') -def collect_metrics(x,y, seed=0, +def collect_metrics(x_y_pairs, seed=0, noneiv_number_of_draws=100, eiv_number_of_draws=[100,5], decouple_dimensions=False, device=device, scale_outputs=scale_outputs): """ Compute various metrics for EiV and non-EiV. Will be returned as dictionaries. - :param x: A torch.tensor, taken as input - :param y: A torch.tensor, taken as output + :param x_y_pairs: A tuple of either the shape (None,None,x,y) or + (x_true,y_true,x,y) containing torch.tensor or None. x and y are + considered as input and corresponding label. If the first two components + are not None, they are considered to be the unnoisy counterparts. :param seed: Integer. The seed used for loading, defaults to 0. :param noneiv_number_of_draws: Number of draws for non-EiV model for sampling from the posterior predictive. Defaults to 100. @@ -82,8 +84,13 @@ def collect_metrics(x,y, seed=0, the log-dens to make them comparable with the literature. 
     :returns: Dictionaries noneiv_metrics, eiv_metrics
     """
+    true_x, true_y, x, y = x_y_pairs
     x,y = x.to(device), y.to(device)
-
+    if true_x is not None:
+        assert true_y is not None
+        true_x,true_y = true_x.to(device), true_y.to(device)
+    else:
+        assert true_y is None
 
     # non-EiV
     noneiv_metrics = {}
@@ -124,7 +131,15 @@
     noneiv_metrics['coverage_normalized'],_ =\
             epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
     noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y)
-
+
+    # metrics that need a ground truth
+    if true_x is not None:
+        noneiv_metrics['true_coverage_numerical'],\
+                noneiv_metrics['true_coverage_theory'] =\
+                epistemic_coverage(not_averaged_predictions, true_y,
+                        normalize_errors=False, noisy_y=False)
+        true_res = true_y - noneiv_mean
+        noneiv_metrics['true_rmse'] = np.sqrt(np.mean(true_res**2))
 
     # NLL
@@ -186,6 +201,15 @@
             epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
     eiv_metrics['res_std' ]= normalized_std(not_averaged_predictions, y)
 
+    # metrics that need a ground truth
+    if true_x is not None:
+        eiv_metrics['true_coverage_numerical'],\
+                eiv_metrics['true_coverage_theory'] =\
+                epistemic_coverage(not_averaged_predictions, true_y,
+                        normalize_errors=False, noisy_y=False)
+
+        true_res = true_y - eiv_mean
+        eiv_metrics['true_rmse'] = np.sqrt(np.mean(true_res**2))
 
     # NLL
     if scale_outputs:
@@ -208,29 +232,47 @@
     return noneiv_metrics, eiv_metrics
 
 
-collection_keys = ['rmse','logdens','bias','coverage_numerical',
-        'coverage_theory','coverage_normalized','res_std']
 noneiv_metrics_collection = {}
 eiv_metrics_collection = {}
-for key in collection_keys:
-    noneiv_metrics_collection[key] = []
-    eiv_metrics_collection[key] = []
+collection_keys = []
 num_test_epochs = 10
 assert noneiv_conf_dict["seed_range"] == eiv_conf_dict["seed_range"]
 seed_list = range(noneiv_conf_dict["seed_range"][0],
         noneiv_conf_dict["seed_range"][1])
 max_batch_number = 2
 for seed in tqdm(seed_list):
-    train_data, test_data = load_data(seed=seed)
-    test_dataloader = DataLoader(test_data,
+    try:
+        train_data, test_data, true_train_data, true_test_data \
+                = load_data(seed=seed, return_ground_truth=True)
+    except TypeError:
+        train_data, test_data = load_data(seed=seed)
+        true_train_data, true_test_data = None, None
+    if true_test_data is None:
+        test_dataloader = DataLoader(test_data,
             batch_size=int(np.min((len(test_data), 800))), shuffle=True)
+    else:
+        test_dataloader = DataLoader(true_test_data,
+            batch_size=int(np.min((len(true_test_data), 800))), shuffle=True)
     for i in tqdm(range(num_test_epochs)):
-        for j, (x,y) in enumerate(test_dataloader):
+        for j, x_y_pairs in enumerate(test_dataloader):
             if j > max_batch_number:
                 break
-
-            noneiv_metrics, eiv_metrics = collect_metrics(x,y, seed=seed)
+            # fill in ground truth with None, if not existent
+            if true_test_data is None:
+                x_y_pairs = (None, None, *x_y_pairs)
+            # should contain (true_x,true_y,x,y) or (None,None,x,y)
+            assert len(x_y_pairs) == 4
+            noneiv_metrics, eiv_metrics = collect_metrics(x_y_pairs,
+                    seed=seed)
+            if i==0 and j==0:
+                # fill collection keys
+                assert eiv_metrics.keys() == noneiv_metrics.keys()
+                collection_keys = list(eiv_metrics.keys())
+                for key in collection_keys:
+                    noneiv_metrics_collection[key] = []
+                    eiv_metrics_collection[key] = []
+            # collect results
             for key in collection_keys:
                 noneiv_metrics_collection[key].append(noneiv_metrics[key])
                 eiv_metrics_collection[key].append(eiv_metrics[key])
diff --git a/Experiments/train_eiv.py b/Experiments/train_eiv.py
index 68229469c167fc934a06bbf61e7c5673a3005b12..116408f467339a8bcef4e1b63c8b7896f693c87b 100644
--- a/Experiments/train_eiv.py
+++ b/Experiments/train_eiv.py
@@ -19,7 +19,7 @@ from EIVTrainingRoutines import train_and_store, loss_functions
 
 # read in data via --data option
 parser = argparse.ArgumentParser()
-parser.add_argument("--data", help="Loads data", default='california')
+parser.add_argument("--data", help="Loads data", default='linear')
 parser.add_argument("--no-autoindent", help="",
         action="store_true") # to avoid conflicts in IPython
 args = parser.parse_args()
diff --git a/Experiments/train_noneiv.py b/Experiments/train_noneiv.py
index ee5687b268d762aeadac294ebdd6e43c37a25149..a585ce62db5287d8972210fc7604656678138440 100644
--- a/Experiments/train_noneiv.py
+++ b/Experiments/train_noneiv.py
@@ -19,7 +19,7 @@ from EIVTrainingRoutines import train_and_store, loss_functions
 
 # read in data via --data option
 parser = argparse.ArgumentParser()
-parser.add_argument("--data", help="Loads data", default='california')
+parser.add_argument("--data", help="Loads data", default='linear')
 parser.add_argument("--no-autoindent", help="",
         action="store_true") # to avoid conflicts in IPython
 args = parser.parse_args()
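
Usage sketch (illustrative, not part of the patch): with the changes above, `load_data` returns four datasets when called with `return_ground_truth=True`, and batches drawn from the two `true_*` datasets yield four tensors each (true x, true y, noisy x, noisy y). Ground-truth coverage is then obtained from `epistemic_coverage` with `noisy_y=False`. The snippet below assumes the import paths from the patch; the fabricated `predictions` and `sigmas` merely stand in for the output of a trained network's `FNNEIV.predict` with `take_average_of_prediction` set to `False`.

    import torch
    from torch.utils.data import DataLoader
    from EIVData.linear import load_data
    from EIVGeneral.coverage_metrics import epistemic_coverage

    # four datasets: noisy train/test plus their ground-truth counterparts
    train, test, true_train, true_test = load_data(seed=0,
            return_ground_truth=True)
    x_true, y_true, x_noisy, y_noisy = next(iter(
            DataLoader(true_test, batch_size=100, shuffle=True)))
    if y_true.ndim == 1:
        y_true, y_noisy = y_true[:,None], y_noisy[:,None]

    # stand-ins for a trained model's output: predictions of shape
    # (batch, number_of_draws, out_dim), aleatoric stds of shape (batch, out_dim)
    number_of_draws = 5
    predictions = y_true[:,None,:] + 0.01*torch.randn(
            y_true.shape[0], number_of_draws, y_true.shape[1])
    sigmas = 0.1*torch.ones_like(y_true)

    # coverage w.r.t. the noisy labels: total uncertainty (epistemic + aleatoric)
    num_cov, th_cov = epistemic_coverage((predictions, sigmas), y_noisy)
    # coverage w.r.t. the ground truth: epistemic uncertainty only
    true_cov, _ = epistemic_coverage((predictions, sigmas), y_true,
            noisy_y=False)

Since the unnoisy ground truth carries no measurement noise, only the epistemic spread of the network draws enters the interval in the second call, which is exactly what `noisy_y=False` selects: the aleatoric `sigmas` are left out of the total uncertainty.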