From e2c57f2dbe9c6184873739cc965c23bf8fd0a077 Mon Sep 17 00:00:00 2001 From: Joerg Martin <joerg.martin@ptb.de> Date: Wed, 2 Feb 2022 11:16:33 +0100 Subject: [PATCH] Introduced normalize for normalized data --- EIVPackage/EIVData/cubic.py | 14 ++++++---- EIVPackage/EIVData/linear.py | 14 ++++++---- EIVPackage/EIVData/quadratic.py | 14 ++++++---- EIVPackage/EIVData/repeated_sampling.py | 14 ++++++---- EIVPackage/EIVData/sine.py | 14 ++++++---- Experiments/configurations/eiv_cubic.json | 1 + Experiments/configurations/eiv_linear.json | 1 + Experiments/configurations/eiv_quadratic.json | 1 + Experiments/configurations/eiv_sine.json | 1 + Experiments/configurations/noneiv_cubic.json | 1 + Experiments/configurations/noneiv_linear.json | 1 + .../configurations/noneiv_quadratic.json | 1 + Experiments/configurations/noneiv_sine.json | 1 + Experiments/evaluate_metrics.py | 22 +++++++++++----- Experiments/plot_coverage.py | 15 ++++++++--- Experiments/plot_prediction.py | 26 ++++++++++++++----- Experiments/train_eiv.py | 7 ++++- 17 files changed, 105 insertions(+), 43 deletions(-) diff --git a/EIVPackage/EIVData/cubic.py b/EIVPackage/EIVData/cubic.py index c4a0654..6b11da7 100644 --- a/EIVPackage/EIVData/cubic.py +++ b/EIVPackage/EIVData/cubic.py @@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, seed_list=seeds[1:3], normalize=normalize, return_normalization=True) - def normalized_func(x): - unnormalized_x = unnormalize_tensor(x, normalization_list[0]) - y = func(unnormalized_x) - normalized_y = normalize_tensor(y, normalization_list[1]) - return normalized_y + if normalize: + def normalized_func(x): + unnormalized_x = unnormalize_tensor(x, normalization_list[0]) + y = func(unnormalized_x) + normalized_y = normalize_tensor(y, normalization_list[1]) + return normalized_y + else: + def normalized_func(x): + return func(x) dataset_len = noisy_x.shape[0] # shuffle via seed diff --git a/EIVPackage/EIVData/linear.py b/EIVPackage/EIVData/linear.py index 499e9d1..dc2959d 100644 --- a/EIVPackage/EIVData/linear.py +++ b/EIVPackage/EIVData/linear.py @@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, seed_list=seeds[1:3], normalize=normalize, return_normalization=True) - def normalized_func(x): - unnormalized_x = unnormalize_tensor(x, normalization_list[0]) - y = func(unnormalized_x) - normalized_y = normalize_tensor(y, normalization_list[1]) - return normalized_y + if normalize: + def normalized_func(x): + unnormalized_x = unnormalize_tensor(x, normalization_list[0]) + y = func(unnormalized_x) + normalized_y = normalize_tensor(y, normalization_list[1]) + return normalized_y + else: + def normalized_func(x): + return func(x) dataset_len = noisy_x.shape[0] # shuffle via seed diff --git a/EIVPackage/EIVData/quadratic.py b/EIVPackage/EIVData/quadratic.py index 13ab3f1..27e83d6 100644 --- a/EIVPackage/EIVData/quadratic.py +++ b/EIVPackage/EIVData/quadratic.py @@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, seed_list=seeds[1:3], normalize=normalize, return_normalization=True) - def normalized_func(x): - unnormalized_x = unnormalize_tensor(x, normalization_list[0]) - y = func(unnormalized_x) - normalized_y = normalize_tensor(y, normalization_list[1]) - return normalized_y + if normalize: + def normalized_func(x): + unnormalized_x = unnormalize_tensor(x, normalization_list[0]) + y = func(unnormalized_x) + normalized_y = normalize_tensor(y, normalization_list[1]) + return normalized_y + else: + def normalized_func(x): + return func(x) dataset_len = noisy_x.shape[0] # shuffle via seed diff --git a/EIVPackage/EIVData/repeated_sampling.py b/EIVPackage/EIVData/repeated_sampling.py index f6339df..16f27ec 100644 --- a/EIVPackage/EIVData/repeated_sampling.py +++ b/EIVPackage/EIVData/repeated_sampling.py @@ -78,11 +78,15 @@ class repeated_sampling(): normalize=normalize, normalization_list=[full_noisy_x, full_noisy_y], return_normalization=False) # same normalization - def normalized_func(x): - unnormalized_x = unnormalize_tensor(x, normalization_list[0]) - y = self.func(unnormalized_x) - normalized_y = normalize_tensor(y, normalization_list[1]) - return normalized_y + if normalize: + def normalized_func(x): + unnormalized_x = unnormalize_tensor(x, normalization_list[0]) + y = self.func(unnormalized_x) + normalized_y = normalize_tensor(y, normalization_list[1]) + return normalized_y + else: + def normalized_func(x): + return self.func(x) trainset = TensorDataset(noisy_train_x, noisy_train_y) testset = TensorDataset(noisy_test_x, noisy_test_y) true_trainset = TensorDataset(true_train_x, true_train_y, diff --git a/EIVPackage/EIVData/sine.py b/EIVPackage/EIVData/sine.py index a420654..308a308 100644 --- a/EIVPackage/EIVData/sine.py +++ b/EIVPackage/EIVData/sine.py @@ -54,11 +54,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, seed_list=seeds[1:3], normalize=normalize, return_normalization=True) - def normalized_func(x): - unnormalized_x = unnormalize_tensor(x, normalization_list[0]) - y = func(unnormalized_x) - normalized_y = normalize_tensor(y, normalization_list[1]) - return normalized_y + if normalize: + def normalized_func(x): + unnormalized_x = unnormalize_tensor(x, normalization_list[0]) + y = func(unnormalized_x) + normalized_y = normalize_tensor(y, normalization_list[1]) + return normalized_y + else: + def normalized_func(x): + return func(x) dataset_len = noisy_x.shape[0] # shuffle via seed diff --git a/Experiments/configurations/eiv_cubic.json b/Experiments/configurations/eiv_cubic.json index 6b81fef..9f9f7d8 100644 --- a/Experiments/configurations/eiv_cubic.json +++ b/Experiments/configurations/eiv_cubic.json @@ -1,6 +1,7 @@ { "long_dataname": "cubic", "short_dataname": "cubic", + "normalize": false, "lr": 1e-3, "batch_size": 64, "test_batch_size": 800, diff --git a/Experiments/configurations/eiv_linear.json b/Experiments/configurations/eiv_linear.json index a83fc15..5754733 100644 --- a/Experiments/configurations/eiv_linear.json +++ b/Experiments/configurations/eiv_linear.json @@ -1,6 +1,7 @@ { "long_dataname": "linear", "short_dataname": "linear", + "normalize": false, "lr": 1e-3, "batch_size": 64, "test_batch_size": 800, diff --git a/Experiments/configurations/eiv_quadratic.json b/Experiments/configurations/eiv_quadratic.json index 9b5c52e..7fd2bad 100644 --- a/Experiments/configurations/eiv_quadratic.json +++ b/Experiments/configurations/eiv_quadratic.json @@ -1,6 +1,7 @@ { "long_dataname": "quadratic", "short_dataname": "quadratic", + "normalize": false, "lr": 1e-3, "batch_size": 64, "test_batch_size": 800, diff --git a/Experiments/configurations/eiv_sine.json b/Experiments/configurations/eiv_sine.json index b632e1d..6d0a36d 100644 --- a/Experiments/configurations/eiv_sine.json +++ b/Experiments/configurations/eiv_sine.json @@ -1,6 +1,7 @@ { "long_dataname": "sine", "short_dataname": "sine", + "normalize": false, "lr": 1e-3, "batch_size": 64, "test_batch_size": 800, diff --git a/Experiments/configurations/noneiv_cubic.json b/Experiments/configurations/noneiv_cubic.json index 00fdce9..06a1cf3 100644 --- a/Experiments/configurations/noneiv_cubic.json +++ b/Experiments/configurations/noneiv_cubic.json @@ -1,6 +1,7 @@ { "long_dataname": "cubic", "short_dataname": "cubic", + "normalize": false, "lr": 1e-3, "batch_size": 64, "test_batch_size": 800, diff --git a/Experiments/configurations/noneiv_linear.json b/Experiments/configurations/noneiv_linear.json index 1b2110a..7e77536 100644 --- a/Experiments/configurations/noneiv_linear.json +++ b/Experiments/configurations/noneiv_linear.json @@ -1,6 +1,7 @@ { "long_dataname": "linear", "short_dataname": "linear", + "normalize": false, "lr": 1e-3, "batch_size": 64, "test_batch_size": 800, diff --git a/Experiments/configurations/noneiv_quadratic.json b/Experiments/configurations/noneiv_quadratic.json index 573d787..699cfa7 100644 --- a/Experiments/configurations/noneiv_quadratic.json +++ b/Experiments/configurations/noneiv_quadratic.json @@ -1,6 +1,7 @@ { "long_dataname": "quadratic", "short_dataname": "quadratic", + "normalize": false, "lr": 1e-3, "batch_size": 64, "test_batch_size": 800, diff --git a/Experiments/configurations/noneiv_sine.json b/Experiments/configurations/noneiv_sine.json index c94c589..b7f9e15 100644 --- a/Experiments/configurations/noneiv_sine.json +++ b/Experiments/configurations/noneiv_sine.json @@ -1,6 +1,7 @@ { "long_dataname": "sine", "short_dataname": "sine", + "normalize": false, "lr": 1e-3, "batch_size": 64, "test_batch_size": 800, diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py index bf052e8..6912c01 100644 --- a/Experiments/evaluate_metrics.py +++ b/Experiments/evaluate_metrics.py @@ -31,6 +31,12 @@ with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as conf_file: eiv_conf_dict = json.load(conf_file) with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as conf_file: noneiv_conf_dict = json.load(conf_file) +try: + normalize = eiv_conf_dict['normalize'] + assert normalize == noneiv_conf_dict['normalize'] +except KeyError: + # normalize by default + normalize = True long_dataname = eiv_conf_dict["long_dataname"] short_dataname = eiv_conf_dict["short_dataname"] @@ -40,7 +46,7 @@ print(f"Evaluating {long_dataname}") scale_outputs = False load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data -train_data, test_data = load_data() +train_data, test_data = load_data(normalize=normalize) input_dim = train_data[0][0].numel() output_dim = train_data[0][1].numel() @@ -240,7 +246,8 @@ def collect_metrics(x_y_pairs, seed=0, def collect_full_seed_range_metrics(load_data, seed_range,test_batch_size = 100, test_samples = 10, noneiv_number_of_draws=100, eiv_number_of_draws=[100,5], device=device, - scale_outputs=scale_outputs): + scale_outputs=scale_outputs, + normalize=normalize): """ Collect metrics that need all seeds for their computation. :param load_data: load_data map should take seed as an argument and, @@ -257,6 +264,7 @@ def collect_full_seed_range_metrics(load_data, :param device: The torch.device to use :param scale_output: Boolean, scale the outputs for some metrics. Defaults to False. + :param normalize: Boolean, whether to normalize the data :returns: Dictionaries noneiv_metrics, eiv_metrics """ noneiv_metrics = {} @@ -267,9 +275,10 @@ def collect_full_seed_range_metrics(load_data, # load data according toseed try: train_data, test_data, true_train_data, true_test_data \ - = load_data(seed=seed, return_ground_truth=True) + = load_data(seed=seed, return_ground_truth=True, + normalize=normalize) except TypeError: - train_data, test_data = load_data(seed=seed) + train_data, test_data = load_data(seed=seed, normalize=normalize) true_train_data, true_test_data = None, None ## Compute x-dependant bias @@ -460,9 +469,10 @@ number_of_test_samples = 2 for seed in tqdm(seed_list): try: train_data, test_data, true_train_data, true_test_data \ - = load_data(seed=seed, return_ground_truth=True) + = load_data(seed=seed, return_ground_truth=True, + normalize=normalize) except TypeError: - train_data, test_data = load_data(seed=seed) + train_data, test_data = load_data(seed=seed, normalize=normalize) true_train_data, true_test_data = None, None if true_test_data is None: test_dataloader = DataLoader(test_data, diff --git a/Experiments/plot_coverage.py b/Experiments/plot_coverage.py index 455d283..d8f5485 100644 --- a/Experiments/plot_coverage.py +++ b/Experiments/plot_coverage.py @@ -48,6 +48,11 @@ def compute_coverages(data, eiv, number_of_draws, long_dataname = conf_dict["long_dataname"] short_dataname = conf_dict["short_dataname"] + try: + normalize = conf_dict['normalize'] + except KeyError: + # normalize by default + normalize = True load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data @@ -73,12 +78,13 @@ def compute_coverages(data, eiv, number_of_draws, # test whether there is a ground truth try: train_data, _, _,_ \ - = load_data(seed=0, return_ground_truth=True) + = load_data(seed=0, return_ground_truth=True, + normalize=normalize) except TypeError: # if not, end function return None,None else: - train_data, _ = load_data() + train_data, _ = load_data(normalize=normalize) print(f"Computing {'EiV' if eiv else 'non-EiV'} coverage for {long_dataname}") @@ -145,14 +151,15 @@ def compute_coverages(data, eiv, number_of_draws, """ for seed in seed_list: if not use_ground_truth: - _, test_data = load_data(seed=seed) + _, test_data = load_data(seed=seed, normalize=normalize) test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True) yield test_dataloader else: _, _, _, true_test =\ - load_data(seed=seed, return_ground_truth=True) + load_data(seed=seed, return_ground_truth=True, + normalize=normalize) # take noisy x but unnoisy y cut_true_test = VerticalCut(true_test, components_to_pick=[2,1]) diff --git a/Experiments/plot_prediction.py b/Experiments/plot_prediction.py index 61fac8d..6702506 100644 --- a/Experiments/plot_prediction.py +++ b/Experiments/plot_prediction.py @@ -98,7 +98,8 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws, # determine dimensions - _, test_data = load_data(seed=plotting_seed, return_ground_truth=False) + _, test_data, normalized_func = load_data(seed=plotting_seed, return_ground_truth=False, + return_normalized_func=True) input_dim = test_data[0][0].numel() output_dim = test_data[0][1].numel() assert output_dim == 1 @@ -209,7 +210,8 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws, data_list = ['sine'] # short datanames -list_x_range = [torch.linspace(0.0,1.0, 50)] +#TODO: Check which ranges are "correct" +list_x_range = [torch.linspace(-2.5,2.5, 50)] list_color = [('red','blue')] list_number_of_draws = [((100,5), 100)] for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list, @@ -229,11 +231,21 @@ for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list, plt.figure(i) plt.clf() x_values, y_values = eiv_plotting_dictionary['range_points'] - plt.plot(x_values, y_values,'-', color='k') - # plt.plot(x_values, eiv_plotting_dictionary['prediction'],'-', - # color=color[0]) - # plt.plot(x_values, noneiv_plotting_dictionary['prediction'],'-', - # color=color[1]) + plt.plot(x_values.flatten(), y_values.flatten(),'-', color='k') + eiv_pred = eiv_plotting_dictionary['prediction'] + eiv_unc = eiv_plotting_dictionary['uncertainty'] + plt.plot(x_values, eiv_pred,'-', + color=color[0]) + plt.fill_between(x_values.flatten(), eiv_pred-k * eiv_unc, + eiv_pred + k * eiv_unc, + color=color[0], alpha=0.5) + noneiv_pred = noneiv_plotting_dictionary['prediction'] + noneiv_unc = noneiv_plotting_dictionary['uncertainty'] + plt.plot(x_values.flatten(), noneiv_pred,'-', + color=color[1]) + plt.fill_between(x_values.flatten(), noneiv_pred-k * noneiv_unc, + noneiv_pred + k * noneiv_unc, + color=color[1], alpha=0.5) else: # multidimensional handling not included yet pass diff --git a/Experiments/train_eiv.py b/Experiments/train_eiv.py index 116408f..3ecccea 100644 --- a/Experiments/train_eiv.py +++ b/Experiments/train_eiv.py @@ -50,6 +50,11 @@ fixed_std_x = conf_dict['fixed_std_x'] gamma = conf_dict["gamma"] hidden_layers = conf_dict["hidden_layers"] seed_range = conf_dict['seed_range'] +try: + normalize = conf_dict['normalize'] +except KeyError: + # normalize by default + normalize = True print(f"Training on {long_dataname} data") @@ -199,7 +204,7 @@ def train_on_data(init_std_y, seed): set_seeds(seed) # load Datasets train_data, test_data = load_data(seed=seed, splitting_part=0.8, - normalize=True) + normalize=normalize) # make dataloaders train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True) -- GitLab