From e2c57f2dbe9c6184873739cc965c23bf8fd0a077 Mon Sep 17 00:00:00 2001
From: Joerg Martin <joerg.martin@ptb.de>
Date: Wed, 2 Feb 2022 11:16:33 +0100
Subject: [PATCH] Introduce a 'normalize' configuration option to toggle data normalization

---
 EIVPackage/EIVData/cubic.py                   | 14 ++++++----
 EIVPackage/EIVData/linear.py                  | 14 ++++++----
 EIVPackage/EIVData/quadratic.py               | 14 ++++++----
 EIVPackage/EIVData/repeated_sampling.py       | 14 ++++++----
 EIVPackage/EIVData/sine.py                    | 14 ++++++----
 Experiments/configurations/eiv_cubic.json     |  1 +
 Experiments/configurations/eiv_linear.json    |  1 +
 Experiments/configurations/eiv_quadratic.json |  1 +
 Experiments/configurations/eiv_sine.json      |  1 +
 Experiments/configurations/noneiv_cubic.json  |  1 +
 Experiments/configurations/noneiv_linear.json |  1 +
 .../configurations/noneiv_quadratic.json      |  1 +
 Experiments/configurations/noneiv_sine.json   |  1 +
 Experiments/evaluate_metrics.py               | 22 +++++++++++-----
 Experiments/plot_coverage.py                  | 15 ++++++++---
 Experiments/plot_prediction.py                | 26 ++++++++++++++-----
 Experiments/train_eiv.py                      |  7 ++++-
 17 files changed, 105 insertions(+), 43 deletions(-)

diff --git a/EIVPackage/EIVData/cubic.py b/EIVPackage/EIVData/cubic.py
index c4a0654..6b11da7 100644
--- a/EIVPackage/EIVData/cubic.py
+++ b/EIVPackage/EIVData/cubic.py
@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
             seed_list=seeds[1:3],
             normalize=normalize,
             return_normalization=True)
-    def normalized_func(x):
-        unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-        y = func(unnormalized_x)
-        normalized_y = normalize_tensor(y, normalization_list[1])
-        return normalized_y
+    if normalize:
+        def normalized_func(x):  # func acting on normalized x, returning normalized y
+            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+            y = func(unnormalized_x)
+            normalized_y = normalize_tensor(y, normalization_list[1])
+            return normalized_y
+    else:
+        def normalized_func(x):  # normalize is False, so func is applied directly
+            return func(x)
     dataset_len = noisy_x.shape[0]
 
     # shuffle via seed
diff --git a/EIVPackage/EIVData/linear.py b/EIVPackage/EIVData/linear.py
index 499e9d1..dc2959d 100644
--- a/EIVPackage/EIVData/linear.py
+++ b/EIVPackage/EIVData/linear.py
@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
             seed_list=seeds[1:3],
             normalize=normalize,
             return_normalization=True)
-    def normalized_func(x):
-        unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-        y = func(unnormalized_x)
-        normalized_y = normalize_tensor(y, normalization_list[1])
-        return normalized_y
+    if normalize:
+        def normalized_func(x):  # func acting on normalized x, returning normalized y
+            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+            y = func(unnormalized_x)
+            normalized_y = normalize_tensor(y, normalization_list[1])
+            return normalized_y
+    else:
+        def normalized_func(x):  # normalize is False, so func is applied directly
+            return func(x)
     dataset_len = noisy_x.shape[0]
 
     # shuffle via seed
diff --git a/EIVPackage/EIVData/quadratic.py b/EIVPackage/EIVData/quadratic.py
index 13ab3f1..27e83d6 100644
--- a/EIVPackage/EIVData/quadratic.py
+++ b/EIVPackage/EIVData/quadratic.py
@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
             seed_list=seeds[1:3],
             normalize=normalize,
             return_normalization=True)
-    def normalized_func(x):
-        unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-        y = func(unnormalized_x)
-        normalized_y = normalize_tensor(y, normalization_list[1])
-        return normalized_y
+    if normalize:
+        def normalized_func(x):  # func acting on normalized x, returning normalized y
+            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+            y = func(unnormalized_x)
+            normalized_y = normalize_tensor(y, normalization_list[1])
+            return normalized_y
+    else:
+        def normalized_func(x):  # normalize is False, so func is applied directly
+            return func(x)
     dataset_len = noisy_x.shape[0]
 
     # shuffle via seed
diff --git a/EIVPackage/EIVData/repeated_sampling.py b/EIVPackage/EIVData/repeated_sampling.py
index f6339df..16f27ec 100644
--- a/EIVPackage/EIVData/repeated_sampling.py
+++ b/EIVPackage/EIVData/repeated_sampling.py
@@ -78,11 +78,15 @@ class repeated_sampling():
                         normalize=normalize,
                         normalization_list=[full_noisy_x, full_noisy_y],
                         return_normalization=False) # same normalization
-        def normalized_func(x):
-            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-            y = self.func(unnormalized_x)
-            normalized_y = normalize_tensor(y, normalization_list[1])
-            return normalized_y
+        if normalize:
+            def normalized_func(x):  # self.func acting on normalized x, returning normalized y
+                unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+                y = self.func(unnormalized_x)
+                normalized_y = normalize_tensor(y, normalization_list[1])
+                return normalized_y
+        else:
+            def normalized_func(x):  # normalize is False, so self.func is applied directly
+                return self.func(x)
         trainset = TensorDataset(noisy_train_x, noisy_train_y)
         testset = TensorDataset(noisy_test_x, noisy_test_y)
         true_trainset = TensorDataset(true_train_x, true_train_y,
diff --git a/EIVPackage/EIVData/sine.py b/EIVPackage/EIVData/sine.py
index a420654..308a308 100644
--- a/EIVPackage/EIVData/sine.py
+++ b/EIVPackage/EIVData/sine.py
@@ -54,11 +54,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
             seed_list=seeds[1:3],
             normalize=normalize,
             return_normalization=True)
-    def normalized_func(x):
-        unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-        y = func(unnormalized_x)
-        normalized_y = normalize_tensor(y, normalization_list[1])
-        return normalized_y
+    if normalize:
+        def normalized_func(x):  # func acting on normalized x, returning normalized y
+            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+            y = func(unnormalized_x)
+            normalized_y = normalize_tensor(y, normalization_list[1])
+            return normalized_y
+    else:
+        def normalized_func(x):  # normalize is False, so func is applied directly
+            return func(x)
     dataset_len = noisy_x.shape[0]
 
     # shuffle via seed
diff --git a/Experiments/configurations/eiv_cubic.json b/Experiments/configurations/eiv_cubic.json
index 6b81fef..9f9f7d8 100644
--- a/Experiments/configurations/eiv_cubic.json
+++ b/Experiments/configurations/eiv_cubic.json
@@ -1,6 +1,7 @@
 {
 	"long_dataname": "cubic",
 	"short_dataname": "cubic",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,
diff --git a/Experiments/configurations/eiv_linear.json b/Experiments/configurations/eiv_linear.json
index a83fc15..5754733 100644
--- a/Experiments/configurations/eiv_linear.json
+++ b/Experiments/configurations/eiv_linear.json
@@ -1,6 +1,7 @@
 {
 	"long_dataname": "linear",
 	"short_dataname": "linear",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,
diff --git a/Experiments/configurations/eiv_quadratic.json b/Experiments/configurations/eiv_quadratic.json
index 9b5c52e..7fd2bad 100644
--- a/Experiments/configurations/eiv_quadratic.json
+++ b/Experiments/configurations/eiv_quadratic.json
@@ -1,6 +1,7 @@
 {
 	"long_dataname": "quadratic",
 	"short_dataname": "quadratic",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,
diff --git a/Experiments/configurations/eiv_sine.json b/Experiments/configurations/eiv_sine.json
index b632e1d..6d0a36d 100644
--- a/Experiments/configurations/eiv_sine.json
+++ b/Experiments/configurations/eiv_sine.json
@@ -1,6 +1,7 @@
 {
 	"long_dataname": "sine",
 	"short_dataname": "sine",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,
diff --git a/Experiments/configurations/noneiv_cubic.json b/Experiments/configurations/noneiv_cubic.json
index 00fdce9..06a1cf3 100644
--- a/Experiments/configurations/noneiv_cubic.json
+++ b/Experiments/configurations/noneiv_cubic.json
@@ -1,6 +1,7 @@
 {
 	"long_dataname": "cubic",
 	"short_dataname": "cubic",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,
diff --git a/Experiments/configurations/noneiv_linear.json b/Experiments/configurations/noneiv_linear.json
index 1b2110a..7e77536 100644
--- a/Experiments/configurations/noneiv_linear.json
+++ b/Experiments/configurations/noneiv_linear.json
@@ -1,6 +1,7 @@
 {
 	"long_dataname": "linear",
 	"short_dataname": "linear",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,
diff --git a/Experiments/configurations/noneiv_quadratic.json b/Experiments/configurations/noneiv_quadratic.json
index 573d787..699cfa7 100644
--- a/Experiments/configurations/noneiv_quadratic.json
+++ b/Experiments/configurations/noneiv_quadratic.json
@@ -1,6 +1,7 @@
 {
 	"long_dataname": "quadratic",
 	"short_dataname": "quadratic",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,
diff --git a/Experiments/configurations/noneiv_sine.json b/Experiments/configurations/noneiv_sine.json
index c94c589..b7f9e15 100644
--- a/Experiments/configurations/noneiv_sine.json
+++ b/Experiments/configurations/noneiv_sine.json
@@ -1,6 +1,7 @@
 {
 	"long_dataname": "sine",
 	"short_dataname": "sine",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,
diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py
index bf052e8..6912c01 100644
--- a/Experiments/evaluate_metrics.py
+++ b/Experiments/evaluate_metrics.py
@@ -31,6 +31,12 @@ with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as conf_file:
     eiv_conf_dict = json.load(conf_file)
 with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as conf_file:
     noneiv_conf_dict = json.load(conf_file)
+# A missing 'normalize' entry means True; both configurations must agree,
+# since the same loaded data is used to evaluate the EiV and non-EiV models.
+normalize = eiv_conf_dict.get('normalize', True)
+if normalize != noneiv_conf_dict.get('normalize', True):
+    raise ValueError(f"'normalize' differs between eiv_{data}.json"
+            f" and noneiv_{data}.json")
 
 long_dataname = eiv_conf_dict["long_dataname"]
 short_dataname = eiv_conf_dict["short_dataname"]
@@ -40,7 +46,7 @@ print(f"Evaluating {long_dataname}")
 scale_outputs = False 
 load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
 
-train_data, test_data = load_data()
+train_data, test_data = load_data(normalize=normalize)
 input_dim = train_data[0][0].numel()
 output_dim = train_data[0][1].numel()
 
@@ -240,7 +246,8 @@ def collect_metrics(x_y_pairs, seed=0,
 def collect_full_seed_range_metrics(load_data,
         seed_range,test_batch_size = 100, test_samples = 10,
         noneiv_number_of_draws=100, eiv_number_of_draws=[100,5], device=device,
-        scale_outputs=scale_outputs):
+        scale_outputs=scale_outputs,
+        normalize=normalize):
     """
     Collect metrics that need all seeds for their computation.
     :param load_data: load_data map should take seed as an argument and,
@@ -257,6 +264,7 @@ def collect_full_seed_range_metrics(load_data,
     :param device: The torch.device to use
     :param scale_output: Boolean, scale the outputs for some metrics. Defaults
     to False.
+    :param normalize: Boolean, whether to normalize the data
     :returns: Dictionaries noneiv_metrics, eiv_metrics
     """
     noneiv_metrics = {}
@@ -267,9 +275,10 @@ def collect_full_seed_range_metrics(load_data,
         # load data according toseed
         try:
             train_data, test_data, true_train_data, true_test_data \
-                    = load_data(seed=seed, return_ground_truth=True)
+                    = load_data(seed=seed, return_ground_truth=True,
+                            normalize=normalize)
         except TypeError:
-            train_data, test_data = load_data(seed=seed)
+            train_data, test_data = load_data(seed=seed, normalize=normalize)
             true_train_data, true_test_data = None, None
 
         ## Compute x-dependant bias
@@ -460,9 +469,10 @@ number_of_test_samples = 2
 for seed in tqdm(seed_list):
     try:
         train_data, test_data, true_train_data, true_test_data \
-                = load_data(seed=seed, return_ground_truth=True)
+                = load_data(seed=seed, return_ground_truth=True,
+                        normalize=normalize)
     except TypeError:
-        train_data, test_data = load_data(seed=seed)
+        train_data, test_data = load_data(seed=seed, normalize=normalize)
         true_train_data, true_test_data = None, None
     if true_test_data is None:
         test_dataloader = DataLoader(test_data,
diff --git a/Experiments/plot_coverage.py b/Experiments/plot_coverage.py
index 455d283..d8f5485 100644
--- a/Experiments/plot_coverage.py
+++ b/Experiments/plot_coverage.py
@@ -48,6 +48,11 @@ def compute_coverages(data, eiv, number_of_draws,
 
     long_dataname = conf_dict["long_dataname"]
     short_dataname = conf_dict["short_dataname"]
+    try:
+        normalize = conf_dict['normalize']  # forwarded to every load_data call below
+    except KeyError:
+        # configuration has no 'normalize' entry: normalize by default
+        normalize = True
 
 
     load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
@@ -73,12 +78,13 @@ def compute_coverages(data, eiv, number_of_draws,
         # test whether there is a ground truth
         try:
             train_data, _, _,_  \
-                    = load_data(seed=0, return_ground_truth=True)
+                    = load_data(seed=0, return_ground_truth=True,
+                            normalize=normalize)
         except TypeError:
         # if not, end function
             return None,None
     else:
-        train_data, _ = load_data()
+        train_data, _ = load_data(normalize=normalize)
 
     print(f"Computing {'EiV' if eiv else 'non-EiV'} coverage for {long_dataname}")
 
@@ -145,14 +151,15 @@ def compute_coverages(data, eiv, number_of_draws,
         """
         for seed in seed_list:
             if not use_ground_truth:
-                _, test_data = load_data(seed=seed)
+                _, test_data = load_data(seed=seed, normalize=normalize)
                 test_dataloader = DataLoader(test_data, 
                         batch_size=batch_size,
                         shuffle=True)
                 yield test_dataloader
             else:
                 _, _, _, true_test =\
-                        load_data(seed=seed, return_ground_truth=True)
+                        load_data(seed=seed, return_ground_truth=True,
+                                normalize=normalize)
                 # take noisy x but unnoisy y
                 cut_true_test = VerticalCut(true_test,
                         components_to_pick=[2,1])
diff --git a/Experiments/plot_prediction.py b/Experiments/plot_prediction.py
index 61fac8d..6702506 100644
--- a/Experiments/plot_prediction.py
+++ b/Experiments/plot_prediction.py
@@ -98,7 +98,8 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
 
 
     # determine dimensions
-    _, test_data = load_data(seed=plotting_seed, return_ground_truth=False)
+    _, test_data, normalized_func = load_data(seed=plotting_seed, return_ground_truth=False,
+            return_normalized_func=True)  # NOTE(review): the config's 'normalize' is not forwarded here, so load_data uses its default - confirm this is intended
     input_dim = test_data[0][0].numel()
     output_dim = test_data[0][1].numel()
     assert output_dim == 1
@@ -209,7 +210,8 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
 
 
 data_list = ['sine'] # short datanames
-list_x_range = [torch.linspace(0.0,1.0, 50)]
+#TODO: Check which ranges are "correct"
+list_x_range = [torch.linspace(-2.5,2.5, 50)]
 list_color = [('red','blue')]
 list_number_of_draws = [((100,5), 100)]
 for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list,
@@ -229,11 +231,21 @@ for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list,
         plt.figure(i)
         plt.clf()
         x_values, y_values = eiv_plotting_dictionary['range_points']
-        plt.plot(x_values, y_values,'-', color='k')
-        # plt.plot(x_values, eiv_plotting_dictionary['prediction'],'-',
-        #         color=color[0])
-        # plt.plot(x_values, noneiv_plotting_dictionary['prediction'],'-',
-        #         color=color[1])
+        plt.plot(x_values.flatten(), y_values.flatten(),'-', color='k')  # curve stored under 'range_points'
+        eiv_pred = eiv_plotting_dictionary['prediction']
+        eiv_unc = eiv_plotting_dictionary['uncertainty']
+        plt.plot(x_values.flatten(), eiv_pred,'-',
+                color=color[0])
+        plt.fill_between(x_values.flatten(), eiv_pred-k * eiv_unc,  # band of +/- k * uncertainty; NOTE(review): k is not defined in this hunk - confirm it is set earlier
+                eiv_pred + k * eiv_unc,
+                color=color[0], alpha=0.5)
+        noneiv_pred = noneiv_plotting_dictionary['prediction']
+        noneiv_unc = noneiv_plotting_dictionary['uncertainty']
+        plt.plot(x_values.flatten(), noneiv_pred,'-',
+                color=color[1])
+        plt.fill_between(x_values.flatten(), noneiv_pred-k * noneiv_unc,  # same band for the non-EiV model
+                noneiv_pred + k * noneiv_unc,
+                color=color[1], alpha=0.5)
     else:
         # multidimensional handling not included yet
         pass
diff --git a/Experiments/train_eiv.py b/Experiments/train_eiv.py
index 116408f..3ecccea 100644
--- a/Experiments/train_eiv.py
+++ b/Experiments/train_eiv.py
@@ -50,6 +50,11 @@ fixed_std_x = conf_dict['fixed_std_x']
 gamma = conf_dict["gamma"]
 hidden_layers = conf_dict["hidden_layers"]
 seed_range = conf_dict['seed_range']
+try:
+    normalize = conf_dict['normalize']  # forwarded to load_data in train_on_data
+except KeyError:
+    # configuration has no 'normalize' entry: normalize by default
+    normalize = True
 
 print(f"Training on {long_dataname} data")
 
@@ -199,7 +204,7 @@ def train_on_data(init_std_y, seed):
     set_seeds(seed)
     # load Datasets
     train_data, test_data = load_data(seed=seed, splitting_part=0.8,
-            normalize=True)
+            normalize=normalize)
     # make dataloaders
     train_dataloader = DataLoader(train_data, batch_size=batch_size, 
             shuffle=True)
-- 
GitLab