Introduced normalize for normalized data

e2c57f2d · Jörg Martin · f9f0119d · e2c57f2d · e2c57f2d · e2c57f2d
Commit e2c57f2d authored 3 years ago by Jörg Martin
--- a/EIVPackage/EIVData/cubic.py
+++ b/EIVPackage/EIVData/cubic.py
@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
            seed_list=seeds[1:3],
            normalize=normalize,
            return_normalization=True)
-    def normalized_func(x):
+    if normalize:
-        unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+        def normalized_func(x):
-        y = func(unnormalized_x)
+            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-        normalized_y = normalize_tensor(y, normalization_list[1])
+            y = func(unnormalized_x)
-        return normalized_y
+            normalized_y = normalize_tensor(y, normalization_list[1])
+            return normalized_y
+    else:
+        def normalized_func(x):
+            return func(x)
    dataset_len = noisy_x.shape[0]
    # shuffle via seed

--- a/EIVPackage/EIVData/linear.py
+++ b/EIVPackage/EIVData/linear.py
@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
            seed_list=seeds[1:3],
            normalize=normalize,
            return_normalization=True)
-    def normalized_func(x):
+    if normalize:
-        unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+        def normalized_func(x):
-        y = func(unnormalized_x)
+            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-        normalized_y = normalize_tensor(y, normalization_list[1])
+            y = func(unnormalized_x)
-        return normalized_y
+            normalized_y = normalize_tensor(y, normalization_list[1])
+            return normalized_y
+    else:
+        def normalized_func(x):
+            return func(x)
    dataset_len = noisy_x.shape[0]
    # shuffle via seed

--- a/EIVPackage/EIVData/quadratic.py
+++ b/EIVPackage/EIVData/quadratic.py
@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
            seed_list=seeds[1:3],
            normalize=normalize,
            return_normalization=True)
-    def normalized_func(x):
+    if normalize:
-        unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+        def normalized_func(x):
-        y = func(unnormalized_x)
+            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-        normalized_y = normalize_tensor(y, normalization_list[1])
+            y = func(unnormalized_x)
-        return normalized_y
+            normalized_y = normalize_tensor(y, normalization_list[1])
+            return normalized_y
+    else:
+        def normalized_func(x):
+            return func(x)
    dataset_len = noisy_x.shape[0]
    # shuffle via seed

--- a/EIVPackage/EIVData/repeated_sampling.py
+++ b/EIVPackage/EIVData/repeated_sampling.py
@@ -78,11 +78,15 @@ class repeated_sampling():
                        normalize=normalize,
                        normalization_list=[full_noisy_x, full_noisy_y],
                        return_normalization=False) # same normalization
-        def normalized_func(x):
+        if normalize:
-            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+            def normalized_func(x):
-            y = self.func(unnormalized_x)
+                unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-            normalized_y = normalize_tensor(y, normalization_list[1])
+                y = self.func(unnormalized_x)
-            return normalized_y
+                normalized_y = normalize_tensor(y, normalization_list[1])
+                return normalized_y
+        else:
+            def normalized_func(x):
+                return self.func(x)
        trainset = TensorDataset(noisy_train_x, noisy_train_y)
        testset = TensorDataset(noisy_test_x, noisy_test_y)
        true_trainset = TensorDataset(true_train_x, true_train_y,

--- a/EIVPackage/EIVData/sine.py
+++ b/EIVPackage/EIVData/sine.py
@@ -54,11 +54,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
            seed_list=seeds[1:3],
            normalize=normalize,
            return_normalization=True)
-    def normalized_func(x):
+    if normalize:
-        unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+        def normalized_func(x):
-        y = func(unnormalized_x)
+            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-        normalized_y = normalize_tensor(y, normalization_list[1])
+            y = func(unnormalized_x)
-        return normalized_y
+            normalized_y = normalize_tensor(y, normalization_list[1])
+            return normalized_y
+    else:
+        def normalized_func(x):
+            return func(x)
    dataset_len = noisy_x.shape[0]
    # shuffle via seed

--- a/Experiments/configurations/eiv_cubic.json
+++ b/Experiments/configurations/eiv_cubic.json
 {
 	"long_dataname": "cubic",
 	"short_dataname": "cubic",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,

--- a/Experiments/configurations/eiv_linear.json
+++ b/Experiments/configurations/eiv_linear.json
 {
 	"long_dataname": "linear",
 	"short_dataname": "linear",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,

--- a/Experiments/configurations/eiv_quadratic.json
+++ b/Experiments/configurations/eiv_quadratic.json
 {
 	"long_dataname": "quadratic",
 	"short_dataname": "quadratic",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,

--- a/Experiments/configurations/eiv_sine.json
+++ b/Experiments/configurations/eiv_sine.json
 {
 	"long_dataname": "sine",
 	"short_dataname": "sine",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,

--- a/Experiments/configurations/noneiv_cubic.json
+++ b/Experiments/configurations/noneiv_cubic.json
 {
 	"long_dataname": "cubic",
 	"short_dataname": "cubic",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,

--- a/Experiments/configurations/noneiv_linear.json
+++ b/Experiments/configurations/noneiv_linear.json
 {
 	"long_dataname": "linear",
 	"short_dataname": "linear",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,

--- a/Experiments/configurations/noneiv_quadratic.json
+++ b/Experiments/configurations/noneiv_quadratic.json
 {
 	"long_dataname": "quadratic",
 	"short_dataname": "quadratic",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,

--- a/Experiments/configurations/noneiv_sine.json
+++ b/Experiments/configurations/noneiv_sine.json
 {
 	"long_dataname": "sine",
 	"short_dataname": "sine",
+	"normalize": false,
 	"lr": 1e-3,
 	"batch_size": 64,
 	"test_batch_size": 800,

--- a/Experiments/evaluate_metrics.py
+++ b/Experiments/evaluate_metrics.py
@@ -31,6 +31,12 @@ with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as conf_file:
    eiv_conf_dict = json.load(conf_file)
 with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as conf_file:
    noneiv_conf_dict = json.load(conf_file)
+try:
+    normalize = eiv_conf_dict['normalize']
+    assert normalize == noneiv_conf_dict['normalize']
+except KeyError:
+    # normalize by default
+    normalize = True
 long_dataname = eiv_conf_dict["long_dataname"]
 short_dataname = eiv_conf_dict["short_dataname"]
@@ -40,7 +46,7 @@ print(f"Evaluating {long_dataname}")
 scale_outputs = False 
 load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
-train_data, test_data = load_data()
+train_data, test_data = load_data(normalize=normalize)
 input_dim = train_data[0][0].numel()
 output_dim = train_data[0][1].numel()
@@ -240,7 +246,8 @@ def collect_metrics(x_y_pairs, seed=0,
 def collect_full_seed_range_metrics(load_data,
        seed_range,test_batch_size = 100, test_samples = 10,
        noneiv_number_of_draws=100, eiv_number_of_draws=[100,5], device=device,
-        scale_outputs=scale_outputs):
+        scale_outputs=scale_outputs,
+        normalize=normalize):
    """
    Collect metrics that need all seeds for their computation.
    :param load_data: load_data map should take seed as an argument and,
@@ -257,6 +264,7 @@ def collect_full_seed_range_metrics(load_data,
    :param device: The torch.device to use
    :param scale_outputs: Boolean, scale the outputs for some metrics. Defaults
    to False.
+    :param normalize: Boolean, whether to normalize the data
    :returns: Dictionaries noneiv_metrics, eiv_metrics
    """
    noneiv_metrics = {}
@@ -267,9 +275,10 @@ def collect_full_seed_range_metrics(load_data,
        # load data according to seed
        try:
            train_data, test_data, true_train_data, true_test_data \
-                    = load_data(seed=seed, return_ground_truth=True)
+                    = load_data(seed=seed, return_ground_truth=True,
+                            normalize=normalize)
        except TypeError:
-            train_data, test_data = load_data(seed=seed)
+            train_data, test_data = load_data(seed=seed, normalize=normalize)
            true_train_data, true_test_data = None, None
        ## Compute x-dependent bias
@@ -460,9 +469,10 @@ number_of_test_samples = 2
 for seed in tqdm(seed_list):
    try:
        train_data, test_data, true_train_data, true_test_data \
-                = load_data(seed=seed, return_ground_truth=True)
+                = load_data(seed=seed, return_ground_truth=True,
+                        normalize=normalize)
    except TypeError:
-        train_data, test_data = load_data(seed=seed)
+        train_data, test_data = load_data(seed=seed, normalize=normalize)
        true_train_data, true_test_data = None, None
    if true_test_data is None:
        test_dataloader = DataLoader(test_data,

--- a/Experiments/plot_coverage.py
+++ b/Experiments/plot_coverage.py
@@ -48,6 +48,11 @@ def compute_coverages(data, eiv, number_of_draws,
    long_dataname = conf_dict["long_dataname"]
    short_dataname = conf_dict["short_dataname"]
+    try:
+        normalize = conf_dict['normalize']
+    except KeyError:
+        # normalize by default
+        normalize = True
    load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
@@ -73,12 +78,13 @@ def compute_coverages(data, eiv, number_of_draws,
        # test whether there is a ground truth
        try:
            train_data, _, _,_  \
-                    = load_data(seed=0, return_ground_truth=True)
+                    = load_data(seed=0, return_ground_truth=True,
+                            normalize=normalize)
        except TypeError:
        # if not, end function
            return None,None
    else:
-        train_data, _ = load_data()
+        train_data, _ = load_data(normalize=normalize)
    print(f"Computing {'EiV' if eiv else 'non-EiV'} coverage for {long_dataname}")
@@ -145,14 +151,15 @@ def compute_coverages(data, eiv, number_of_draws,
        """
        for seed in seed_list:
            if not use_ground_truth:
-                _, test_data = load_data(seed=seed)
+                _, test_data = load_data(seed=seed, normalize=normalize)
                test_dataloader = DataLoader(test_data, 
                        batch_size=batch_size,
                        shuffle=True)
                yield test_dataloader
            else:
                _, _, _, true_test =\
-                        load_data(seed=seed, return_ground_truth=True)
+                        load_data(seed=seed, return_ground_truth=True,
+                                normalize=normalize)
                # take noisy x but unnoisy y
                cut_true_test = VerticalCut(true_test,
                        components_to_pick=[2,1])

--- a/Experiments/plot_prediction.py
+++ b/Experiments/plot_prediction.py
@@ -98,7 +98,8 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
    # determine dimensions
-    _, test_data = load_data(seed=plotting_seed, return_ground_truth=False)
+    _, test_data, normalized_func = load_data(seed=plotting_seed, return_ground_truth=False,
+            return_normalized_func=True)
    input_dim = test_data[0][0].numel()
    output_dim = test_data[0][1].numel()
    assert output_dim == 1
@@ -209,7 +210,8 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
 data_list = ['sine'] # short datanames
-list_x_range = [torch.linspace(0.0,1.0, 50)]
+#TODO: Check which ranges are "correct"
+list_x_range = [torch.linspace(-2.5,2.5, 50)]
 list_color = [('red','blue')]
 list_number_of_draws = [((100,5), 100)]
 for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list,
@@ -229,11 +231,21 @@ for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list,
        plt.figure(i)
        plt.clf()
        x_values, y_values = eiv_plotting_dictionary['range_points']
-        plt.plot(x_values, y_values,'-', color='k')
+        plt.plot(x_values.flatten(), y_values.flatten(),'-', color='k')
-        # plt.plot(x_values, eiv_plotting_dictionary['prediction'],'-',
+        eiv_pred = eiv_plotting_dictionary['prediction']
-        #         color=color[0])
+        eiv_unc = eiv_plotting_dictionary['uncertainty']
-        # plt.plot(x_values, noneiv_plotting_dictionary['prediction'],'-',
+        plt.plot(x_values, eiv_pred,'-',
-        #         color=color[1])
+                color=color[0])
+        plt.fill_between(x_values.flatten(), eiv_pred-k * eiv_unc,
+                eiv_pred + k * eiv_unc,
+                color=color[0], alpha=0.5)
+        noneiv_pred = noneiv_plotting_dictionary['prediction']
+        noneiv_unc = noneiv_plotting_dictionary['uncertainty']
+        plt.plot(x_values.flatten(), noneiv_pred,'-',
+                color=color[1])
+        plt.fill_between(x_values.flatten(), noneiv_pred-k * noneiv_unc,
+                noneiv_pred + k * noneiv_unc,
+                color=color[1], alpha=0.5)
    else:
        # multidimensional handling not included yet
        pass

--- a/Experiments/train_eiv.py
+++ b/Experiments/train_eiv.py
@@ -50,6 +50,11 @@ fixed_std_x = conf_dict['fixed_std_x']
 gamma = conf_dict["gamma"]
 hidden_layers = conf_dict["hidden_layers"]
 seed_range = conf_dict['seed_range']
+try:
+    normalize = conf_dict['normalize']
+except KeyError:
+    # normalize by default
+    normalize = True
 print(f"Training on {long_dataname} data")
@@ -199,7 +204,7 @@ def train_on_data(init_std_y, seed):
    set_seeds(seed)
    # load Datasets
    train_data, test_data = load_data(seed=seed, splitting_part=0.8,
-            normalize=True)
+            normalize=normalize)
    # make dataloaders
    train_dataloader = DataLoader(train_data, batch_size=batch_size, 
            shuffle=True)