From 9e5a993d285dc5de835bb850e0081fd831902f75 Mon Sep 17 00:00:00 2001
From: Joerg Martin <joerg.martin@ptb.de>
Date: Wed, 26 Jan 2022 09:45:58 +0000
Subject: [PATCH] Basic function in plot_prediction.py

Was tested on EiV linear and non-EiV linear, but needs further
environment to generate plots.
---
 EIVPackage/EIVData/cubic.py     |   4 +-
 EIVPackage/EIVData/linear.py    |   3 +-
 EIVPackage/EIVData/quadratic.py |   3 +-
 EIVPackage/EIVData/sine.py      |   7 +-
 Experiments/create_tabular.py   |   2 +-
 Experiments/plot_coverage.py    |   6 +-
 Experiments/plot_prediction.py  | 156 ++++++++++++++++++++++++++++++++
 7 files changed, 171 insertions(+), 10 deletions(-)
 create mode 100644 Experiments/plot_prediction.py

diff --git a/EIVPackage/EIVData/cubic.py b/EIVPackage/EIVData/cubic.py
index 5d5c63e..a9eda38 100644
--- a/EIVPackage/EIVData/cubic.py
+++ b/EIVPackage/EIVData/cubic.py
@@ -10,7 +10,7 @@ slope = 1.0
 intercept = 0.0
 x_noise_strength = 0.05 * (input_range[1] - input_range[0])/2
 y_noise_strength = 3
-
+func = lambda true_x: slope * true_x**3 + intercept 
 def load_data(seed=0, splitting_part=0.8, normalize=True,
         return_ground_truth=False):
     """
@@ -34,7 +34,7 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
     true_x = input_range[0] + (input_range[1]-input_range[0])\
                   * torch.rand((total_number_of_datapoints,1),
                           generator=torch.Generator().manual_seed(seeds[0]))
-    true_y = slope * true_x**3 + intercept 
+    true_y = func(true_x)
     # add noise and normalize x and y
     (noisy_x, noisy_y), (true_x, true_y) = add_noise(
             tensor_list=(true_x, true_y),
diff --git a/EIVPackage/EIVData/linear.py b/EIVPackage/EIVData/linear.py
index 75c7e40..bbfc532 100644
--- a/EIVPackage/EIVData/linear.py
+++ b/EIVPackage/EIVData/linear.py
@@ -10,6 +10,7 @@ slope = 1.0
 intercept = 0.0
 x_noise_strength = 0.05
 y_noise_strength = 0.1
+func = lambda true_x: slope * true_x + intercept 
 
 def load_data(seed=0, splitting_part=0.8, normalize=True,
         return_ground_truth=False):
@@ -34,7 +35,7 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
     true_x = input_range[0] + (input_range[1]-input_range[0])\
                   * torch.rand((total_number_of_datapoints,1),
                           generator=torch.Generator().manual_seed(seeds[0]))
-    true_y = slope * true_x + intercept 
+    true_y = func(true_x)
     # add noise and normalize x and y
     (noisy_x, noisy_y), (true_x, true_y) = add_noise(
             tensor_list=(true_x, true_y),
diff --git a/EIVPackage/EIVData/quadratic.py b/EIVPackage/EIVData/quadratic.py
index a421482..c662b13 100644
--- a/EIVPackage/EIVData/quadratic.py
+++ b/EIVPackage/EIVData/quadratic.py
@@ -10,6 +10,7 @@ slope = 1.0
 intercept = 0.0
 x_noise_strength = 0.05
 y_noise_strength = 0.1
+func = lambda true_x: slope * true_x**2 + intercept 
 
 def load_data(seed=0, splitting_part=0.8, normalize=True,
         return_ground_truth=False):
@@ -34,7 +35,7 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
     true_x = input_range[0] + (input_range[1]-input_range[0])\
                   * torch.rand((total_number_of_datapoints,1),
                           generator=torch.Generator().manual_seed(seeds[0]))
-    true_y = slope * true_x**2 + intercept 
+    true_y = func(true_x)
     # add noise and normalize x and y
     (noisy_x, noisy_y), (true_x, true_y) = add_noise(
             tensor_list=(true_x, true_y),
diff --git a/EIVPackage/EIVData/sine.py b/EIVPackage/EIVData/sine.py
index d9ca0b8..f68112e 100644
--- a/EIVPackage/EIVData/sine.py
+++ b/EIVPackage/EIVData/sine.py
@@ -9,6 +9,9 @@ input_range = [-0.2,0.8]
 intercept = 0.0
 x_noise_strength = 0.02 
 y_noise_strength = 0.05
+func = lambda true_x: true_x +\
+            torch.sin(2 * torch.pi * true_x) +\
+            torch.sin(4 * torch.pi * true_x)
 
 def load_data(seed=0, splitting_part=0.8, normalize=True,
         return_ground_truth=False):
@@ -33,9 +36,7 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
     true_x = input_range[0] + (input_range[1]-input_range[0])\
                   * torch.rand((total_number_of_datapoints,1),
                           generator=torch.Generator().manual_seed(seeds[0]))
-    true_y = true_x +\
-            torch.sin(2 * torch.pi * true_x) +\
-            torch.sin(4 * torch.pi * true_x)
+    true_y = func(true_x)
     # add noise and normalize x and y
     (noisy_x, noisy_y), (true_x, true_y) = add_noise(
             tensor_list=(true_x, true_y),
diff --git a/Experiments/create_tabular.py b/Experiments/create_tabular.py
index 8b8373d..cc15d7c 100644
--- a/Experiments/create_tabular.py
+++ b/Experiments/create_tabular.py
@@ -2,7 +2,7 @@ import os
 import glob
 import json
 
-metrics_to_display = ['rmse','coverage_theory','coverage_numerical','true_coverage_numerical','avg_bias']
+metrics_to_display = ['rmse','logdens','coverage_numerical','true_coverage_numerical']
 show_incomplete = True
 
 list_of_result_files = glob.glob(os.path.join('results','*.json'))
diff --git a/Experiments/plot_coverage.py b/Experiments/plot_coverage.py
index b7a5116..455d283 100644
--- a/Experiments/plot_coverage.py
+++ b/Experiments/plot_coverage.py
@@ -362,8 +362,10 @@ plt.xlabel('q')
 plt.ylabel('EiV cov - nonEiV-cov')
 
 # datasets to plot and their coloring
-datasets = ['linear', 'quadratic','yacht','wine','power',
-        'protein','concrete','california','energy','kin8nm','msd','naval']
+datasets = ['linear', 'quadratic','cubic','sine',
+        'yacht','wine','power','protein','concrete',
+        'california','energy','kin8nm','msd','naval']
+
 colors = cm.rainbow(np.linspace(0,1,len(datasets)))
 
 # loop through data
diff --git a/Experiments/plot_prediction.py b/Experiments/plot_prediction.py
new file mode 100644
index 0000000..ef3891e
--- /dev/null
+++ b/Experiments/plot_prediction.py
@@ -0,0 +1,156 @@
+"""
+Plot predictions with uncertainties for (simulated) datasets with a ground 
+truth.
+"""
+import importlib
+import os
+import json
+
+import torch
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+from EIVArchitectures import Networks
+from EIVTrainingRoutines import train_and_store
+
+# coverage factor
+k = 1.96
+
+def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
+        x_noise_seed = 0, plotting_seed = 0, number_of_test_datapoints = 100):
+    """
+    Iterate through networks associated with `data` (short dataname) 
+    throuth their JSON configuration and evaluate their prediction and uncertainties for the (true) x in `x_range`. In addition, the values of the noisified `x_range` and
+    :data: String, short dataname
+    :x_range: An iterator yielding the (true) x values to consider
+    :eiv: Boolean. If True an EiV model is used, else an non-EiV model.
+    :number_of_draws: Number of draws to use for prediction. Take an int for
+    non-EiV models and a two-element list for EiV models.
+    :x_noise_seed: An integer. Will be used as a seed to generate the noise put
+    on `x_range`.
+    :plotting_seed: An integer. Needed for choosing which of the test datasets
+    will be used to returning the test datapoints.
+    :number_of_test_datapoints: A positive integer. Number of test_datapoints
+    :returns: numerical_coverage, theoretical_coverage
+    """
+    # load configuration file
+    if eiv:
+        with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as\
+                conf_file:
+            conf_dict = json.load(conf_file)
+    else:
+        with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as\
+                conf_file:
+            conf_dict = json.load(conf_file)
+
+    long_dataname = conf_dict["long_dataname"]
+    short_dataname = conf_dict["short_dataname"]
+
+
+    load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
+    x_noise_strength =\
+        importlib.import_module(f'EIVData.{long_dataname}').x_noise_strength
+
+    # switch to gpu, if possible
+    try:
+        gpu_number = conf_dict["gpu_number"]
+        device = torch.device(f'cuda:{gpu_number}')
+        try:
+            torch.tensor([0.0]).to(device)
+        except RuntimeError:
+            if torch.cuda.is_available():
+                print('Switched to GPU 0')
+                device = torch.device('cuda:0')
+            else:
+                print('No cuda available, using CPU')
+                device = torch.device('cpu')
+    except KeyError:
+        device = torch.device('cpu')
+
+
+    _, test_data = load_data(seed=plotting_seed)
+    input_dim = test_data[0][0].numel()
+    output_dim = test_data[0][1].numel()
+    test_x, test_y = test_data[:number_of_test_datapoints]
+
+    ## Create iterators for get_coverage_distribution
+    seed_list = range(conf_dict["seed_range"][0],
+            conf_dict["seed_range"][1])
+
+    # iterator for networks
+    def net_iterator(eiv=eiv, seed_list=seed_list):
+        """
+        Yields EiV models (if `eiv`) or
+        non-EiV models (if not `eiv`) for the seeds in
+        `seed_list` and `data`.
+        """
+        if eiv:
+            # load parameters
+            init_std_y = conf_dict["init_std_y_list"][0]
+            unscaled_reg = conf_dict["unscaled_reg"]
+            p = conf_dict["p"]
+            hidden_layers = conf_dict["hidden_layers"]
+            fixed_std_x = conf_dict["fixed_std_x"]
+            net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
+                    h=[input_dim, *hidden_layers, output_dim],
+                    fixed_std_x=fixed_std_x).to(device)
+            for seed in seed_list:
+                # load network paramaters
+                saved_file = os.path.join('saved_networks',
+                        f'eiv_{short_dataname}'\
+                                f'_init_std_y_{init_std_y:.3f}'\
+                                f'_ureg_{unscaled_reg:.1f}'\
+                                f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}'\
+                                f'_seed_{seed}.pkl')
+                train_and_store.open_stored_training(saved_file=saved_file,
+                        net=net, device=device)
+                yield net
+        else:
+            # load parameters
+            init_std_y = conf_dict["init_std_y_list"][0]
+            unscaled_reg = conf_dict["unscaled_reg"]
+            p = conf_dict["p"]
+            hidden_layers = conf_dict["hidden_layers"]
+            net = Networks.FNNBer(p=p, init_std_y=init_std_y,
+                    h=[input_dim, *hidden_layers, output_dim]).to(device)
+            for seed in seed_list:
+                saved_file = os.path.join('saved_networks',
+                            f'noneiv_{short_dataname}'\
+                                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                                    f'_p_{p:.2f}_seed_{seed}.pkl')
+                # load network paramaters
+                train_and_store.open_stored_training(saved_file=saved_file,
+                        net=net, device=device)
+                yield net
+    
+    noisy_x = x_range + x_noise_strength * torch.randn(x_range.shape,
+            generator=torch.Generator().manual_seed(x_noise_seed))
+    # add feature dimension (if necessary) and move to device
+    noisy_x = noisy_x.view((-1,input_dim)).to(device)
+    mean_collection, unc_collection = [], []
+    for net in net_iterator(eiv=eiv):
+        mean, unc = net.predict_mean_and_unc(noisy_x,
+                number_of_draws=number_of_draws)[:2]
+        if len(mean.shape) > 1:
+                # assume only batch and output dimension
+                assert len(mean.shape) == 2
+                assert len(unc.shape) == 2
+                # assume one-dimensional outputs
+                assert mean.shape[1] == 1
+                assert unc.shape[1] == 1
+                # make one-dimensional
+        mean = mean.view((-1,))
+        unc = unc.view((-1,))
+        mean_collection.append(mean)
+        unc_collection.append(unc)
+
+    # stack collections along a new, first dimension
+    mean_collection = torch.stack(mean_collection, dim=0)
+    unc_collection = torch.stack(unc_collection, dim=0)
+    # average
+    averaged_mean = torch.mean(mean_collection, dim=0)
+    averaged_unc = torch.mean(unc_collection, dim=0)
+    return averaged_mean, averaged_unc, (test_x, test_y)
+
+        
-- 
GitLab