From c07920e29056d8bfa8a9d2392902d7190b23b698 Mon Sep 17 00:00:00 2001
From: Joerg Martin <joerg.martin@ptb.de>
Date: Wed, 1 Dec 2021 12:13:33 +0100
Subject: [PATCH] NonEiV training included

This covers all regression datasets treated in the MC Dropout and Deep
Ensemble papers. Results are comparable or even better. For multivariate
datasets, the decouple_dimensions keyword in the evaluation scripts can
be used to follow the (rather weird) convention of these papers.
---
 Experiments/evaluate_california.py     | 58 ++++++++++++++++++++++++++
 Experiments/evaluate_energy.py         |  2 +-
 Experiments/evaluate_kin8nm.py         |  2 +-
 Experiments/evaluate_msd.py            |  2 +-
 Experiments/evaluate_naval.py          |  2 +-
 Experiments/evaluate_power.py          |  2 +-
 Experiments/evaluate_protein.py        |  2 +-
 Experiments/evaluate_wine.py           |  2 +-
 Experiments/evaluate_yacht.py          |  2 +-
 Experiments/train_noneiv_california.py |  4 +-
 Experiments/train_noneiv_concrete.py   |  4 +-
 Experiments/train_noneiv_energy.py     |  4 +-
 Experiments/train_noneiv_kin8nm.py     |  4 +-
 Experiments/train_noneiv_msd.py        |  4 +-
 Experiments/train_noneiv_naval.py      |  4 +-
 Experiments/train_noneiv_power.py      |  4 +-
 Experiments/train_noneiv_protein.py    |  4 +-
 Experiments/train_noneiv_wine.py       |  4 +-
 Experiments/train_noneiv_yacht.py      |  4 +-
 README.md                              |  4 +-
 20 files changed, 88 insertions(+), 30 deletions(-)
 create mode 100644 Experiments/evaluate_california.py

diff --git a/Experiments/evaluate_california.py b/Experiments/evaluate_california.py
new file mode 100644
index 0000000..be631ef
--- /dev/null
+++ b/Experiments/evaluate_california.py
@@ -0,0 +1,58 @@
+import os
+import numpy as np
+import torch
+import torch.backends.cudnn
+from torch.utils.data import DataLoader
+from torch.utils.tensorboard.writer import SummaryWriter
+
+from EIVArchitectures import Networks, initialize_weights
+from EIVData.california_housing import load_data
+from EIVTrainingRoutines import train_and_store, loss_functions
+
+from train_noneiv_california import p, init_std_y_list, seed_list, unscaled_reg, hidden_layers
+
+
+train_data, test_data = load_data()
+test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data), 800))))
+
+seed = seed_list[0]
+init_std_y = init_std_y_list[0]
+saved_file = os.path.join('saved_networks',
+            f'noneiv_california'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_p_{p:.2f}_seed_{seed}.pkl')
+
+input_dim = train_data[0][0].numel()
+output_dim = train_data[0][1].numel()
+net = Networks.FNNBer(p=p, init_std_y=init_std_y,
+        h=[input_dim, *hidden_layers, output_dim])
+train_and_store.open_stored_training(saved_file=saved_file,
+        net=net)
+
+
+# RMSE
+x,y = next(iter(test_dataloader))
+out = net(x)[0]
+if len(y.shape) <=1:
+    y = y.view((-1,1))
+assert y.shape == out.shape
+res = y-out
+scale = train_data.dataset.std_labels
+scaled_res = res * scale.view((1,-1))
+scaled_res = scaled_res.detach().cpu().numpy().flatten()
+rmse = np.sqrt(np.mean(scaled_res**2)) 
+print(f'RMSE {rmse:.3f}')
+
+
+# NLL
+x,y = next(iter(test_dataloader))
+training_state = net.training
+net.train()
+logdens = net.predictive_logdensity(x, y, number_of_draws=100,
+        decouple_dimensions=True,
+        scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
+if training_state:
+    net.train()
+else:
+    net.eval()
+print(f'Dropout predictive {logdens:.3f}')
diff --git a/Experiments/evaluate_energy.py b/Experiments/evaluate_energy.py
index 548596a..e9d74e3 100644
--- a/Experiments/evaluate_energy.py
+++ b/Experiments/evaluate_energy.py
@@ -19,7 +19,7 @@ seed = seed_list[0]
 init_std_y = init_std_y_list[0]
 saved_file = os.path.join('saved_networks',
             f'noneiv_energy'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
 
 input_dim = train_data[0][0].numel()
diff --git a/Experiments/evaluate_kin8nm.py b/Experiments/evaluate_kin8nm.py
index e11f441..a9d8ae6 100644
--- a/Experiments/evaluate_kin8nm.py
+++ b/Experiments/evaluate_kin8nm.py
@@ -19,7 +19,7 @@ seed = seed_list[0]
 init_std_y = init_std_y_list[0]
 saved_file = os.path.join('saved_networks',
             f'noneiv_kin8nm'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
 
 input_dim = train_data[0][0].numel()
diff --git a/Experiments/evaluate_msd.py b/Experiments/evaluate_msd.py
index abf951d..041b755 100644
--- a/Experiments/evaluate_msd.py
+++ b/Experiments/evaluate_msd.py
@@ -19,7 +19,7 @@ seed = seed_list[0]
 init_std_y = init_std_y_list[0]
 saved_file = os.path.join('saved_networks',
             f'noneiv_msd'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
 
 input_dim = train_data[0][0].numel()
diff --git a/Experiments/evaluate_naval.py b/Experiments/evaluate_naval.py
index 55e22d3..63c88a3 100644
--- a/Experiments/evaluate_naval.py
+++ b/Experiments/evaluate_naval.py
@@ -19,7 +19,7 @@ seed = seed_list[0]
 init_std_y = init_std_y_list[0]
 saved_file = os.path.join('saved_networks',
             f'noneiv_naval'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
 
 input_dim = train_data[0][0].numel()
diff --git a/Experiments/evaluate_power.py b/Experiments/evaluate_power.py
index fdf3e6e..bc4d328 100644
--- a/Experiments/evaluate_power.py
+++ b/Experiments/evaluate_power.py
@@ -19,7 +19,7 @@ seed = seed_list[0]
 init_std_y = init_std_y_list[0]
 saved_file = os.path.join('saved_networks',
             f'noneiv_power'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
 
 input_dim = train_data[0][0].numel()
diff --git a/Experiments/evaluate_protein.py b/Experiments/evaluate_protein.py
index 985eee2..de32d3c 100644
--- a/Experiments/evaluate_protein.py
+++ b/Experiments/evaluate_protein.py
@@ -19,7 +19,7 @@ seed = seed_list[0]
 init_std_y = init_std_y_list[0]
 saved_file = os.path.join('saved_networks',
             f'noneiv_protein'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
 
 input_dim = train_data[0][0].numel()
diff --git a/Experiments/evaluate_wine.py b/Experiments/evaluate_wine.py
index 3031b84..02be10d 100644
--- a/Experiments/evaluate_wine.py
+++ b/Experiments/evaluate_wine.py
@@ -19,7 +19,7 @@ seed = seed_list[0]
 init_std_y = init_std_y_list[0]
 saved_file = os.path.join('saved_networks',
             f'noneiv_wine'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
 
 input_dim = train_data[0][0].numel()
diff --git a/Experiments/evaluate_yacht.py b/Experiments/evaluate_yacht.py
index 6db62c4..842674d 100644
--- a/Experiments/evaluate_yacht.py
+++ b/Experiments/evaluate_yacht.py
@@ -19,7 +19,7 @@ seed = seed_list[0]
 init_std_y = init_std_y_list[0]
 saved_file = os.path.join('saved_networks',
             f'noneiv_yacht'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
 
 input_dim = train_data[0][0].numel()
diff --git a/Experiments/train_noneiv_california.py b/Experiments/train_noneiv_california.py
index 87ad6c4..773a30d 100644
--- a/Experiments/train_noneiv_california.py
+++ b/Experiments/train_noneiv_california.py
@@ -36,7 +36,7 @@ def set_seeds(seed):
     np.random.seed(seed)
     random.seed(seed) 
     torch.manual_seed(seed)
-seed_list = [0,]
+seed_list = range(10)
 
 # to store the RMSE
 rmse_chain = []
@@ -125,7 +125,7 @@ def train_on_data(init_std_y, seed):
     # run and save
     save_file = os.path.join('saved_networks',
             f'noneiv_california'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
     train_and_store.train_and_store(net=net, 
             epoch_map=epoch_map,
diff --git a/Experiments/train_noneiv_concrete.py b/Experiments/train_noneiv_concrete.py
index 625db7d..d4a48a8 100644
--- a/Experiments/train_noneiv_concrete.py
+++ b/Experiments/train_noneiv_concrete.py
@@ -36,7 +36,7 @@ def set_seeds(seed):
     np.random.seed(seed)
     random.seed(seed) 
     torch.manual_seed(seed)
-seed_list = [0,]
+seed_list = range(10)
 
 # to store the RMSE
 rmse_chain = []
@@ -125,7 +125,7 @@ def train_on_data(init_std_y, seed):
     # run and save
     save_file = os.path.join('saved_networks',
             f'noneiv_concrete'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
     train_and_store.train_and_store(net=net, 
             epoch_map=epoch_map,
diff --git a/Experiments/train_noneiv_energy.py b/Experiments/train_noneiv_energy.py
index a635741..04f299a 100644
--- a/Experiments/train_noneiv_energy.py
+++ b/Experiments/train_noneiv_energy.py
@@ -36,7 +36,7 @@ def set_seeds(seed):
     np.random.seed(seed)
     random.seed(seed) 
     torch.manual_seed(seed)
-seed_list = [0,]
+seed_list = range(10)
 
 # to store the RMSE
 rmse_chain = []
@@ -125,7 +125,7 @@ def train_on_data(init_std_y, seed):
     # run and save
     save_file = os.path.join('saved_networks',
             f'noneiv_energy'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
     train_and_store.train_and_store(net=net, 
             epoch_map=epoch_map,
diff --git a/Experiments/train_noneiv_kin8nm.py b/Experiments/train_noneiv_kin8nm.py
index 0baa257..90af665 100644
--- a/Experiments/train_noneiv_kin8nm.py
+++ b/Experiments/train_noneiv_kin8nm.py
@@ -36,7 +36,7 @@ def set_seeds(seed):
     np.random.seed(seed)
     random.seed(seed) 
     torch.manual_seed(seed)
-seed_list = [0,]
+seed_list = range(10)
 
 # to store the RMSE
 rmse_chain = []
@@ -125,7 +125,7 @@ def train_on_data(init_std_y, seed):
     # run and save
     save_file = os.path.join('saved_networks',
             f'noneiv_kin8nm'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
     train_and_store.train_and_store(net=net, 
             epoch_map=epoch_map,
diff --git a/Experiments/train_noneiv_msd.py b/Experiments/train_noneiv_msd.py
index a38b521..f80c03e 100644
--- a/Experiments/train_noneiv_msd.py
+++ b/Experiments/train_noneiv_msd.py
@@ -36,7 +36,7 @@ def set_seeds(seed):
     np.random.seed(seed)
     random.seed(seed) 
     torch.manual_seed(seed)
-seed_list = [0,]
+seed_list = range(10)
 
 # to store the RMSE
 rmse_chain = []
@@ -125,7 +125,7 @@ def train_on_data(init_std_y, seed):
     # run and save
     save_file = os.path.join('saved_networks',
             f'noneiv_msd'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
     train_and_store.train_and_store(net=net, 
             epoch_map=epoch_map,
diff --git a/Experiments/train_noneiv_naval.py b/Experiments/train_noneiv_naval.py
index 2b03843..f96c66e 100644
--- a/Experiments/train_noneiv_naval.py
+++ b/Experiments/train_noneiv_naval.py
@@ -36,7 +36,7 @@ def set_seeds(seed):
     np.random.seed(seed)
     random.seed(seed) 
     torch.manual_seed(seed)
-seed_list = [0,]
+seed_list = range(10)
 
 # to store the RMSE
 rmse_chain = []
@@ -125,7 +125,7 @@ def train_on_data(init_std_y, seed):
     # run and save
     save_file = os.path.join('saved_networks',
             f'noneiv_naval'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
     train_and_store.train_and_store(net=net, 
             epoch_map=epoch_map,
diff --git a/Experiments/train_noneiv_power.py b/Experiments/train_noneiv_power.py
index 14ba120..c63b571 100644
--- a/Experiments/train_noneiv_power.py
+++ b/Experiments/train_noneiv_power.py
@@ -36,7 +36,7 @@ def set_seeds(seed):
     np.random.seed(seed)
     random.seed(seed) 
     torch.manual_seed(seed)
-seed_list = [0,]
+seed_list = range(10)
 
 # to store the RMSE
 rmse_chain = []
@@ -125,7 +125,7 @@ def train_on_data(init_std_y, seed):
     # run and save
     save_file = os.path.join('saved_networks',
             f'noneiv_power'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
     train_and_store.train_and_store(net=net, 
             epoch_map=epoch_map,
diff --git a/Experiments/train_noneiv_protein.py b/Experiments/train_noneiv_protein.py
index 55db23c..ca73d6a 100644
--- a/Experiments/train_noneiv_protein.py
+++ b/Experiments/train_noneiv_protein.py
@@ -36,7 +36,7 @@ def set_seeds(seed):
     np.random.seed(seed)
     random.seed(seed) 
     torch.manual_seed(seed)
-seed_list = [0,]
+seed_list = range(10)
 
 # to store the RMSE
 rmse_chain = []
@@ -125,7 +125,7 @@ def train_on_data(init_std_y, seed):
     # run and save
     save_file = os.path.join('saved_networks',
             f'noneiv_protein'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
     train_and_store.train_and_store(net=net, 
             epoch_map=epoch_map,
diff --git a/Experiments/train_noneiv_wine.py b/Experiments/train_noneiv_wine.py
index 837eb72..dc239f7 100644
--- a/Experiments/train_noneiv_wine.py
+++ b/Experiments/train_noneiv_wine.py
@@ -36,7 +36,7 @@ def set_seeds(seed):
     np.random.seed(seed)
     random.seed(seed) 
     torch.manual_seed(seed)
-seed_list = [0,]
+seed_list = range(10)
 
 # to store the RMSE
 rmse_chain = []
@@ -125,7 +125,7 @@ def train_on_data(init_std_y, seed):
     # run and save
     save_file = os.path.join('saved_networks',
             f'noneiv_wine'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
     train_and_store.train_and_store(net=net, 
             epoch_map=epoch_map,
diff --git a/Experiments/train_noneiv_yacht.py b/Experiments/train_noneiv_yacht.py
index 7640817..d163e54 100644
--- a/Experiments/train_noneiv_yacht.py
+++ b/Experiments/train_noneiv_yacht.py
@@ -36,7 +36,7 @@ def set_seeds(seed):
     np.random.seed(seed)
     random.seed(seed) 
     torch.manual_seed(seed)
-seed_list = [0,]
+seed_list = range(10)
 
 # to store the RMSE
 rmse_chain = []
@@ -125,7 +125,7 @@ def train_on_data(init_std_y, seed):
     # run and save
     save_file = os.path.join('saved_networks',
             f'noneiv_yacht'\
-                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
                     f'_p_{p:.2f}_seed_{seed}.pkl')
     train_and_store.train_and_store(net=net, 
             epoch_map=epoch_map,
diff --git a/README.md b/README.md
index 912aa64..7551f09 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ This directory lists the source code for the article `Errors-in-Variables for de
 
 ## Requirements
 
-The software used to produce the results from the preprint was written in [Python 3](https://www.python.org/). If not already installed, the easiest way to set up Python is usually via [Anaconda](https://www.anaconda.com/). To use the software, the installation of some additional packages is required. This is discussed below. To avoid any global impacts on the Python install, especially if the system interpreter is used, it might be preferable to do the following in a virtual environment, either in [Anaconda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) or by using the [venv](https://docs.python.org/3/tutorial/venv.html) module. The Python version used for the results in the preprint is 3.8.5.
+The software used to produce the results from the preprint was written in [Python 3](https://www.python.org/). If not already installed, the easiest way to set up Python is usually via [Anaconda](https://www.anaconda.com/). To use the software, the installation of some additional packages is required. This is discussed below. To avoid any global impacts on the Python install, especially if the system interpreter is used, it might be preferable to do the following in a virtual environment, either in [Anaconda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) or by using the [venv](https://docs.python.org/3/tutorial/venv.html) module. The Python version used for the results in the preprint is 3.9.7.
 
 ### Installing additional packages (except PyTorch)
 The Python packages to use this software, except for PyTorch which we will discuss below, can be installed by using the file `requirements.txt` 
@@ -13,7 +13,7 @@ The Python packages to use this software, except for PyTorch which we will discu
 pip install -r requirements.txt
 ```
 
-When using Anaconda, make sure that `python` is installed in the virtual environment. If not, use `conda install python` or `conda install python=3.8.5` before running the `pip` command. 
+When using Anaconda, make sure that `python` is installed in the virtual environment. If not, use `conda install python` or `conda install python=3.9.7` before running the `pip` command. 
 
 **When running into version issues**: 
 
-- 
GitLab