From 3d20eacb94a30074d088a8ee4f87118a40351f74 Mon Sep 17 00:00:00 2001
From: Joerg Martin <joerg.martin@ptb.de>
Date: Mon, 6 Dec 2021 14:52:47 +0100
Subject: [PATCH] Fixed seed handling in datasets

---
 EIVPackage/EIVData/california_housing.py  |   5 +-
 EIVPackage/EIVData/concrete_strength.py   |   5 +-
 EIVPackage/EIVData/energy_efficiency.py   |   5 +-
 EIVPackage/EIVData/kin8nm.py              |   5 +-
 EIVPackage/EIVData/million_song.py        |   4 +-
 EIVPackage/EIVData/naval_propulsion.py    |   5 +-
 EIVPackage/EIVData/power_plant.py         |   5 +-
 EIVPackage/EIVData/protein_structure.py   |   4 +-
 EIVPackage/EIVData/wine_quality.py        |   4 +-
 EIVPackage/EIVData/yacht_hydrodynamics.py |   4 +-
 Experiments/evaluate_california.py        |  58 -----------
 Experiments/evaluate_energy.py            | 121 ----------------------
 Experiments/evaluate_kin8nm.py            |  58 -----------
 Experiments/evaluate_msd.py               |  58 -----------
 Experiments/evaluate_naval.py             |  81 ---------------
 Experiments/evaluate_power.py             |  58 -----------
 Experiments/evaluate_protein.py           |  58 -----------
 Experiments/evaluate_tabular.py           |  25 +++--
 Experiments/evaluate_wine.py              |  58 -----------
 Experiments/evaluate_yacht.py             |  58 -----------
 20 files changed, 48 insertions(+), 631 deletions(-)
 delete mode 100644 Experiments/evaluate_california.py
 delete mode 100644 Experiments/evaluate_energy.py
 delete mode 100644 Experiments/evaluate_kin8nm.py
 delete mode 100644 Experiments/evaluate_msd.py
 delete mode 100644 Experiments/evaluate_naval.py
 delete mode 100644 Experiments/evaluate_power.py
 delete mode 100644 Experiments/evaluate_protein.py
 delete mode 100644 Experiments/evaluate_wine.py
 delete mode 100644 Experiments/evaluate_yacht.py

diff --git a/EIVPackage/EIVData/california_housing.py b/EIVPackage/EIVData/california_housing.py
index d1f4424..cf75519 100644
--- a/EIVPackage/EIVData/california_housing.py
+++ b/EIVPackage/EIVData/california_housing.py
@@ -1,3 +1,4 @@
+import torch
 from EIVData.csv_dataset import CSVData
 from torch.utils.data import random_split
 
@@ -18,7 +19,9 @@ def load_data(seed=0, splitting_part=0.8, normalize=True):
     dataset_len = len(california_dataset)
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
-    california_trainset, california_testset = random_split(california_dataset , lengths=[train_len, test_len])
+    california_trainset, california_testset = random_split(california_dataset,
+            lengths=[train_len, test_len],
+            generator=torch.Generator().manual_seed(seed))
     return california_trainset, california_testset
 
 
diff --git a/EIVPackage/EIVData/concrete_strength.py b/EIVPackage/EIVData/concrete_strength.py
index 472548c..90bc1ff 100644
--- a/EIVPackage/EIVData/concrete_strength.py
+++ b/EIVPackage/EIVData/concrete_strength.py
@@ -1,3 +1,4 @@
+import torch
 from EIVData.csv_dataset import CSVData
 from torch.utils.data import random_split
 
@@ -18,5 +19,7 @@ def load_data(seed=0, splitting_part=0.8, normalize=True):
     dataset_len = len(concrete_dataset)
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
-    concrete_trainset, concrete_testset = random_split(concrete_dataset , lengths=[train_len, test_len])
+    concrete_trainset, concrete_testset = random_split(concrete_dataset,
+            lengths=[train_len, test_len],
+            generator=torch.Generator().manual_seed(seed))
     return concrete_trainset, concrete_testset
diff --git a/EIVPackage/EIVData/energy_efficiency.py b/EIVPackage/EIVData/energy_efficiency.py
index b421b9c..ddf6317 100644
--- a/EIVPackage/EIVData/energy_efficiency.py
+++ b/EIVPackage/EIVData/energy_efficiency.py
@@ -1,3 +1,4 @@
+import torch
 from EIVData.csv_dataset import CSVData
 from torch.utils.data import random_split
 
@@ -18,5 +19,7 @@ def load_data(seed=0, splitting_part=0.8, normalize=True):
     dataset_len = len(energy_dataset)
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
-    energy_trainset, energy_testset = random_split(energy_dataset , lengths=[train_len, test_len])
+    energy_trainset, energy_testset = random_split(energy_dataset,
+            lengths=[train_len, test_len],
+            generator=torch.Generator().manual_seed(seed))
     return energy_trainset, energy_testset
diff --git a/EIVPackage/EIVData/kin8nm.py b/EIVPackage/EIVData/kin8nm.py
index 7cb7ad3..774f1f4 100644
--- a/EIVPackage/EIVData/kin8nm.py
+++ b/EIVPackage/EIVData/kin8nm.py
@@ -1,3 +1,4 @@
+import torch
 from EIVData.csv_dataset import CSVData
 from torch.utils.data import random_split
 
@@ -18,7 +19,9 @@ def load_data(seed=0, splitting_part=0.8, normalize=True):
     dataset_len = len(kin8nm_dataset)
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
-    kin8nm_trainset, kin8nm_testset = random_split(kin8nm_dataset , lengths=[train_len, test_len])
+    kin8nm_trainset, kin8nm_testset = random_split(kin8nm_dataset,
+            lengths=[train_len, test_len],
+            generator=torch.Generator().manual_seed(seed))
     return kin8nm_trainset, kin8nm_testset
 
 
diff --git a/EIVPackage/EIVData/million_song.py b/EIVPackage/EIVData/million_song.py
index 1dd545a..ae6d18a 100644
--- a/EIVPackage/EIVData/million_song.py
+++ b/EIVPackage/EIVData/million_song.py
@@ -1,3 +1,4 @@
+import torch
 from EIVData.csv_dataset import CSVData
 from torch.utils.data import random_split
 
@@ -21,5 +22,6 @@ def load_data(seed=0, splitting_part=0.8, normalize=True):
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
     msd_trainset, msd_testset = random_split(msd_dataset,
-            lengths=[train_len, test_len])
+            lengths=[train_len, test_len],
+            generator=torch.Generator().manual_seed(seed))
     return msd_trainset, msd_testset
diff --git a/EIVPackage/EIVData/naval_propulsion.py b/EIVPackage/EIVData/naval_propulsion.py
index a3e9596..8749899 100644
--- a/EIVPackage/EIVData/naval_propulsion.py
+++ b/EIVPackage/EIVData/naval_propulsion.py
@@ -1,3 +1,4 @@
+import torch
 from EIVData.csv_dataset import CSVData
 from torch.utils.data import random_split
 
@@ -20,6 +21,8 @@ def load_data(seed=0, splitting_part=0.8, normalize=True):
     dataset_len = len(naval_dataset)
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
-    naval_trainset, naval_testset = random_split(naval_dataset , lengths=[train_len, test_len])
+    naval_trainset, naval_testset = random_split(naval_dataset,
+            lengths=[train_len, test_len],
+            generator=torch.Generator().manual_seed(seed))
     return naval_trainset, naval_testset
 
diff --git a/EIVPackage/EIVData/power_plant.py b/EIVPackage/EIVData/power_plant.py
index e33f742..6f40c65 100644
--- a/EIVPackage/EIVData/power_plant.py
+++ b/EIVPackage/EIVData/power_plant.py
@@ -1,3 +1,4 @@
+import torch
 from EIVData.csv_dataset import CSVData
 from torch.utils.data import random_split
 
@@ -19,6 +20,8 @@ def load_data(seed=0, splitting_part=0.8, normalize=True):
     dataset_len = len(naval_dataset)
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
-    naval_trainset, naval_testset = random_split(naval_dataset , lengths=[train_len, test_len])
+    naval_trainset, naval_testset = random_split(naval_dataset,
+            lengths=[train_len, test_len],
+            generator=torch.Generator().manual_seed(seed))
     return naval_trainset, naval_testset
 
diff --git a/EIVPackage/EIVData/protein_structure.py b/EIVPackage/EIVData/protein_structure.py
index 50e34e1..8305087 100644
--- a/EIVPackage/EIVData/protein_structure.py
+++ b/EIVPackage/EIVData/protein_structure.py
@@ -1,3 +1,4 @@
+import torch
 from EIVData.csv_dataset import CSVData
 from torch.utils.data import random_split
 
@@ -20,5 +21,6 @@ def load_data(seed=0, splitting_part=0.8, normalize=True):
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
     protein_trainset, protein_testset = random_split(protein_dataset,
-            lengths=[train_len, test_len])
+            lengths=[train_len, test_len],
+            generator=torch.Generator().manual_seed(seed))
     return protein_trainset, protein_testset
diff --git a/EIVPackage/EIVData/wine_quality.py b/EIVPackage/EIVData/wine_quality.py
index eb4216a..78c839a 100644
--- a/EIVPackage/EIVData/wine_quality.py
+++ b/EIVPackage/EIVData/wine_quality.py
@@ -1,3 +1,4 @@
+import torch
 from EIVData.csv_dataset import CSVData
 from torch.utils.data import random_split
 
@@ -20,5 +21,6 @@ def load_data(seed=0, splitting_part=0.8, normalize=True):
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
     wine_trainset, wine_testset = random_split(wine_dataset,
-            lengths=[train_len, test_len])
+            lengths=[train_len, test_len],
+            generator=torch.Generator().manual_seed(seed))
     return wine_trainset, wine_testset
diff --git a/EIVPackage/EIVData/yacht_hydrodynamics.py b/EIVPackage/EIVData/yacht_hydrodynamics.py
index f3cf3f8..505c841 100644
--- a/EIVPackage/EIVData/yacht_hydrodynamics.py
+++ b/EIVPackage/EIVData/yacht_hydrodynamics.py
@@ -1,3 +1,4 @@
+import torch
 from EIVData.csv_dataset import CSVData
 from torch.utils.data import random_split
 
@@ -21,7 +22,8 @@ def load_data(seed=0, splitting_part=0.8, normalize=True):
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
     yacht_trainset, yacht_testset = random_split(yacht_dataset,
-            lengths=[train_len, test_len])
+            lengths=[train_len, test_len],
+            generator=torch.Generator().manual_seed(seed))
     return yacht_trainset, yacht_testset
 
 
diff --git a/Experiments/evaluate_california.py b/Experiments/evaluate_california.py
deleted file mode 100644
index be631ef..0000000
--- a/Experiments/evaluate_california.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import os
-import numpy as np
-import torch
-import torch.backends.cudnn
-from torch.utils.data import DataLoader
-from torch.utils.tensorboard.writer import SummaryWriter
-
-from EIVArchitectures import Networks, initialize_weights
-from EIVData.california_housing import load_data
-from EIVTrainingRoutines import train_and_store, loss_functions
-
-from train_noneiv_california import p, init_std_y_list, seed_list, unscaled_reg, hidden_layers
-
-
-train_data, test_data = load_data()
-test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data), 800))))
-
-seed = seed_list[0]
-init_std_y = init_std_y_list[0]
-saved_file = os.path.join('saved_networks',
-        f'noneiv_california'\
-        f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
-        f'_p_{p:.2f}_seed_{seed}.pkl')
-
-input_dim = train_data[0][0].numel()
-output_dim = train_data[0][1].numel()
-net = Networks.FNNBer(p=p, init_std_y=init_std_y,
-        h=[input_dim, *hidden_layers, output_dim])
-train_and_store.open_stored_training(saved_file=saved_file,
-        net=net)
-
-
-# RMSE
-x,y = next(iter(test_dataloader))
-out = net(x)[0]
-if len(y.shape) <=1:
-    y = y.view((-1,1))
-assert y.shape == out.shape
-res = y-out
-scale = train_data.dataset.std_labels
-scaled_res = res * scale.view((1,-1))
-scaled_res = scaled_res.detach().cpu().numpy().flatten()
-rmse = np.sqrt(np.mean(scaled_res**2))
-print(f'RMSE {rmse:.3f}')
-
-
-# NLL
-x,y = next(iter(test_dataloader))
-training_state = net.training
-net.train()
-logdens = net.predictive_logdensity(x, y, number_of_draws=100,
-        decouple_dimensions=True,
-        scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
-if training_state:
-    net.train()
-else:
-    net.eval()
-print(f'Dropout predictive {logdens:.3f}')
diff --git a/Experiments/evaluate_energy.py b/Experiments/evaluate_energy.py
deleted file mode 100644
index 37a03ca..0000000
--- a/Experiments/evaluate_energy.py
+++ /dev/null
@@ -1,121 +0,0 @@
-import os
-import numpy as np
-import torch
-import torch.backends.cudnn
-from torch.utils.data import DataLoader
-from torch.utils.tensorboard.writer import SummaryWriter
-
-from EIVArchitectures import Networks, initialize_weights
-from EIVData.energy_efficiency import load_data
-from EIVTrainingRoutines import train_and_store, loss_functions
-
-print('Non-EiV')
-from train_noneiv_energy import p, init_std_y_list, seed_list, unscaled_reg, hidden_layers
-
-train_data, test_data = load_data()
-test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data), 800))))
-
-seed = seed_list[0]
-init_std_y = init_std_y_list[0]
-saved_file = os.path.join('saved_networks',
-        f'noneiv_energy'\
-        f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
-        f'_p_{p:.2f}_seed_{seed}.pkl')
-
-input_dim = train_data[0][0].numel()
-output_dim = train_data[0][1].numel()
-net = Networks.FNNBer(p=p, init_std_y=init_std_y,
-        h=[input_dim, *hidden_layers, output_dim])
-train_and_store.open_stored_training(saved_file=saved_file,
-        net=net)
-
-
-# RMSE
-x,y = next(iter(test_dataloader))
-training_state = net.training
-net.train()
-out, sigmas = net.predict(x, number_of_draws=100, take_average_of_prediction=True)
-if len(y.shape) <=1:
-    y = y.view((-1,1))
-assert y.shape == out.shape
-res = y-out
-scale = train_data.dataset.std_labels
-scaled_res = res * scale.view((1,-1))
-scaled_res = scaled_res.detach().cpu().numpy().flatten()
-rmse = np.sqrt(np.mean(scaled_res**2))
-print(f'RMSE {rmse:.3f}')
-
-
-# NLL
-x,y = next(iter(test_dataloader))
-training_state = net.training
-net.train()
-logdens = net.predictive_logdensity(x, y, number_of_draws=100,
-        decouple_dimensions=True,
-        scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
-if training_state:
-    net.train()
-else:
-    net.eval()
-print(f'Dropout predictive {logdens:.3f}')
-
-print('EiV')
-from train_eiv_energy import p, init_std_y_list, seed_list, unscaled_reg, hidden_layers, fixed_std_x
-
-train_data, test_data = load_data()
-test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data), 800))))
-
-seed = seed_list[0]
-init_std_y = init_std_y_list[0]
-saved_file = os.path.join('saved_networks',
-        f'eiv_energy'\
-        f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
-        f'_p_{p:.2f}_seed_{seed}.pkl')
-
-input_dim = train_data[0][0].numel()
-output_dim = train_data[0][1].numel()
-net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
-        h=[input_dim, *hidden_layers, output_dim], fixed_std_x=fixed_std_x)
-train_and_store.open_stored_training(saved_file=saved_file,
-        net=net)
-
-
-# RMSE
-x,y = next(iter(test_dataloader))
-training_state = net.training
-noise_state = net.noise_is_on
-net.train()
-net.noise_on()
-out = net.predict(x, number_of_draws=500, take_average_of_prediction=True)[0]
-if len(y.shape) <=1:
-    y = y.view((-1,1))
-assert y.shape == out.shape
-res = y-out
-scale = train_data.dataset.std_labels
-scaled_res = res * scale.view((1,-1))
-scaled_res = scaled_res.detach().cpu().numpy().flatten()
-rmse = np.sqrt(np.mean(scaled_res**2))
-if training_state:
-    net.train()
-else:
-    net.eval()
-if noise_state:
-    net.noise_on()
-else:
-    net.noise_off()
-print(f'RMSE {rmse:.3f}')
-
-
-# NLL
-x,y = next(iter(test_dataloader))
-training_state = net.training
-net.train()
-logdens = net.predictive_logdensity(x, y, number_of_draws=100,
-        decouple_dimensions=True,
-        scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
-if training_state:
-    net.train()
-else:
-    net.eval()
-print(f'Dropout predictive {logdens:.3f}')
-
diff --git a/Experiments/evaluate_kin8nm.py b/Experiments/evaluate_kin8nm.py
deleted file mode 100644
index a9d8ae6..0000000
--- a/Experiments/evaluate_kin8nm.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import os
-import numpy as np
-import torch
-import torch.backends.cudnn
-from torch.utils.data import DataLoader
-from torch.utils.tensorboard.writer import SummaryWriter
-
-from EIVArchitectures import Networks, initialize_weights
-from EIVData.kin8nm import load_data
-from EIVTrainingRoutines import train_and_store, loss_functions
-
-from train_noneiv_kin8nm import p, init_std_y_list, seed_list, unscaled_reg, hidden_layers
-
-
-train_data, test_data = load_data()
-test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data), 800))))
-
-seed = seed_list[0]
-init_std_y = init_std_y_list[0]
-saved_file = os.path.join('saved_networks',
-        f'noneiv_kin8nm'\
-        f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
-        f'_p_{p:.2f}_seed_{seed}.pkl')
-
-input_dim = train_data[0][0].numel()
-output_dim = train_data[0][1].numel()
-net = Networks.FNNBer(p=p, init_std_y=init_std_y,
-        h=[input_dim, *hidden_layers, output_dim])
-train_and_store.open_stored_training(saved_file=saved_file,
-        net=net)
-
-
-# RMSE
-x,y = next(iter(test_dataloader))
-out = net(x)[0]
-if len(y.shape) <=1:
-    y = y.view((-1,1))
-assert y.shape == out.shape
-res = y-out
-scale = train_data.dataset.std_labels
-scaled_res = res * scale.view((1,-1))
-scaled_res = scaled_res.detach().cpu().numpy().flatten()
-rmse = np.sqrt(np.mean(scaled_res**2))
-print(f'RMSE {rmse:.3f}')
-
-
-# NLL
-x,y = next(iter(test_dataloader))
-training_state = net.training
-net.train()
-logdens = net.predictive_logdensity(x, y, number_of_draws=100,
-        decouple_dimensions=True,
-        scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
-if training_state:
-    net.train()
-else:
-    net.eval()
-print(f'Dropout predictive {logdens:.3f}')
diff --git a/Experiments/evaluate_msd.py b/Experiments/evaluate_msd.py
deleted file mode 100644
index 041b755..0000000
--- a/Experiments/evaluate_msd.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import os
-import numpy as np
-import torch
-import torch.backends.cudnn
-from torch.utils.data import DataLoader
-from torch.utils.tensorboard.writer import SummaryWriter
-
-from EIVArchitectures import Networks, initialize_weights
-from EIVData.million_song import load_data
-from EIVTrainingRoutines import train_and_store, loss_functions
-
-from train_noneiv_msd import p, init_std_y_list, seed_list, unscaled_reg, hidden_layers
-
-
-train_data, test_data = load_data()
-test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data), 800))))
-
-seed = seed_list[0]
-init_std_y = init_std_y_list[0]
-saved_file = os.path.join('saved_networks',
-        f'noneiv_msd'\
-        f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
-        f'_p_{p:.2f}_seed_{seed}.pkl')
-
-input_dim = train_data[0][0].numel()
-output_dim = train_data[0][1].numel()
-net = Networks.FNNBer(p=p, init_std_y=init_std_y,
-        h=[input_dim, *hidden_layers, output_dim])
-train_and_store.open_stored_training(saved_file=saved_file,
-        net=net)
-
-
-# RMSE
-x,y = next(iter(test_dataloader))
-out = net(x)[0]
-if len(y.shape) <=1:
-    y = y.view((-1,1))
-assert y.shape == out.shape
-res = y-out
-scale = train_data.dataset.std_labels
-scaled_res = res * scale.view((1,-1))
-scaled_res = scaled_res.detach().cpu().numpy().flatten()
-rmse = np.sqrt(np.mean(scaled_res**2))
-print(f'RMSE {rmse:.3f}')
-
-
-# NLL
-x,y = next(iter(test_dataloader))
-training_state = net.training
-net.train()
-logdens = net.predictive_logdensity(x, y, number_of_draws=100,
-        decouple_dimensions=True,
-        scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
-if training_state:
-    net.train()
-else:
-    net.eval()
-print(f'Dropout predictive {logdens:.3f}')
diff --git a/Experiments/evaluate_naval.py b/Experiments/evaluate_naval.py
deleted file mode 100644
index 63c88a3..0000000
--- a/Experiments/evaluate_naval.py
+++ /dev/null
@@ -1,81 +0,0 @@
-import os
-import numpy as np
-import torch
-import torch.backends.cudnn
-from torch.utils.data import DataLoader
-from torch.utils.tensorboard.writer import SummaryWriter
-
-from EIVArchitectures import Networks, initialize_weights
-from EIVData.naval_propulsion import load_data
-from EIVTrainingRoutines import train_and_store, loss_functions
-
-from train_noneiv_naval import p, init_std_y_list, seed_list, unscaled_reg, hidden_layers
-
-
-train_data, test_data = load_data()
-test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data), 800))))
-
-seed = seed_list[0]
-init_std_y = init_std_y_list[0]
-saved_file = os.path.join('saved_networks',
-        f'noneiv_naval'\
-        f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
-        f'_p_{p:.2f}_seed_{seed}.pkl')
-
-input_dim = train_data[0][0].numel()
-output_dim = train_data[0][1].numel()
-net = Networks.FNNBer(p=p, init_std_y=init_std_y,
-        h=[input_dim, *hidden_layers, output_dim])
-train_and_store.open_stored_training(saved_file=saved_file,
-        net=net)
-
-
-# # RMSE
-# x,y = next(iter(test_dataloader))
-# training_state = net.training
-# net.eval()
-# out = net(x)[0]
-# if len(y.shape) <=1:
-#     y = y.view((-1,1))
-# assert y.shape == out.shape
-# res = y-out
-# if training_state:
-#     net.train()
-# scale = train_data.dataset.std_labels
-# scaled_res = res * scale.view((1,-1))
-# scaled_res = scaled_res.detach().cpu().numpy().flatten()
-# rmse = np.sqrt(np.mean(scaled_res**2))
-# print(f'no Dropout RMSE {rmse:.3f}')
-
-# # RMSE with prediction
-# x,y = next(iter(test_dataloader))
-# training_state = net.training
-# net.train()
-# out = net.predict(x, number_of_draws=100)[0]
-# if len(y.shape) <=1:
-#     y = y.view((-1,1))
-# assert y.shape == out.shape
-# res = y-out
-# if training_state:
-#     net.train()
-# else:
-#     net.eval()
-# scale = train_data.dataset.std_labels
-# scaled_res = res * scale.view((1,-1))
-# scaled_res = scaled_res.detach().cpu().numpy().flatten()
-# rmse = np.sqrt(np.mean(scaled_res**2))
-# print(f'Dropout predictive RMSE {rmse:.3f}')
-
-
-# NLL
-x,y = next(iter(test_dataloader))
-training_state = net.training
-net.train()
-logdens = net.predictive_logdensity(x, y, number_of_draws=100,
-        decouple_dimensions=True,
-        scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
-if training_state:
-    net.train()
-else:
-    net.eval()
-print(f'Dropout predictive {logdens:.3f}')
diff --git a/Experiments/evaluate_power.py b/Experiments/evaluate_power.py
deleted file mode 100644
index bc4d328..0000000
--- a/Experiments/evaluate_power.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import os
-import numpy as np
-import torch
-import torch.backends.cudnn
-from torch.utils.data import DataLoader
-from torch.utils.tensorboard.writer import SummaryWriter
-
-from EIVArchitectures import Networks, initialize_weights
-from EIVData.power_plant import load_data
-from EIVTrainingRoutines import train_and_store, loss_functions
-
-from train_noneiv_power import p, init_std_y_list, seed_list, unscaled_reg, hidden_layers
-
-
-train_data, test_data = load_data()
-test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data), 800))))
-
-seed = seed_list[0]
-init_std_y = init_std_y_list[0]
-saved_file = os.path.join('saved_networks',
-        f'noneiv_power'\
-        f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
-        f'_p_{p:.2f}_seed_{seed}.pkl')
-
-input_dim = train_data[0][0].numel()
-output_dim = train_data[0][1].numel()
-net = Networks.FNNBer(p=p, init_std_y=init_std_y,
-        h=[input_dim, *hidden_layers, output_dim])
-train_and_store.open_stored_training(saved_file=saved_file,
-        net=net)
-
-
-# RMSE
-x,y = next(iter(test_dataloader))
-out = net(x)[0]
-if len(y.shape) <=1:
-    y = y.view((-1,1))
-assert y.shape == out.shape
-res = y-out
-scale = train_data.dataset.std_labels
-scaled_res = res * scale.view((1,-1))
-scaled_res = scaled_res.detach().cpu().numpy().flatten()
-rmse = np.sqrt(np.mean(scaled_res**2))
-print(f'RMSE {rmse:.3f}')
-
-
-# NLL
-x,y = next(iter(test_dataloader))
-training_state = net.training
-net.train()
-logdens = net.predictive_logdensity(x, y, number_of_draws=100,
-        decouple_dimensions=True,
-        scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
-if training_state:
-    net.train()
-else:
-    net.eval()
-print(f'Dropout predictive {logdens:.3f}')
diff --git a/Experiments/evaluate_protein.py b/Experiments/evaluate_protein.py
deleted file mode 100644
index de32d3c..0000000
--- a/Experiments/evaluate_protein.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import os
-import numpy as np
-import torch
-import torch.backends.cudnn
-from torch.utils.data import DataLoader
-from torch.utils.tensorboard.writer import SummaryWriter
-
-from EIVArchitectures import Networks, initialize_weights
-from EIVData.protein_structure import load_data
-from EIVTrainingRoutines import train_and_store, loss_functions
-
-from train_noneiv_protein import p, init_std_y_list, seed_list, unscaled_reg, hidden_layers
-
-
-train_data, test_data = load_data()
-test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data), 800))))
-
-seed = seed_list[0]
-init_std_y = init_std_y_list[0]
-saved_file = os.path.join('saved_networks',
-        f'noneiv_protein'\
-        f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
-        f'_p_{p:.2f}_seed_{seed}.pkl')
-
-input_dim = train_data[0][0].numel()
-output_dim = train_data[0][1].numel()
-net = Networks.FNNBer(p=p, init_std_y=init_std_y,
-        h=[input_dim, *hidden_layers, output_dim])
-train_and_store.open_stored_training(saved_file=saved_file,
-        net=net)
-
-
-# RMSE
-x,y = next(iter(test_dataloader))
-out = net(x)[0]
-if len(y.shape) <=1:
-    y = y.view((-1,1))
-assert y.shape == out.shape
-res = y-out
-scale = train_data.dataset.std_labels
-scaled_res = res * scale.view((1,-1))
-scaled_res = scaled_res.detach().cpu().numpy().flatten()
-rmse = np.sqrt(np.mean(scaled_res**2))
-print(f'RMSE {rmse:.3f}')
-
-
-# NLL
-x,y = next(iter(test_dataloader))
-training_state = net.training
-net.train()
-logdens = net.predictive_logdensity(x, y, number_of_draws=100,
-        decouple_dimensions=True,
-        scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
-if training_state:
-    net.train()
-else:
-    net.eval()
-print(f'Dropout predictive {logdens:.3f}')
diff --git a/Experiments/evaluate_tabular.py b/Experiments/evaluate_tabular.py
index e13f9d5..54b2078 100644
--- a/Experiments/evaluate_tabular.py
+++ b/Experiments/evaluate_tabular.py
@@ -1,5 +1,6 @@
 import importlib
 import os
+import matplotlib
 
 import numpy as np
 import torch
@@ -25,8 +26,8 @@ input_dim = train_data[0][0].numel()
 output_dim = train_data[0][1].numel()
 
 def collect_metrics(x,y, seed=0,
-    noneiv_number_of_draws=100, eiv_number_of_draws=[100,1],
-    decouple_dimensions=False, device=torch.device('cuda:1')):
+        noneiv_number_of_draws=100, eiv_number_of_draws=[100,1],
+        decouple_dimensions=False, device=torch.device('cuda:1')):
     """
     :param x: A torch.tensor, taken as input
     :param y: A torch.tensor, taken as output
@@ -144,7 +145,7 @@ noneiv_rmse_collection = []
 noneiv_logdens_collection = []
 eiv_rmse_collection = []
 eiv_logdens_collection = []
-num_test_epochs = 20
+num_test_epochs = 30
 assert train_noneiv.seed_list == train_eiv.seed_list
 seed_list = train_noneiv.seed_list
 for seed in tqdm(seed_list):
@@ -162,15 +163,13 @@ for seed in tqdm(seed_list):
         eiv_logdens_collection.append(eiv_logdens)
 
 
-# TODO: Despite statistics, the fluctuations seem to be large
-# TODO: fix sqrt scaling, missing factor
 print('Non-EiV')
-print(f'RMSE {np.mean(noneiv_rmse_collection):.3f}'\
-        f'({np.std(noneiv_rmse_collection)/np.sqrt(num_test_epochs):.3f})')
-print(f'LogDens {np.mean(noneiv_logdens_collection):.3f}'\
-        f'({np.std(noneiv_logdens_collection)/np.sqrt(num_test_epochs):.3f})')
+print(f'RMSE {np.mean(noneiv_rmse_collection):.5f}'\
+        f'({np.std(noneiv_rmse_collection)/np.sqrt(num_test_epochs*len(seed_list)):.5f})')
+print(f'LogDens {np.mean(noneiv_logdens_collection):.5f}'\
+        f'({np.std(noneiv_logdens_collection)/np.sqrt(num_test_epochs*len(seed_list)):.5f})')
 print('EiV')
-print(f'RMSE {np.mean(eiv_rmse_collection):.3f}'\
-        f'({np.std(eiv_rmse_collection)/np.sqrt(num_test_epochs):.3f})')
-print(f'LogDens {np.mean(eiv_logdens_collection):.3f}'\
-        f'({np.std(eiv_logdens_collection)/np.sqrt(num_test_epochs):.3f})')
+print(f'RMSE {np.mean(eiv_rmse_collection):.5f}'\
+        f'({np.std(eiv_rmse_collection)/np.sqrt(num_test_epochs*len(seed_list)):.5f})')
+print(f'LogDens {np.mean(eiv_logdens_collection):.5f}'\
+        f'({np.std(eiv_logdens_collection)/np.sqrt(num_test_epochs*len(seed_list)):.5f})')
diff --git a/Experiments/evaluate_wine.py b/Experiments/evaluate_wine.py
deleted file mode 100644
index 02be10d..0000000
--- a/Experiments/evaluate_wine.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import os
-import numpy as np
-import torch
-import torch.backends.cudnn
-from torch.utils.data import DataLoader
-from torch.utils.tensorboard.writer import SummaryWriter
-
-from EIVArchitectures import Networks, initialize_weights
-from EIVData.wine_quality import load_data
-from EIVTrainingRoutines import train_and_store, loss_functions
-
-from train_noneiv_wine import p, init_std_y_list, seed_list, unscaled_reg, hidden_layers
-
-
-train_data, test_data = load_data()
-test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data), 800))))
-
-seed = seed_list[0]
-init_std_y = init_std_y_list[0]
-saved_file = os.path.join('saved_networks',
-        f'noneiv_wine'\
-        f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
-        f'_p_{p:.2f}_seed_{seed}.pkl')
-
-input_dim = train_data[0][0].numel()
-output_dim = train_data[0][1].numel()
-net = Networks.FNNBer(p=p, init_std_y=init_std_y,
-        h=[input_dim, *hidden_layers, output_dim])
-train_and_store.open_stored_training(saved_file=saved_file,
-        net=net)
-
-
-# RMSE
-x,y = next(iter(test_dataloader))
-out = net(x)[0]
-if len(y.shape) <=1:
-    y = y.view((-1,1))
-assert y.shape == out.shape
-res = y-out
-scale = train_data.dataset.std_labels
-scaled_res = res * scale.view((1,-1))
-scaled_res = scaled_res.detach().cpu().numpy().flatten()
-rmse = np.sqrt(np.mean(scaled_res**2))
-print(f'RMSE {rmse:.3f}')
-
-
-# NLL
-x,y = next(iter(test_dataloader))
-training_state = net.training
-net.train()
-logdens = net.predictive_logdensity(x, y, number_of_draws=100,
-        decouple_dimensions=True,
-        scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
-if training_state:
-    net.train()
-else:
-    net.eval()
-print(f'Dropout predictive {logdens:.3f}')
diff --git a/Experiments/evaluate_yacht.py b/Experiments/evaluate_yacht.py
deleted file mode 100644
index 842674d..0000000
--- a/Experiments/evaluate_yacht.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import os
-import numpy as np
-import torch
-import torch.backends.cudnn
-from torch.utils.data import DataLoader
-from torch.utils.tensorboard.writer import SummaryWriter
-
-from EIVArchitectures import Networks, initialize_weights
-from EIVData.yacht_hydrodynamics import load_data
-from EIVTrainingRoutines import train_and_store, loss_functions
-
-from train_noneiv_yacht import p, init_std_y_list, seed_list, unscaled_reg, hidden_layers
-
-
-train_data, test_data = load_data()
-test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data), 800))))
-
-seed = seed_list[0]
-init_std_y = init_std_y_list[0]
-saved_file = os.path.join('saved_networks',
-        f'noneiv_yacht'\
-        f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
-        f'_p_{p:.2f}_seed_{seed}.pkl')
-
-input_dim = train_data[0][0].numel()
-output_dim = train_data[0][1].numel()
-net = Networks.FNNBer(p=p, init_std_y=init_std_y,
-        h=[input_dim, *hidden_layers, output_dim])
-train_and_store.open_stored_training(saved_file=saved_file,
-        net=net)
-
-
-# RMSE
-x,y = next(iter(test_dataloader))
-out = net(x)[0]
-if len(y.shape) <=1:
-    y = y.view((-1,1))
-assert y.shape == out.shape
-res = y-out
-scale = train_data.dataset.std_labels
-scaled_res = res * scale.view((1,-1))
-scaled_res = scaled_res.detach().cpu().numpy().flatten()
-rmse = np.sqrt(np.mean(scaled_res**2))
-print(f'RMSE {rmse:.3f}')
-
-
-# NLL
-x,y = next(iter(test_dataloader))
-training_state = net.training
-net.train()
-logdens = net.predictive_logdensity(x, y, number_of_draws=100,
-        decouple_dimensions=True,
-        scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
-if training_state:
-    net.train()
-else:
-    net.eval()
-print(f'Dropout predictive {logdens:.3f}')
-- 
GitLab
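
A minimal sketch of the seeding pattern this patch applies in every load_data
function: passing a freshly seeded torch.Generator to random_split makes the
train/test split a deterministic function of the seed argument instead of a
side effect of torch's global RNG state. The dataset and sizes below are
hypothetical stand-ins, not part of the repository:

    import torch
    from torch.utils.data import TensorDataset, random_split

    # Stand-in dataset: 100 samples with 8 features and 1 label each.
    dataset = TensorDataset(torch.randn(100, 8), torch.randn(100, 1))
    train_len = int(len(dataset) * 0.8)
    test_len = len(dataset) - train_len

    # Without an explicit generator, random_split draws from the global RNG,
    # so every call (and every differently seeded script) splits differently.
    seed = 0
    trainset, testset = random_split(dataset, lengths=[train_len, test_len],
            generator=torch.Generator().manual_seed(seed))

    # The same seed yields the same split, even after the global RNG moves on.
    torch.manual_seed(12345)
    trainset2, _ = random_split(dataset, lengths=[train_len, test_len],
            generator=torch.Generator().manual_seed(seed))
    assert trainset.indices == trainset2.indices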
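The evaluate_tabular.py hunks also resolve the removed "fix sqrt scaling,
missing factor" TODO: each collection holds num_test_epochs values per seed,
so the standard error of the mean has to divide by the square root of the
total sample count, num_test_epochs * len(seed_list), not by
sqrt(num_test_epochs) alone. A small numeric sketch with synthetic values
(not results from the repository):

    import numpy as np

    rng = np.random.default_rng(0)
    num_test_epochs, num_seeds = 30, 5
    # One RMSE value per (seed, test epoch) pair, as in the evaluation loop.
    rmse_collection = rng.normal(loc=3.0, scale=0.1,
            size=num_test_epochs * num_seeds)

    mean = np.mean(rmse_collection)
    # Standard error of the mean over all collected values:
    sem = np.std(rmse_collection) / np.sqrt(num_test_epochs * num_seeds)
    print(f'RMSE {mean:.5f}({sem:.5f})')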