diff --git a/EIVPackage/EIVGeneral/coverage_collect.py b/EIVPackage/EIVGeneral/coverage_collect.py index 6a3dbb7a0164f8804606c4eaa0262d1977995254..934eaac864beb0425447fa3be2b5442abe3a04a3 100644 --- a/EIVPackage/EIVGeneral/coverage_collect.py +++ b/EIVPackage/EIVGeneral/coverage_collect.py @@ -4,6 +4,7 @@ Collect coverages for various coverage factors, networks and dataloaders. import numpy as np import torch import torch.backends.cudnn +from EIVGeneral.coverage_metrics import epistemic_coverage def get_coverages(not_averaged_predictions, y,\ q_range=np.linspace(0.1,0.9,num=30)): @@ -60,6 +61,8 @@ def get_coverage_distribution(net_iterator, dataloader_iterator, not_av_pred_collection_out, not_av_pred_collection_sigma,\ y_collection = [], [], [] for i, (x,y) in enumerate(dataloader): + if i>= number_of_test_samples: + break x, y = x.to(device), y.to(device) not_averaged_predictions = net.predict(x, take_average_of_prediction=False, diff --git a/Experiments/plot_coverage.py b/Experiments/plot_coverage.py index cac67c28a61983e1149652971a749856b422b8b2..2b625eb6e1c9ea34bf3464af4cc4b5e1bf53055f 100644 --- a/Experiments/plot_coverage.py +++ b/Experiments/plot_coverage.py @@ -6,7 +6,6 @@ import importlib import os import json -import numpy as np import torch import torch.backends.cudnn from torch.utils.data import DataLoader @@ -18,144 +17,158 @@ from EIVGeneral.coverage_collect import get_coverage_distribution from EIVGeneral.manipulate_datasets import VerticalCut -# read in data via --data option -data = 'linear' - # load hyperparameters from JSON file -with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as conf_file: - eiv_conf_dict = json.load(conf_file) -with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as conf_file: - noneiv_conf_dict = json.load(conf_file) +def get_coverages(data, eiv, number_of_draws, use_ground_truth=False): + """ + Create network and dataloader iterators for `data` (short dataname) and + feed them into `get_coverage_distribution`. + :data: String, short dataname + :eiv: Boolean. If True an EiV model is used, else an non-EiV model. + :number_of_draws: Number of draws to use for prediction. Take an int for + non-EiV models and a two-element list for EiV models. + :use_ground_truth: If True, unnoisy `y` are considered when computing the + coverage. If there is no ground truth None,None is returned + :returns: numerical_coverage, theoretical_coverage + """ + # load configuration file + if eiv: + with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as\ + conf_file: + conf_dict = json.load(conf_file) + else: + with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as\ + conf_file: + conf_dict = json.load(conf_file) -long_dataname = eiv_conf_dict["long_dataname"] -short_dataname = eiv_conf_dict["short_dataname"] + long_dataname = conf_dict["long_dataname"] + short_dataname = conf_dict["short_dataname"] -print(f"Plotting coverage for {long_dataname}") + print(f"Plotting coverage for {long_dataname}") -scale_outputs = False -load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data + load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data -try: - gpu_number = eiv_conf_dict["gpu_number"] - device = torch.device(f'cuda:{gpu_number}') + # switch to gpu, if possible try: - torch.tensor([0.0]).to(device) - except RuntimeError: - if torch.cuda.is_available(): - print('Switched to GPU 0') - device = torch.device('cuda:0') - else: - print('No cuda available, using CPU') - device = torch.device('cpu') -except KeyError: - device = torch.device('cpu') - - -# test whether there is a ground truth -try: - train_data, test_data, true_train_data, true_test_data \ - = load_data(seed=0, return_ground_truth=True) - ground_truth_exists = True -except TypeError: - train_data, test_data = load_data(seed=0) - true_train_data, true_test_data = None, None - ground_truth_exists = False - -train_data, test_data = load_data() -input_dim = train_data[0][0].numel() -output_dim = train_data[0][1].numel() - -## Create iterators -seed_list = range(noneiv_conf_dict["seed_range"][0], - noneiv_conf_dict["seed_range"][1]) - -# networks -def net_iterator(eiv=True, seed_list=seed_list): - if eiv: - init_std_y = eiv_conf_dict["init_std_y_list"][0] - unscaled_reg = eiv_conf_dict["unscaled_reg"] - p = eiv_conf_dict["p"] - hidden_layers = eiv_conf_dict["hidden_layers"] - fixed_std_x = eiv_conf_dict["fixed_std_x"] - net = Networks.FNNEIV(p=p, init_std_y=init_std_y, - h=[input_dim, *hidden_layers, output_dim], - fixed_std_x=fixed_std_x).to(device) - for seed in seed_list: - saved_file = os.path.join('saved_networks', - f'eiv_{short_dataname}'\ - f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\ - f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}'\ - f'_seed_{seed}.pkl') - train_and_store.open_stored_training(saved_file=saved_file, - net=net, device=device) - yield net + gpu_number = conf_dict["gpu_number"] + device = torch.device(f'cuda:{gpu_number}') + try: + torch.tensor([0.0]).to(device) + except RuntimeError: + if torch.cuda.is_available(): + print('Switched to GPU 0') + device = torch.device('cuda:0') + else: + print('No cuda available, using CPU') + device = torch.device('cpu') + except KeyError: + device = torch.device('cpu') + + + if use_ground_truth: + # test whether there is a ground truth + try: + train_data, _, _,_ \ + = load_data(seed=0, return_ground_truth=True) + except TypeError: + # if not, end function + return None,None else: - init_std_y = noneiv_conf_dict["init_std_y_list"][0] - unscaled_reg = noneiv_conf_dict["unscaled_reg"] - p = noneiv_conf_dict["p"] - hidden_layers = noneiv_conf_dict["hidden_layers"] - net = Networks.FNNBer(p=p, init_std_y=init_std_y, - h=[input_dim, *hidden_layers, output_dim]).to(device) - for seed in seed_list: - saved_file = os.path.join('saved_networks', - f'noneiv_{short_dataname}'\ - f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\ - f'_p_{p:.2f}_seed_{seed}.pkl') - train_and_store.open_stored_training(saved_file=saved_file, - net=net, device=device) - yield net - -# dataloaders -def dataloader_iterator(seed_list=seed_list, use_ground_truth=False, - batch_size = 100): - for seed in seed_list: - if not use_ground_truth: - train_data, test_data = load_data(seed=seed) - test_dataloader = DataLoader(test_data, - batch_size=batch_size, - shuffle=True) - yield test_dataloader + train_data, _ = load_data() + + + # train_data only used for finding dimensions + input_dim = train_data[0][0].numel() + output_dim = train_data[0][1].numel() + + ## Create iterators for get_coverage_distribution + seed_list = range(conf_dict["seed_range"][0], + conf_dict["seed_range"][1]) + + # iterator for networks + def net_iterator(eiv=eiv, seed_list=seed_list): + """ + Yields EiV models (if `eiv`) or + non-EiV models (if not `eiv`) for the seeds in + `seed_list` and `data`. + """ + if eiv: + # load parameters + init_std_y = conf_dict["init_std_y_list"][0] + unscaled_reg = conf_dict["unscaled_reg"] + p = conf_dict["p"] + hidden_layers = conf_dict["hidden_layers"] + fixed_std_x = conf_dict["fixed_std_x"] + net = Networks.FNNEIV(p=p, init_std_y=init_std_y, + h=[input_dim, *hidden_layers, output_dim], + fixed_std_x=fixed_std_x).to(device) + for seed in seed_list: + # load network paramaters + saved_file = os.path.join('saved_networks', + f'eiv_{short_dataname}'\ + f'_init_std_y_{init_std_y:.3f}'\ + f'_ureg_{unscaled_reg:.1f}'\ + f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}'\ + f'_seed_{seed}.pkl') + train_and_store.open_stored_training(saved_file=saved_file, + net=net, device=device) + yield net else: - assert ground_truth_exists - _, _, _, true_test =\ - load_data(seed=seed, return_ground_truth=True) - # take noisy x but unnoisy y - cut_true_test = VerticalCut(true_test, components_to_pick=[2,1]) - test_dataloader = DataLoader(cut_true_test, - batch_size=batch_size, - shuffle=True) - yield test_dataloader - - - - -eiv_numerical_coverage, eiv_theoretical_coverage = get_coverage_distribution( - net_iterator=net_iterator(eiv=True), - dataloader_iterator=dataloader_iterator(), - device=device, - number_of_draws=[100,5]) -mean_eiv_theoretical_coverage = np.mean(eiv_theoretical_coverage, axis=1) -std_eiv_theoretical_coverage = np.std(eiv_theoretical_coverage, axis=1) -mean_eiv_numerical_coverage = np.mean(eiv_numerical_coverage, axis=1) -std_eiv_numerical_coverage = np.std(eiv_numerical_coverage, axis=1) -noneiv_numerical_coverage, noneiv_theoretical_coverage = get_coverage_distribution( - net_iterator=net_iterator(eiv=False), + # load parameters + init_std_y = conf_dict["init_std_y_list"][0] + unscaled_reg = conf_dict["unscaled_reg"] + p = conf_dict["p"] + hidden_layers = conf_dict["hidden_layers"] + net = Networks.FNNBer(p=p, init_std_y=init_std_y, + h=[input_dim, *hidden_layers, output_dim]).to(device) + for seed in seed_list: + saved_file = os.path.join('saved_networks', + f'noneiv_{short_dataname}'\ + f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\ + f'_p_{p:.2f}_seed_{seed}.pkl') + # load network paramaters + train_and_store.open_stored_training(saved_file=saved_file, + net=net, device=device) + yield net + + # iterator for dataloaders + def dataloader_iterator(seed_list=seed_list,use_ground_truth=use_ground_truth, + batch_size = 100): + """ + Yields dataloaders for `data`, according to the seeds in `seed_list`. + If `use_ground_truth` the data is cut to contain noisy x and unnoisy y. + """ + for seed in seed_list: + if not use_ground_truth: + _, test_data = load_data(seed=seed) + test_dataloader = DataLoader(test_data, + batch_size=batch_size, + shuffle=True) + yield test_dataloader + else: + _, _, _, true_test =\ + load_data(seed=seed, return_ground_truth=True) + # take noisy x but unnoisy y + cut_true_test = VerticalCut(true_test, + components_to_pick=[2,1]) + test_dataloader = DataLoader(cut_true_test, + batch_size=batch_size, + shuffle=True) + yield test_dataloader + + + # Compute coverages + numerical_coverage, theoretical_coverage = get_coverage_distribution( + net_iterator=net_iterator(eiv=eiv), dataloader_iterator=dataloader_iterator(), device=device, - number_of_draws=100) -mean_noneiv_theoretical_coverage = np.mean(noneiv_theoretical_coverage, axis=1) -std_noneiv_theoretical_coverage = np.std(noneiv_theoretical_coverage, axis=1) -mean_noneiv_numerical_coverage = np.mean(noneiv_numerical_coverage, axis=1) -std_noneiv_numerical_coverage = np.std(noneiv_numerical_coverage, axis=1) -plt.plot(mean_eiv_theoretical_coverage, mean_eiv_numerical_coverage, color='r', label='EiV') -plt.fill_between(mean_eiv_theoretical_coverage, mean_eiv_numerical_coverage - - std_eiv_numerical_coverage, - mean_eiv_numerical_coverage + std_eiv_numerical_coverage, color='r', alpha=0.5) -plt.plot(mean_noneiv_theoretical_coverage, mean_noneiv_numerical_coverage, color='b', label='nonEiV') -plt.fill_between(mean_noneiv_theoretical_coverage, mean_noneiv_numerical_coverage - - std_noneiv_numerical_coverage, - mean_noneiv_numerical_coverage + std_noneiv_numerical_coverage, color='b', alpha=0.5) -diag_x = np.linspace(0, np.max(mean_eiv_numerical_coverage)) -plt.plot(diag_x, diag_x, 'k--') -plt.show() + number_of_draws=number_of_draws) + return numerical_coverage, theoretical_coverage + + +##### +# numerical_coverage, theoretical_coverage =\ +# get_coverages(data='quadratic', eiv=True, number_of_draws=100, +# use_ground_truth=True) +# print(numerical_coverage) +# print(theoretical_coverage)