diff --git a/EIVPackage/EIVData/cubic.py b/EIVPackage/EIVData/cubic.py index dce6ea452fba08280d457e995295d5fe87d46d6d..9d5e3979db7af0e236536a9b4e8f6052fa43874e 100644 --- a/EIVPackage/EIVData/cubic.py +++ b/EIVPackage/EIVData/cubic.py @@ -9,8 +9,8 @@ total_number_of_datapoints = 1000 input_range = [-1,1] slope = 1.0 intercept = 0.0 -x_noise_strength = 0.1 -y_noise_strength = 0.1 +x_noise_strength = 0.2 +y_noise_strength = 0.05 func = lambda true_x: slope * true_x**3 + intercept def load_data(seed=0, splitting_part=0.8, normalize=True, diff --git a/EIVPackage/EIVData/linear.py b/EIVPackage/EIVData/linear.py index 97d2cd951e0d86650c9ab85af17d5ceb96312375..2a1b0d7a4da8e81db9c90376ec3386d2f466aec3 100644 --- a/EIVPackage/EIVData/linear.py +++ b/EIVPackage/EIVData/linear.py @@ -10,7 +10,7 @@ input_range = [-1,1] slope = 1.0 intercept = 0.0 x_noise_strength = 0.1 -y_noise_strength = 0.1 +y_noise_strength = 0.2 func = lambda true_x: slope * true_x + intercept def load_data(seed=0, splitting_part=0.8, normalize=True, diff --git a/EIVPackage/EIVData/sine.py b/EIVPackage/EIVData/sine.py index cde80d349eef58cadab44b79f0937d52a90e8d41..2e9eabc4d1a4f38b5c716f3bd05b28548faaafc1 100644 --- a/EIVPackage/EIVData/sine.py +++ b/EIVPackage/EIVData/sine.py @@ -8,8 +8,8 @@ from EIVGeneral.manipulate_tensors import add_noise, normalize_tensor,\ total_number_of_datapoints = 2000 input_range = [-0.2,0.8] intercept = 0.0 -x_noise_strength = 0.02 -y_noise_strength = 0.02 +x_noise_strength = 0.04 +y_noise_strength = 0.01 func = lambda true_x: true_x +\ torch.sin(2 * torch.pi * true_x) +\ torch.sin(4 * torch.pi * true_x) diff --git a/EIVPackage/EIVGeneral/coverage_metrics.py b/EIVPackage/EIVGeneral/coverage_metrics.py index 7515497349f2e9636bd44b9c8955a9937a9e2224..4b6824d07b5e8b532f270ddb4121bd150b18486d 100644 --- a/EIVPackage/EIVGeneral/coverage_metrics.py +++ b/EIVPackage/EIVGeneral/coverage_metrics.py @@ -119,6 +119,57 @@ def epistemic_coverage(not_averaged_predictions, y, q=0.95, theoretical_coverage = q return numerical_coverage, theoretical_coverage +def total_coverage(not_averaged_predictions, y, q=0.95): + """ + Returns the total coverage of (noisy) `y` by the interval + "predictions + total_unc * q-Interval", where + - "q-Interval" is the interval of measure `q` under the standard normal, + - "predictions" are the entries of the first component of the tuple + `not_averaged_predictions` averaged over their second dimension. + - total_unc is the total uncertainty computed from + `not_averaged_predictions`. + :param not_averaged_predictions: A tuple of tensors as in the output of + `FNNEIV.predict` with `take_average_of_prediction` set to `False`, i.e.: + the predictions of the neural net not averaged over the first dimension + (the repetition dimension in `FNNEIV.predict`) and + the aleatoric uncertainty with a batch dimension and a feature dimension. + :param y: A `torch.tensor` of the same shape then the second components + of `not_averaged_predictions`. If the feature dimension is missing, it is added. + :param q: A float between 0 and 1. Defaults to 0.95. + :returns: coverage + """ + out, sigmas = not_averaged_predictions + # add an output axis if necessary + if len(y.shape) <= 1: + y = y[...,None] + if len(sigmas.shape) <= 1: + sigmas = sigmas[...,None] + # squeeze last dimensions into one + y = y.view((y.shape[0], -1)) + sigmas = sigmas.view((sigmas.shape[0], -1)) + out = out.view((*out.shape[:2], -1)) + # check if dimensions are consistent + # compute epistemic uncertainty + epis_unc = torch.std(out, dim=1) + out = torch.mean(out, dim=1) + assert y.shape == sigmas.shape + assert y.shape == out.shape + assert epis_unc.shape == sigmas.shape + # compute total uncertainty + total_unc = torch.sqrt(epis_unc**2 + sigmas **2) + # fix interval based on epis_unc + out_dim = y.shape[1] + interval_length = multivariate_interval_length(dim=out_dim, q=q) \ + * total_unc + # numerical computation + errors = out - y + assert errors.shape == total_unc.shape + check_if_in_interval = logical_and_along_dimension( + torch.abs(errors) <= interval_length, dim=1) + coverage = torch.mean( + check_if_in_interval.to(torch.float32)).cpu().detach().item() + return coverage + def normalized_std(not_averaged_predictions, y): """ Returns the standard deviation of normalized residuals, averaged over the diff --git a/Experiments/configurations/eiv_cubic.json b/Experiments/configurations/eiv_cubic.json index 0e9f9e88edb30a99bd73a2814a92c4ca504218fd..59c37c13abd44984283d2afb579296a18dec8a75 100644 --- a/Experiments/configurations/eiv_cubic.json +++ b/Experiments/configurations/eiv_cubic.json @@ -13,10 +13,10 @@ "std_y_update_points": [1,40], "eiv_prediction_number_of_draws": [100,5], "eiv_prediction_number_of_batches": 10, - "init_std_y_list": [0.5], + "init_std_y_list": [0.05], "gamma": 0.5, "hidden_layers": [128, 128, 128, 128], - "fixed_std_x": 0.10, + "fixed_std_x": 0.20, "seed_range": [0,10], "gpu_number": 1 } diff --git a/Experiments/configurations/eiv_linear.json b/Experiments/configurations/eiv_linear.json index 8b7ebe0767411186d51ca18dde9d2d6aeb92de06..ce84949e64c4edfc4f0f6b100742f2822ca32f1c 100644 --- a/Experiments/configurations/eiv_linear.json +++ b/Experiments/configurations/eiv_linear.json @@ -13,7 +13,7 @@ "std_y_update_points": [1,40], "eiv_prediction_number_of_draws": [100,5], "eiv_prediction_number_of_batches": 10, - "init_std_y_list": [0.5], + "init_std_y_list": [0.1], "gamma": 0.5, "hidden_layers": [128, 128, 128, 128], "fixed_std_x": 0.10, diff --git a/Experiments/configurations/eiv_quadratic.json b/Experiments/configurations/eiv_quadratic.json index c774bbed09beb17e65c475fdfedfca1c795682a7..dc8a9cd87c17b40cec38ee086d4d1954ade2c62c 100644 --- a/Experiments/configurations/eiv_quadratic.json +++ b/Experiments/configurations/eiv_quadratic.json @@ -13,7 +13,7 @@ "std_y_update_points": [1,40], "eiv_prediction_number_of_draws": [100,5], "eiv_prediction_number_of_batches": 10, - "init_std_y_list": [0.5], + "init_std_y_list": [0.1], "gamma": 0.5, "hidden_layers": [128, 128, 128, 128], "fixed_std_x": 0.10, diff --git a/Experiments/configurations/eiv_sine.json b/Experiments/configurations/eiv_sine.json index 1195ea0891ab3dc49745b0c9b310416cc2ee0ba9..b6bf6d0d286644e120729899bc8b1c1a2854681e 100644 --- a/Experiments/configurations/eiv_sine.json +++ b/Experiments/configurations/eiv_sine.json @@ -14,10 +14,10 @@ "eiv_number_of_forward_draws": 10, "eiv_prediction_number_of_draws": [100,5], "eiv_prediction_number_of_batches": 10, - "init_std_y_list": [0.1], + "init_std_y_list": [0.01], "gamma": 0.5, "hidden_layers": [128, 128, 128, 128], - "fixed_std_x": 0.02, + "fixed_std_x": 0.04, "seed_range": [0,10], "gpu_number": 1 } diff --git a/Experiments/configurations/noneiv_cubic.json b/Experiments/configurations/noneiv_cubic.json index 6f71af1d77db1a357f0d68edfe192cc06578d3af..a325665d0c8e0bfe813abd2ae27ba02deaf4610c 100644 --- a/Experiments/configurations/noneiv_cubic.json +++ b/Experiments/configurations/noneiv_cubic.json @@ -13,7 +13,7 @@ "std_y_update_points": [1,40] , "noneiv_prediction_number_of_draws": 100, "noneiv_prediction_number_of_batches": 10, - "init_std_y_list": [0.5], + "init_std_y_list": [0.05], "gamma": 0.5, "hidden_layers": [128, 128, 128, 128], "seed_range": [0,10], diff --git a/Experiments/configurations/noneiv_linear.json b/Experiments/configurations/noneiv_linear.json index ae3040e2b157bc7bd10cfd73d9f4f5448f4cf023..ff57b5686c5beeb2240c14bed889a3480d586429 100644 --- a/Experiments/configurations/noneiv_linear.json +++ b/Experiments/configurations/noneiv_linear.json @@ -13,7 +13,7 @@ "std_y_update_points": [1,40] , "noneiv_prediction_number_of_draws": 100, "noneiv_prediction_number_of_batches": 10, - "init_std_y_list": [0.5], + "init_std_y_list": [0.1], "gamma": 0.5, "hidden_layers": [128, 128, 128, 128], "seed_range": [0,10], diff --git a/Experiments/configurations/noneiv_quadratic.json b/Experiments/configurations/noneiv_quadratic.json index 405263797ff8b9ffa3ef692540b0db5edc506cda..d858851a1dc0be2e52c3d12a579efe485aad7503 100644 --- a/Experiments/configurations/noneiv_quadratic.json +++ b/Experiments/configurations/noneiv_quadratic.json @@ -13,7 +13,7 @@ "std_y_update_points": [1,40] , "noneiv_prediction_number_of_draws": 100, "noneiv_prediction_number_of_batches": 10, - "init_std_y_list": [0.5], + "init_std_y_list": [0.1], "gamma": 0.5, "hidden_layers": [128, 128, 128, 128], "seed_range": [0,10], diff --git a/Experiments/configurations/noneiv_sine.json b/Experiments/configurations/noneiv_sine.json index cc889194d725b1e167b1129d444e12720e2610a0..5191847de1cbf256a4b572a30a09f8dd22e0bea3 100644 --- a/Experiments/configurations/noneiv_sine.json +++ b/Experiments/configurations/noneiv_sine.json @@ -13,7 +13,7 @@ "std_y_update_points": [1,40] , "noneiv_prediction_number_of_draws": 100, "noneiv_prediction_number_of_batches": 10, - "init_std_y_list": [0.1], + "init_std_y_list": [0.01], "gamma": 0.5, "hidden_layers": [128, 128, 128, 128], "seed_range": [0,10], diff --git a/Experiments/create_tabular.py b/Experiments/create_tabular.py index cc15d7c84243bc517954c38ec9e5c92d744bc2c4..a83dcb94ab6484b853dd4487e710a13543a27170 100644 --- a/Experiments/create_tabular.py +++ b/Experiments/create_tabular.py @@ -2,7 +2,7 @@ import os import glob import json -metrics_to_display = ['rmse','logdens','coverage_numerical','true_coverage_numerical'] +metrics_to_display = ['rmse','true_coverage_numerical', 'total_coverage'] show_incomplete = True list_of_result_files = glob.glob(os.path.join('results','*.json')) diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py index 6912c013b2fa24ed916632321303f616f0d7c767..06f1b37bc770112d0c4a71d395a650d535e8b226 100644 --- a/Experiments/evaluate_metrics.py +++ b/Experiments/evaluate_metrics.py @@ -15,7 +15,8 @@ from tqdm import tqdm from EIVArchitectures import Networks from EIVTrainingRoutines import train_and_store -from EIVGeneral.coverage_metrics import epistemic_coverage, normalized_std +from EIVGeneral.coverage_metrics import epistemic_coverage, normalized_std,\ + total_coverage from EIVData.repeated_sampling import repeated_sampling # read in data via --data option @@ -111,8 +112,11 @@ def collect_metrics(x_y_pairs, seed=0, f'_p_{p:.2f}_seed_{seed}.pkl') net = Networks.FNNBer(p=p, init_std_y=init_std_y, h=[input_dim, *hidden_layers, output_dim]).to(device) - train_and_store.open_stored_training(saved_file=saved_file, - net=net, device=device) + + # load network and extract std_y + noneiv_std_y = train_and_store.open_stored_training(saved_file=saved_file, + net=net, device=device)[3] + noneiv_metrics['std_y'] = noneiv_std_y.cpu()[-1].item() # RMSE @@ -137,6 +141,8 @@ def collect_metrics(x_y_pairs, seed=0, noneiv_metrics['coverage_numerical'], noneiv_metrics['coverage_theory'] =\ epistemic_coverage(not_averaged_predictions, y,\ normalize_errors=False) + noneiv_metrics['total_coverage'] =\ + total_coverage(not_averaged_predictions, y) noneiv_metrics['coverage_normalized'],_ =\ epistemic_coverage(not_averaged_predictions, y,\ normalize_errors=True) @@ -182,8 +188,11 @@ def collect_metrics(x_y_pairs, seed=0, net = Networks.FNNEIV(p=p, init_std_y=init_std_y, h=[input_dim, *hidden_layers, output_dim], fixed_std_x=fixed_std_x).to(device) - train_and_store.open_stored_training(saved_file=saved_file, - net=net) + + # load network and extract std_y + eiv_std_y = train_and_store.open_stored_training(saved_file=saved_file, + net=net, device=device)[3] + eiv_metrics['std_y'] = eiv_std_y.cpu()[-1].item() # RMSE training_state = net.training @@ -207,6 +216,8 @@ def collect_metrics(x_y_pairs, seed=0, eiv_metrics['bias' ]= np.mean(scaled_res) eiv_metrics['coverage_numerical'], eiv_metrics['coverage_theory'] =\ epistemic_coverage(not_averaged_predictions, y, normalize_errors=False) + eiv_metrics['total_coverage'] =\ + total_coverage(not_averaged_predictions, y) eiv_metrics['coverage_normalized'],_ =\ epistemic_coverage(not_averaged_predictions, y, normalize_errors=True) eiv_metrics['res_std' ]= normalized_std(not_averaged_predictions, y) diff --git a/Experiments/plot_coverage_vs_q.py b/Experiments/plot_coverage_vs_q.py new file mode 100644 index 0000000000000000000000000000000000000000..5cb7f8f143f65524c22e28ecd6bcf388059f04dd --- /dev/null +++ b/Experiments/plot_coverage_vs_q.py @@ -0,0 +1,255 @@ +""" +Compute the coverage for various coverage factors and compare them with +with the corresponding q or theoretical coverage. Results will be stored +in various plots in the results/figures folder. +""" +import importlib +import os +import json + +import numpy as np +import torch +import torch.backends.cudnn +from torch.utils.data import DataLoader +from matplotlib.pyplot import cm +import matplotlib.pyplot as plt +from tqdm import tqdm + +from EIVArchitectures import Networks +from EIVTrainingRoutines import train_and_store +from EIVGeneral.coverage_collect import get_coverage_distribution +from EIVGeneral.manipulate_datasets import VerticalCut + +# coverages to consider +q_range = np.linspace(0.1, 0.95) + +def coverage_diagonal_plot(eiv_coverages, noneiv_coverages, color, + against_theoretical = False, label = '', mean_error=True): + """ + Plot numerical coverages against q (used coverage value), if + against_theoretical is False, or the theoretical coverage, if + `against_theoretical` is True. + :param eiv_coverages: The output of `compute_coverages` with `eiv=True` + :param noneiv_coverages: The output of `compute_coverages` with `eiv=False` + :param color: String, denoting the color. + :param against_theoretical: Boolean, see above. + :param label: String. Will be included as label in the plot. + :param mean_error: Boolean. If True the standard deviation is divided + by the square root of the number of elements, to display the error + of the mean (and not the dispersion). + """ + eiv_numerical_coverage, eiv_theoretical_coverage = eiv_coverages + noneiv_numerical_coverage, noneiv_theoretical_coverage = noneiv_coverages + assert (len(eiv_numerical_coverage.shape)) == 2 + assert (len(noneiv_numerical_coverage.shape)) == 2 + # EiV + # take mean/std over seed dimension + mean_eiv_numerical_coverage = np.mean(eiv_numerical_coverage, axis=-1) + std_eiv_numerical_coverage = np.std(eiv_numerical_coverage, axis=-1) + if mean_error: + std_eiv_numerical_coverage /= np.sqrt(eiv_numerical_coverage.shape[1]) + if against_theoretical: + # show theoretical coverage on x-axis + x_values = np.mean(eiv_theoretical_coverage, axis=-1) + else: + # show q-range on x-axis + x_values = np.array(q_range) + # plot mean + plt.plot(x_values, mean_eiv_numerical_coverage, + color=color, linestyle='solid', label=label) + # plot std + plt.fill_between(x_values, + mean_eiv_numerical_coverage - std_eiv_numerical_coverage, + mean_eiv_numerical_coverage + std_eiv_numerical_coverage, + color=color, alpha=0.5) + # non-EiV + # take mean/std over seed dimension + mean_noneiv_numerical_coverage = np.mean(noneiv_numerical_coverage, axis=-1) + std_noneiv_numerical_coverage = np.std(noneiv_numerical_coverage, axis=-1) + if mean_error: + std_noneiv_numerical_coverage /= \ + np.sqrt(noneiv_numerical_coverage.shape[1]) + if against_theoretical: + # show theoretical coverage on x-axis + x_values = np.mean(noneiv_theoretical_coverage, axis=-1) + else: + # show q-range on x-axis + x_values = np.array(q_range) + # plot mean + plt.plot(x_values, mean_noneiv_numerical_coverage, + color=color, linestyle='dashed') + # plot std + plt.fill_between(x_values, + mean_noneiv_numerical_coverage - std_noneiv_numerical_coverage, + mean_noneiv_numerical_coverage + std_noneiv_numerical_coverage, + color=color, alpha=0.3) + + + +# create figures, together with title and axis labels +plt.figure(1) +plt.clf() +plt.title('Coverage for datasets with ground truth') +plt.xlabel('q') +plt.ylabel('coverage') +# datasets to plot and their coloring +datasets = ['linear', 'quadratic','cubic','sine'] + +colors = ['#084519', '#7D098D', '#77050C', '#09017F'] + +def compute_coverages(data, eiv, number_of_draws): + """ + Create network and dataloader iterators for `data` (short dataname) and + feed them into `get_coverage_distribution`. + :data: String, short dataname + :eiv: Boolean. If True an EiV model is used, else an non-EiV model. + :number_of_draws: Number of draws to use for prediction. Take an int for + non-EiV models and a two-element list for EiV models. + :returns: numerical_coverage, theoretical_coverage + """ + # load configuration file + if eiv: + with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as\ + conf_file: + conf_dict = json.load(conf_file) + else: + with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as\ + conf_file: + conf_dict = json.load(conf_file) + + long_dataname = conf_dict["long_dataname"] + short_dataname = conf_dict["short_dataname"] + try: + normalize = conf_dict['normalize'] + except KeyError: + # normalize by default + normalize = True + + + load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data + + # switch to gpu, if possible + try: + gpu_number = conf_dict["gpu_number"] + device = torch.device(f'cuda:{gpu_number}') + try: + torch.tensor([0.0]).to(device) + except RuntimeError: + if torch.cuda.is_available(): + print('Switched to GPU 0') + device = torch.device('cuda:0') + else: + print('No cuda available, using CPU') + device = torch.device('cpu') + except KeyError: + device = torch.device('cpu') + + + train_data, _, _,_ \ + = load_data(seed=0, return_ground_truth=True, + normalize=normalize) + + + # train_data only used for finding dimensions + input_dim = train_data[0][0].numel() + output_dim = train_data[0][1].numel() + + ## Create iterators for get_coverage_distribution + seed_list = range(conf_dict["seed_range"][0], + conf_dict["seed_range"][1]) + + # iterator for networks + def net_iterator(eiv=eiv, seed_list=seed_list): + """ + Yields EiV models (if `eiv`) or + non-EiV models (if not `eiv`) for the seeds in + `seed_list` and `data`. + """ + if eiv: + # load parameters + init_std_y = conf_dict["init_std_y_list"][0] + unscaled_reg = conf_dict["unscaled_reg"] + p = conf_dict["p"] + hidden_layers = conf_dict["hidden_layers"] + fixed_std_x = conf_dict["fixed_std_x"] + net = Networks.FNNEIV(p=p, init_std_y=init_std_y, + h=[input_dim, *hidden_layers, output_dim], + fixed_std_x=fixed_std_x).to(device) + for seed in seed_list: + # load network paramaters + saved_file = os.path.join('saved_networks', + f'eiv_{short_dataname}'\ + f'_init_std_y_{init_std_y:.3f}'\ + f'_ureg_{unscaled_reg:.1f}'\ + f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}'\ + f'_seed_{seed}.pkl') + train_and_store.open_stored_training(saved_file=saved_file, + net=net, device=device) + yield net + else: + # load parameters + init_std_y = conf_dict["init_std_y_list"][0] + unscaled_reg = conf_dict["unscaled_reg"] + p = conf_dict["p"] + hidden_layers = conf_dict["hidden_layers"] + net = Networks.FNNBer(p=p, init_std_y=init_std_y, + h=[input_dim, *hidden_layers, output_dim]).to(device) + for seed in seed_list: + saved_file = os.path.join('saved_networks', + f'noneiv_{short_dataname}'\ + f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\ + f'_p_{p:.2f}_seed_{seed}.pkl') + # load network paramaters + train_and_store.open_stored_training(saved_file=saved_file, + net=net, device=device) + yield net + + # iterator for dataloaders + def dataloader_iterator(seed_list=seed_list, batch_size = 100): + """ + Yields dataloaders for `data`, according to the seeds in `seed_list`. + """ + for seed in seed_list: + _, _, _, true_test =\ + load_data(seed=seed, return_ground_truth=True, + normalize=normalize) + # take noisy x but unnoisy y + cut_true_test = VerticalCut(true_test, + components_to_pick=[2,1]) + test_dataloader = DataLoader(cut_true_test, + batch_size=batch_size, + shuffle=True) + yield test_dataloader + + + # Compute coverages + numerical_coverage, theoretical_coverage = get_coverage_distribution( + net_iterator=net_iterator(eiv=eiv), + dataloader_iterator=dataloader_iterator(), + device=device, + number_of_draws=number_of_draws, + q_range=q_range, + noisy_y = False) + return numerical_coverage, theoretical_coverage + +# loop through data +for data, color in tqdm(zip(datasets, colors)): + # compute coverages + eiv_coverages = compute_coverages(data=data, eiv=True, + number_of_draws=[100,5]) + noneiv_coverages = compute_coverages(data=data, eiv=False, + number_of_draws=100) + # create plots + coverage_diagonal_plot(eiv_coverages, noneiv_coverages, + color=color, against_theoretical=False, label=data) + +# add diagonal +x_diag = np.linspace(0.0, 1.0) +plt.plot(x_diag, x_diag, color='k', linestyle='dotted' ) + +# add legend +plt.legend() + +# save and show +plt.savefig('results/figures/true_coverage_vs_q.pdf') +plt.show() diff --git a/Experiments/plot_prediction.py b/Experiments/plot_prediction.py index c0e1817fc8d20a865f9f236a5da0b3e5ecb95725..a603d78742a38971ffbac057919f034ad2c4b906 100644 --- a/Experiments/plot_prediction.py +++ b/Experiments/plot_prediction.py @@ -255,6 +255,7 @@ for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list, plt.fill_between(x_values.flatten(), noneiv_pred-k * noneiv_unc, noneiv_pred + k * noneiv_unc, color=color[1], alpha=0.5) + plt.savefig(f'results/figures/prediction_{data}.pdf') else: # multidimensional handling not included yet pass diff --git a/Experiments/plot_summary.py b/Experiments/plot_summary.py new file mode 100644 index 0000000000000000000000000000000000000000..c0863c604ff618df5e45351576c493cece331f50 --- /dev/null +++ b/Experiments/plot_summary.py @@ -0,0 +1,180 @@ +""" +Plot summary quantities in bar plots, that is +- the RMSE (w.r.t. noisy data) +- the total coverage +- the learned std_y +by reading the results produced by `evaluate_metrics.py` +""" +import os +import glob +import json + +import numpy as np +import matplotlib.pyplot as plt + +k = 2 + +# load in all available result files +list_of_result_files = glob.glob(os.path.join('results','*.json')) +results = {} +for filename in list_of_result_files: + data = filename.replace(os.path.join('results','metrics_'),'').replace('.json','') + with open(filename,'r') as f: + results[data] = json.load(f) + +def save_readout(dictionary, key): + """ + Returns the value of the `dictionary` for `key`, unless + the later doesn't exist, in which case (None,None) is returned. + """ + try: + readout = dictionary[key] + if type(readout) is list: + assert len(readout) == 2 + return readout + else: + readout = float(readout) + return (readout, None) + except KeyError: + return (None,None) + + + +## RMSE plot + +metric = 'rmse' +data_list = results.keys() +colors = ['red', 'blue'] +ymax = 0.8 +# read out EiV and non-EiV results for all datasets +metric_results = [ + (save_readout(results[data]['eiv'], metric), + save_readout(results[data]['noneiv'], metric)) + for data in data_list] + +# create figure +plt.figure(1) +plt.clf() +plt.title('RMSE') + +# plot bars +for i, ([(eiv_metric_mean, eiv_metric_std), + (noneiv_metric_mean, noneiv_metric_std)],\ + data) in\ + enumerate(zip(metric_results, data_list)): + if eiv_metric_mean is not None: + assert noneiv_metric_mean is not None + if eiv_metric_std is not None: + assert noneiv_metric_std is not None + plt.plot(i+1, eiv_metric_mean, '^', color=colors[0]) + plt.bar(i+1, + height = 2*eiv_metric_std, + width = 0.1, + bottom = eiv_metric_mean - eiv_metric_std, + color=colors[0], + alpha=0.5) + plt.plot(i+1, noneiv_metric_mean, '^', color=colors[1]) + plt.bar(i+1, + height = 2 * k *noneiv_metric_std, + width = 0.1, + bottom = noneiv_metric_mean - k* noneiv_metric_std, + color=colors[1], + alpha=0.5) +plt.ylim(bottom=0, top=ymax) +ax = plt.gca() +ax.set_xticks(np.arange(1,len(data_list)+1)) +ax.set_xticklabels(data_list, rotation='vertical') +plt.savefig('results/figures/RMSE_bar_plot.pdf') + +## coverage plot + +metric = 'true_coverage_numerical' +data_list = ['linear','quadratic','cubic','sine'] +colors = ['red', 'blue'] +ymax = 1.0 +# read out EiV and non-EiV results for all datasets +metric_results = [ + (save_readout(results[data]['eiv'], metric), + save_readout(results[data]['noneiv'], metric)) + for data in data_list] + +# create figure +plt.figure(2) +plt.clf() +plt.title('coverage (ground truth)') + +# plot bars +for i, ([(eiv_metric_mean, eiv_metric_std), + (noneiv_metric_mean, noneiv_metric_std)],\ + data) in\ + enumerate(zip(metric_results, data_list)): + if eiv_metric_mean is not None: + assert noneiv_metric_mean is not None + if eiv_metric_std is not None: + assert noneiv_metric_std is not None + plt.plot(i+1, eiv_metric_mean, '^', color=colors[0]) + plt.bar(i+1, + height = 2*eiv_metric_std, + width = 0.1, + bottom = eiv_metric_mean - eiv_metric_std, + color=colors[0], + alpha=0.5) + plt.plot(i+1, noneiv_metric_mean, '^', color=colors[1]) + plt.bar(i+1, + height = 2 * k *noneiv_metric_std, + width = 0.1, + bottom = noneiv_metric_mean - k* noneiv_metric_std, + color=colors[1], + alpha=0.5) +plt.axhline(0.95,0.0,1.0,color='k', linestyle='dashed') +plt.ylim(bottom=0, top=ymax) +ax = plt.gca() +ax.set_xticks(np.arange(1,len(data_list)+1)) +ax.set_xticklabels(data_list, rotation='vertical') +plt.savefig('results/figures/true_coverage_bar_plot.pdf') + +## noisy coverage plot + +metric = 'coverage_numerical' +data_list = results.keys() +colors = ['red', 'blue'] +ymax = 1.0 +# read out EiV and non-EiV results for all datasets +metric_results = [ + (save_readout(results[data]['eiv'], metric), + save_readout(results[data]['noneiv'], metric)) + for data in data_list] + +# create figure +plt.figure(3) +plt.clf() +plt.title('coverage (noisy labels)') + +# plot bars +for i, ([(eiv_metric_mean, eiv_metric_std), + (noneiv_metric_mean, noneiv_metric_std)],\ + data) in\ + enumerate(zip(metric_results, data_list)): + if eiv_metric_mean is not None: + assert noneiv_metric_mean is not None + if eiv_metric_std is not None: + assert noneiv_metric_std is not None + plt.plot(i+1, eiv_metric_mean, '^', color=colors[0]) + plt.bar(i+1, + height = 2*eiv_metric_std, + width = 0.1, + bottom = eiv_metric_mean - eiv_metric_std, + color=colors[0], + alpha=0.5) + plt.plot(i+1, noneiv_metric_mean, '^', color=colors[1]) + plt.bar(i+1, + height = 2 * k *noneiv_metric_std, + width = 0.1, + bottom = noneiv_metric_mean - k* noneiv_metric_std, + color=colors[1], + alpha=0.5) +plt.ylim(bottom=0, top=ymax) +ax = plt.gca() +ax.set_xticks(np.arange(1,len(data_list)+1)) +ax.set_xticklabels(data_list, rotation='vertical') +plt.savefig('results/figures/noisy_coverage_bar_plot.pdf') diff --git a/Experiments/plot_coverage.py b/Experiments/plot_variety_of_coverage_plots.py similarity index 100% rename from Experiments/plot_coverage.py rename to Experiments/plot_variety_of_coverage_plots.py