From cb254c523bbdcabd6177f6828e59aefd43308d4f Mon Sep 17 00:00:00 2001 From: Joerg Martin <joerg.martin@ptb.de> Date: Thu, 16 Dec 2021 09:21:15 +0100 Subject: [PATCH] Changed evaluation of metrics Based now on JSON files in results folder. evaluate_tabular.py has been renamed into evaluate_metrics. JSON files have also been updated. Need to check whether correct now for all datasets. --- Experiments/configurations/eiv_msd.json | 2 +- Experiments/configurations/eiv_power.json | 2 +- Experiments/configurations/eiv_protein.json | 2 +- Experiments/configurations/eiv_wine.json | 2 +- Experiments/configurations/noneiv_kin8nm.json | 2 +- Experiments/configurations/noneiv_msd.json | 2 +- Experiments/configurations/noneiv_power.json | 2 +- .../configurations/noneiv_protein.json | 2 +- Experiments/configurations/noneiv_wine.json | 2 +- Experiments/create_tabular.py | 35 +++++++++++++++++++ ...valuate_tabular.py => evaluate_metrics.py} | 24 +++++++++---- 11 files changed, 62 insertions(+), 15 deletions(-) create mode 100644 Experiments/create_tabular.py rename Experiments/{evaluate_tabular.py => evaluate_metrics.py} (91%) diff --git a/Experiments/configurations/eiv_msd.json b/Experiments/configurations/eiv_msd.json index a971f4b..d298086 100644 --- a/Experiments/configurations/eiv_msd.json +++ b/Experiments/configurations/eiv_msd.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 4, - "std_y_update_points": 4, + "std_y_update_points": [1,4], "eiv_prediction_number_of_draws": [100,5], "eiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/eiv_power.json b/Experiments/configurations/eiv_power.json index c7517c3..917f1f0 100644 --- a/Experiments/configurations/eiv_power.json +++ b/Experiments/configurations/eiv_power.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 10, - "std_y_update_points": 15, + "std_y_update_points": [1,15], "eiv_prediction_number_of_draws": [100,5], "eiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/eiv_protein.json b/Experiments/configurations/eiv_protein.json index 04ffb85..330713a 100644 --- a/Experiments/configurations/eiv_protein.json +++ b/Experiments/configurations/eiv_protein.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 10, - "std_y_update_points": 10, + "std_y_update_points": [1,14], "eiv_prediction_number_of_draws": [100,5], "eiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/eiv_wine.json b/Experiments/configurations/eiv_wine.json index 0246c03..fef0c48 100644 --- a/Experiments/configurations/eiv_wine.json +++ b/Experiments/configurations/eiv_wine.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 30, - "std_y_update_points": 50, + "std_y_update_points": [1,40], "eiv_prediction_number_of_draws": [100,5], "eiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/noneiv_kin8nm.json b/Experiments/configurations/noneiv_kin8nm.json index fdc5ec5..694d7f0 100644 --- a/Experiments/configurations/noneiv_kin8nm.json +++ b/Experiments/configurations/noneiv_kin8nm.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 20, - "std_y_update_points": [1,16], + "std_y_update_points": [1,14], "noneiv_prediction_number_of_draws": 100, "noneiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/noneiv_msd.json b/Experiments/configurations/noneiv_msd.json index a0e2f73..126a571 100644 --- a/Experiments/configurations/noneiv_msd.json +++ b/Experiments/configurations/noneiv_msd.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 4, - "std_y_update_points": 4, + "std_y_update_points": [1,4], "noneiv_prediction_number_of_draws": 100, "noneiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/noneiv_power.json b/Experiments/configurations/noneiv_power.json index 48aac9d..e14e82d 100644 --- a/Experiments/configurations/noneiv_power.json +++ b/Experiments/configurations/noneiv_power.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 10, - "std_y_update_points": 15, + "std_y_update_points": [1,15], "noneiv_prediction_number_of_draws": 100, "noneiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/noneiv_protein.json b/Experiments/configurations/noneiv_protein.json index 454deae..36f5faf 100644 --- a/Experiments/configurations/noneiv_protein.json +++ b/Experiments/configurations/noneiv_protein.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 10, - "std_y_update_points": 10, + "std_y_update_points": [1,14], "noneiv_prediction_number_of_draws": 100, "noneiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/noneiv_wine.json b/Experiments/configurations/noneiv_wine.json index ddd2199..1c01476 100644 --- a/Experiments/configurations/noneiv_wine.json +++ b/Experiments/configurations/noneiv_wine.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 30, - "std_y_update_points": 50, + "std_y_update_points": [1,40], "noneiv_prediction_number_of_draws": 100, "noneiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/create_tabular.py b/Experiments/create_tabular.py new file mode 100644 index 0000000..3d12843 --- /dev/null +++ b/Experiments/create_tabular.py @@ -0,0 +1,35 @@ +import os +import glob +import json + +metrics_to_display = ['rmse','logdens','bias','coverage_normalized'] + + +list_of_result_files = glob.glob(os.path.join('results','*.json')) +results = {} +for filename in list_of_result_files: + data = filename.replace(os.path.join('results','metrics_'),'').replace('.json','') + with open(filename,'r') as f: + results[data] = json.load(f) + +## header +header_string = 'DATA' +for metric in metrics_to_display: + header_string += f' {metric}' +print(header_string) +## results +for data in results.keys(): + noneiv_results = [results[data]['noneiv'][metric] + for metric in metrics_to_display] + noneiv_results_string = f'{data} - nonEiV:' + for [metric_mean, metric_std] in noneiv_results: + noneiv_results_string += f' {metric_mean:.3f} ({metric_std:.3f})' + print(noneiv_results_string) + eiv_results = [results[data]['eiv'][metric] + for metric in metrics_to_display] + eiv_results_string = f'{data} - EiV:' + for [metric_mean, metric_std] in eiv_results: + eiv_results_string += f' {metric_mean:.3f} ({metric_std:.3f})' + print(eiv_results_string) + + diff --git a/Experiments/evaluate_tabular.py b/Experiments/evaluate_metrics.py similarity index 91% rename from Experiments/evaluate_tabular.py rename to Experiments/evaluate_metrics.py index 1cb0fec..5dbc16f 100644 --- a/Experiments/evaluate_tabular.py +++ b/Experiments/evaluate_metrics.py @@ -231,11 +231,23 @@ for seed in tqdm(seed_list): noneiv_metrics_collection[key].append(noneiv_metrics[key]) eiv_metrics_collection[key].append(eiv_metrics[key]) -print('Non-EiV\n-----') +results_dict = {} +print('Non-EiV:\n-----') +results_dict['noneiv'] = {} for key in collection_keys: - print(f'{key} {np.mean(noneiv_metrics_collection[key]):.5f}'\ - f'({np.std(noneiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list)):.5f})') -print('EiV\n-----') + metric_mean = float(np.mean(noneiv_metrics_collection[key])) + metric_std = float(np.std(noneiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list))) + results_dict['noneiv'][key] = (metric_mean, metric_std) + print(f'{key}: {metric_mean:.5f} ({metric_std:.5f})') +print('\n') +print('EiV:\n-----') +results_dict['eiv'] = {} for key in collection_keys: - print(f'{key} {np.mean(eiv_metrics_collection[key]):.5f}'\ - f'({np.std(eiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list)):.5f})') + metric_mean = float(np.mean(eiv_metrics_collection[key])) + metric_std = float(np.std(eiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list))) + print(f'{key}: {metric_mean:.5f} ({metric_std:.5f})') + results_dict['eiv'][key] = (metric_mean, metric_std) + +# write results to a JSON file in the results folder +with open(os.path.join('results',f'metrics_{short_dataname}.json'), 'w') as f: + json.dump(results_dict, f) -- GitLab