diff --git a/Experiments/configurations/eiv_msd.json b/Experiments/configurations/eiv_msd.json index a971f4bd96be65305b2572631c9d9ff092bce985..d29808616f409eb56467b162271e0362b196de76 100644 --- a/Experiments/configurations/eiv_msd.json +++ b/Experiments/configurations/eiv_msd.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 4, - "std_y_update_points": 4, + "std_y_update_points": [1,4], "eiv_prediction_number_of_draws": [100,5], "eiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/eiv_power.json b/Experiments/configurations/eiv_power.json index c7517c3e09f449f06472e1eb85687cbe678d1d94..917f1f002b4b79382d002183cd577e5ac653d3cb 100644 --- a/Experiments/configurations/eiv_power.json +++ b/Experiments/configurations/eiv_power.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 10, - "std_y_update_points": 15, + "std_y_update_points": [1,15], "eiv_prediction_number_of_draws": [100,5], "eiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/eiv_protein.json b/Experiments/configurations/eiv_protein.json index 04ffb85949dff72f1af20f888b802aab8e13bf94..330713a1118dcfc2e2ef3b8159574fbbdb696932 100644 --- a/Experiments/configurations/eiv_protein.json +++ b/Experiments/configurations/eiv_protein.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 10, - "std_y_update_points": 10, + "std_y_update_points": [1,14], "eiv_prediction_number_of_draws": [100,5], "eiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/eiv_wine.json b/Experiments/configurations/eiv_wine.json index 0246c033c5c91e2d0d02988c1830aea812112aee..fef0c488758678ca809102ce53f30e226a3b77c1 100644 --- a/Experiments/configurations/eiv_wine.json +++ b/Experiments/configurations/eiv_wine.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 30, - "std_y_update_points": 50, + "std_y_update_points": [1,40], "eiv_prediction_number_of_draws": [100,5], "eiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/noneiv_kin8nm.json b/Experiments/configurations/noneiv_kin8nm.json index fdc5ec591a2fea76f3c0b1309edb06645cb240ce..694d7f0d065021462c841e636957caf0d46622ad 100644 --- a/Experiments/configurations/noneiv_kin8nm.json +++ b/Experiments/configurations/noneiv_kin8nm.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 20, - "std_y_update_points": [1,16], + "std_y_update_points": [1,14], "noneiv_prediction_number_of_draws": 100, "noneiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/noneiv_msd.json b/Experiments/configurations/noneiv_msd.json index a0e2f73beb63e165c7da5266b774f227e90310c1..126a571eb955cf2dd320d0b40bd4c749446a3253 100644 --- a/Experiments/configurations/noneiv_msd.json +++ b/Experiments/configurations/noneiv_msd.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 4, - "std_y_update_points": 4, + "std_y_update_points": [1,4], "noneiv_prediction_number_of_draws": 100, "noneiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/noneiv_power.json b/Experiments/configurations/noneiv_power.json index 48aac9dcf4cc208095142760d7bf286c2589b22b..e14e82d00f0048d0fa42a47baddf0ff520dbc27e 100644 --- a/Experiments/configurations/noneiv_power.json +++ b/Experiments/configurations/noneiv_power.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 10, - "std_y_update_points": 15, + "std_y_update_points": [1,15], "noneiv_prediction_number_of_draws": 100, "noneiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/noneiv_protein.json b/Experiments/configurations/noneiv_protein.json index 454deaef823c1a25c03588c4f430b5d0f67e5e93..36f5fafd53fc56e16387b86a038507e848564c97 100644 --- a/Experiments/configurations/noneiv_protein.json +++ b/Experiments/configurations/noneiv_protein.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 10, - "std_y_update_points": 10, + "std_y_update_points": [1,14], "noneiv_prediction_number_of_draws": 100, "noneiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/configurations/noneiv_wine.json b/Experiments/configurations/noneiv_wine.json index ddd2199fae58e2dd62409d554f51983b5bd1ef02..1c014763748a07c5ecb846a2391eff968fb09a78 100644 --- a/Experiments/configurations/noneiv_wine.json +++ b/Experiments/configurations/noneiv_wine.json @@ -9,7 +9,7 @@ "report_point": 5, "p": 0.2, "lr_update": 30, - "std_y_update_points": 50, + "std_y_update_points": [1,40], "noneiv_prediction_number_of_draws": 100, "noneiv_prediction_number_of_batches": 10, "init_std_y_list": [0.5], diff --git a/Experiments/create_tabular.py b/Experiments/create_tabular.py new file mode 100644 index 0000000000000000000000000000000000000000..3d128431359c29961892df2f49e3a2f11720f450 --- /dev/null +++ b/Experiments/create_tabular.py @@ -0,0 +1,35 @@ +import os +import glob +import json + +metrics_to_display = ['rmse','logdens','bias','coverage_normalized'] + + +list_of_result_files = glob.glob(os.path.join('results','*.json')) +results = {} +for filename in list_of_result_files: + data = filename.replace(os.path.join('results','metrics_'),'').replace('.json','') + with open(filename,'r') as f: + results[data] = json.load(f) + +## header +header_string = 'DATA' +for metric in metrics_to_display: + header_string += f' {metric}' +print(header_string) +## results +for data in results.keys(): + noneiv_results = [results[data]['noneiv'][metric] + for metric in metrics_to_display] + noneiv_results_string = f'{data} - nonEiV:' + for [metric_mean, metric_std] in noneiv_results: + noneiv_results_string += f' {metric_mean:.3f} ({metric_std:.3f})' + print(noneiv_results_string) + eiv_results = [results[data]['eiv'][metric] + for metric in metrics_to_display] + eiv_results_string = f'{data} - EiV:' + for [metric_mean, metric_std] in eiv_results: + eiv_results_string += f' {metric_mean:.3f} ({metric_std:.3f})' + print(eiv_results_string) + + diff --git a/Experiments/evaluate_tabular.py b/Experiments/evaluate_metrics.py similarity index 91% rename from Experiments/evaluate_tabular.py rename to Experiments/evaluate_metrics.py index 1cb0fec9875b2e0b7fad3fd42acb2bb4f9057648..5dbc16fcc5277e95db69d61b5decaee1a17fc0df 100644 --- a/Experiments/evaluate_tabular.py +++ b/Experiments/evaluate_metrics.py @@ -231,11 +231,23 @@ for seed in tqdm(seed_list): noneiv_metrics_collection[key].append(noneiv_metrics[key]) eiv_metrics_collection[key].append(eiv_metrics[key]) -print('Non-EiV\n-----') +results_dict = {} +print('Non-EiV:\n-----') +results_dict['noneiv'] = {} for key in collection_keys: - print(f'{key} {np.mean(noneiv_metrics_collection[key]):.5f}'\ - f'({np.std(noneiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list)):.5f})') -print('EiV\n-----') + metric_mean = float(np.mean(noneiv_metrics_collection[key])) + metric_std = float(np.std(noneiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list))) + results_dict['noneiv'][key] = (metric_mean, metric_std) + print(f'{key}: {metric_mean:.5f} ({metric_std:.5f})') +print('\n') +print('EiV:\n-----') +results_dict['eiv'] = {} for key in collection_keys: - print(f'{key} {np.mean(eiv_metrics_collection[key]):.5f}'\ - f'({np.std(eiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list)):.5f})') + metric_mean = float(np.mean(eiv_metrics_collection[key])) + metric_std = float(np.std(eiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list))) + print(f'{key}: {metric_mean:.5f} ({metric_std:.5f})') + results_dict['eiv'][key] = (metric_mean, metric_std) + +# write results to a JSON file in the results folder +with open(os.path.join('results',f'metrics_{short_dataname}.json'), 'w') as f: + json.dump(results_dict, f)