Commit cb254c52 authored by Jörg Martin

Changed evaluation of metrics

The metric evaluation is now based on the JSON files in the results folder.
evaluate_tabular.py has been renamed to evaluate_metrics. The JSON
configuration files have also been updated; still need to check whether they
are now correct for all datasets.
parent a9de00e5
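For orientation, a minimal sketch of the per-dataset metrics file that the changes below write to the results folder and that evaluate_metrics reads back; the metric keys are taken from the code, while the file name pattern and the zero values are only illustrative placeholders:

# assumed layout of results/metrics_<short_dataname>.json: one (mean, standard error)
# pair per metric, stored under the 'noneiv' and 'eiv' keys (placeholder values only)
example_metrics = {
    "noneiv": {"rmse": [0.0, 0.0], "logdens": [0.0, 0.0],
               "bias": [0.0, 0.0], "coverage_normalized": [0.0, 0.0]},
    "eiv":    {"rmse": [0.0, 0.0], "logdens": [0.0, 0.0],
               "bias": [0.0, 0.0], "coverage_normalized": [0.0, 0.0]},
}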
@@ -9,7 +9,7 @@
     "report_point": 5,
     "p": 0.2,
     "lr_update": 4,
-    "std_y_update_points": 4,
+    "std_y_update_points": [1,4],
     "eiv_prediction_number_of_draws": [100,5],
     "eiv_prediction_number_of_batches": 10,
     "init_std_y_list": [0.5],

@@ -9,7 +9,7 @@
     "report_point": 5,
     "p": 0.2,
     "lr_update": 10,
-    "std_y_update_points": 15,
+    "std_y_update_points": [1,15],
     "eiv_prediction_number_of_draws": [100,5],
     "eiv_prediction_number_of_batches": 10,
     "init_std_y_list": [0.5],

@@ -9,7 +9,7 @@
     "report_point": 5,
     "p": 0.2,
     "lr_update": 10,
-    "std_y_update_points": 10,
+    "std_y_update_points": [1,14],
     "eiv_prediction_number_of_draws": [100,5],
     "eiv_prediction_number_of_batches": 10,
     "init_std_y_list": [0.5],

@@ -9,7 +9,7 @@
     "report_point": 5,
     "p": 0.2,
     "lr_update": 30,
-    "std_y_update_points": 50,
+    "std_y_update_points": [1,40],
     "eiv_prediction_number_of_draws": [100,5],
     "eiv_prediction_number_of_batches": 10,
     "init_std_y_list": [0.5],

@@ -9,7 +9,7 @@
     "report_point": 5,
     "p": 0.2,
     "lr_update": 20,
-    "std_y_update_points": [1,16],
+    "std_y_update_points": [1,14],
     "noneiv_prediction_number_of_draws": 100,
     "noneiv_prediction_number_of_batches": 10,
     "init_std_y_list": [0.5],

@@ -9,7 +9,7 @@
     "report_point": 5,
     "p": 0.2,
     "lr_update": 4,
-    "std_y_update_points": 4,
+    "std_y_update_points": [1,4],
     "noneiv_prediction_number_of_draws": 100,
     "noneiv_prediction_number_of_batches": 10,
     "init_std_y_list": [0.5],

@@ -9,7 +9,7 @@
     "report_point": 5,
     "p": 0.2,
     "lr_update": 10,
-    "std_y_update_points": 15,
+    "std_y_update_points": [1,15],
     "noneiv_prediction_number_of_draws": 100,
     "noneiv_prediction_number_of_batches": 10,
     "init_std_y_list": [0.5],

@@ -9,7 +9,7 @@
     "report_point": 5,
     "p": 0.2,
     "lr_update": 10,
-    "std_y_update_points": 10,
+    "std_y_update_points": [1,14],
     "noneiv_prediction_number_of_draws": 100,
     "noneiv_prediction_number_of_batches": 10,
     "init_std_y_list": [0.5],

@@ -9,7 +9,7 @@
     "report_point": 5,
     "p": 0.2,
     "lr_update": 30,
-    "std_y_update_points": 50,
+    "std_y_update_points": [1,40],
     "noneiv_prediction_number_of_draws": 100,
     "noneiv_prediction_number_of_batches": 10,
     "init_std_y_list": [0.5],

import os
import glob
import json

metrics_to_display = ['rmse','logdens','bias','coverage_normalized']

# read every metrics JSON file from the results folder; the dataset name is
# recovered from the file name metrics_<dataset>.json
list_of_result_files = glob.glob(os.path.join('results','*.json'))
results = {}
for filename in list_of_result_files:
    data = filename.replace(os.path.join('results','metrics_'),'').replace('.json','')
    with open(filename,'r') as f:
        results[data] = json.load(f)

## header
header_string = 'DATA'
for metric in metrics_to_display:
    header_string += f' {metric}'
print(header_string)

## results
for data in results.keys():
    noneiv_results = [results[data]['noneiv'][metric]
            for metric in metrics_to_display]
    noneiv_results_string = f'{data} - nonEiV:'
    for [metric_mean, metric_std] in noneiv_results:
        noneiv_results_string += f' {metric_mean:.3f} ({metric_std:.3f})'
    print(noneiv_results_string)
    eiv_results = [results[data]['eiv'][metric]
            for metric in metrics_to_display]
    eiv_results_string = f'{data} - EiV:'
    for [metric_mean, metric_std] in eiv_results:
        eiv_results_string += f' {metric_mean:.3f} ({metric_std:.3f})'
    print(eiv_results_string)
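The glob pattern above is relative, so the script is presumably run from the directory containing the results folder, for example (assuming the renamed file carries the usual .py extension):

    python evaluate_metrics.py

Every JSON file in results is picked up, so files that do not provide two-element [mean, std] entries under 'noneiv' and 'eiv' would make the loop above fail.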
@@ -231,11 +231,23 @@ for seed in tqdm(seed_list):
         noneiv_metrics_collection[key].append(noneiv_metrics[key])
         eiv_metrics_collection[key].append(eiv_metrics[key])
-print('Non-EiV\n-----')
+results_dict = {}
+print('Non-EiV:\n-----')
+results_dict['noneiv'] = {}
 for key in collection_keys:
-    print(f'{key} {np.mean(noneiv_metrics_collection[key]):.5f}'\
-            f'({np.std(noneiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list)):.5f})')
-print('EiV\n-----')
+    metric_mean = float(np.mean(noneiv_metrics_collection[key]))
+    metric_std = float(np.std(noneiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list)))
+    results_dict['noneiv'][key] = (metric_mean, metric_std)
+    print(f'{key}: {metric_mean:.5f} ({metric_std:.5f})')
+print('\n')
+print('EiV:\n-----')
+results_dict['eiv'] = {}
 for key in collection_keys:
-    print(f'{key} {np.mean(eiv_metrics_collection[key]):.5f}'\
-            f'({np.std(eiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list)):.5f})')
+    metric_mean = float(np.mean(eiv_metrics_collection[key]))
+    metric_std = float(np.std(eiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list)))
+    print(f'{key}: {metric_mean:.5f} ({metric_std:.5f})')
+    results_dict['eiv'][key] = (metric_mean, metric_std)
+# write results to a JSON file in the results folder
+with open(os.path.join('results',f'metrics_{short_dataname}.json'), 'w') as f:
+    json.dump(results_dict, f)
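Since json.dump serializes the (metric_mean, metric_std) tuples as JSON arrays, evaluate_metrics receives them back as two-element lists; a standalone sketch of that round trip (not part of the repository, values are arbitrary):

    import json
    # a tuple written by json.dump comes back as a list from json.load,
    # which the [metric_mean, metric_std] unpacking in evaluate_metrics relies on
    restored = json.loads(json.dumps({'rmse': (0.123, 0.004)}))['rmse']
    assert restored == [0.123, 0.004]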