Skip to content
Snippets Groups Projects
Commit cb254c52 authored by Jörg Martin's avatar Jörg Martin
Browse files

Changed evaluation of metrics

The metrics are now read from JSON files in the results folder. evaluate_tabular.py has been
renamed to evaluate_metrics.py, and the JSON configuration files have also been updated. It
still needs to be checked whether the values are now correct for all datasets.
parent a9de00e5
No related branches found
No related tags found
No related merge requests found
......@@ -9,7 +9,7 @@
"report_point": 5,
"p": 0.2,
"lr_update": 4,
"std_y_update_points": 4,
"std_y_update_points": [1,4],
"eiv_prediction_number_of_draws": [100,5],
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
......
......@@ -9,7 +9,7 @@
"report_point": 5,
"p": 0.2,
"lr_update": 10,
"std_y_update_points": 15,
"std_y_update_points": [1,15],
"eiv_prediction_number_of_draws": [100,5],
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
......
......@@ -9,7 +9,7 @@
"report_point": 5,
"p": 0.2,
"lr_update": 10,
"std_y_update_points": 10,
"std_y_update_points": [1,14],
"eiv_prediction_number_of_draws": [100,5],
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
......
......@@ -9,7 +9,7 @@
"report_point": 5,
"p": 0.2,
"lr_update": 30,
"std_y_update_points": 50,
"std_y_update_points": [1,40],
"eiv_prediction_number_of_draws": [100,5],
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
......
......@@ -9,7 +9,7 @@
"report_point": 5,
"p": 0.2,
"lr_update": 20,
"std_y_update_points": [1,16],
"std_y_update_points": [1,14],
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
......
......@@ -9,7 +9,7 @@
"report_point": 5,
"p": 0.2,
"lr_update": 4,
"std_y_update_points": 4,
"std_y_update_points": [1,4],
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
......
......@@ -9,7 +9,7 @@
"report_point": 5,
"p": 0.2,
"lr_update": 10,
"std_y_update_points": 15,
"std_y_update_points": [1,15],
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
......
......@@ -9,7 +9,7 @@
"report_point": 5,
"p": 0.2,
"lr_update": 10,
"std_y_update_points": 10,
"std_y_update_points": [1,14],
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
......
......@@ -9,7 +9,7 @@
"report_point": 5,
"p": 0.2,
"lr_update": 30,
"std_y_update_points": 50,
"std_y_update_points": [1,40],
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
......
import os
import glob
import json
metrics_to_display = ['rmse','logdens','bias','coverage_normalized']
list_of_result_files = glob.glob(os.path.join('results','*.json'))
results = {}
for filename in list_of_result_files:
data = filename.replace(os.path.join('results','metrics_'),'').replace('.json','')
with open(filename,'r') as f:
results[data] = json.load(f)
## header
header_string = 'DATA'
for metric in metrics_to_display:
header_string += f' {metric}'
print(header_string)
## results
for data in results.keys():
noneiv_results = [results[data]['noneiv'][metric]
for metric in metrics_to_display]
noneiv_results_string = f'{data} - nonEiV:'
for [metric_mean, metric_std] in noneiv_results:
noneiv_results_string += f' {metric_mean:.3f} ({metric_std:.3f})'
print(noneiv_results_string)
eiv_results = [results[data]['eiv'][metric]
for metric in metrics_to_display]
eiv_results_string = f'{data} - EiV:'
for [metric_mean, metric_std] in eiv_results:
eiv_results_string += f' {metric_mean:.3f} ({metric_std:.3f})'
print(eiv_results_string)
......@@ -231,11 +231,23 @@ for seed in tqdm(seed_list):
noneiv_metrics_collection[key].append(noneiv_metrics[key])
eiv_metrics_collection[key].append(eiv_metrics[key])
print('Non-EiV\n-----')
results_dict = {}
print('Non-EiV:\n-----')
results_dict['noneiv'] = {}
for key in collection_keys:
print(f'{key} {np.mean(noneiv_metrics_collection[key]):.5f}'\
f'({np.std(noneiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list)):.5f})')
print('EiV\n-----')
metric_mean = float(np.mean(noneiv_metrics_collection[key]))
metric_std = float(np.std(noneiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list)))
results_dict['noneiv'][key] = (metric_mean, metric_std)
print(f'{key}: {metric_mean:.5f} ({metric_std:.5f})')
print('\n')
print('EiV:\n-----')
results_dict['eiv'] = {}
for key in collection_keys:
print(f'{key} {np.mean(eiv_metrics_collection[key]):.5f}'\
f'({np.std(eiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list)):.5f})')
metric_mean = float(np.mean(eiv_metrics_collection[key]))
metric_std = float(np.std(eiv_metrics_collection[key])/np.sqrt(num_test_epochs*len(seed_list)))
print(f'{key}: {metric_mean:.5f} ({metric_std:.5f})')
results_dict['eiv'][key] = (metric_mean, metric_std)
# write results to a JSON file in the results folder
with open(os.path.join('results',f'metrics_{short_dataname}.json'), 'w') as f:
json.dump(results_dict, f)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment