diff --git a/EIVPackage/EIVGeneral/coverage_metrics.py b/EIVPackage/EIVGeneral/coverage_metrics.py
index 7515497349f2e9636bd44b9c8955a9937a9e2224..4b6824d07b5e8b532f270ddb4121bd150b18486d 100644
--- a/EIVPackage/EIVGeneral/coverage_metrics.py
+++ b/EIVPackage/EIVGeneral/coverage_metrics.py
@@ -119,6 +119,57 @@ def epistemic_coverage(not_averaged_predictions, y, q=0.95,
         theoretical_coverage = q
     return numerical_coverage, theoretical_coverage
 
+def total_coverage(not_averaged_predictions, y, q=0.95):
+    """
+    Returns the total coverage of (noisy) `y` by the interval
+    "predictions + total_unc * q-Interval", where
+    - "q-Interval" is the interval of measure `q` under the standard normal,
+    - "predictions" are the entries of the first component of the tuple
+      `not_averaged_predictions` averaged over their second dimension.
+    - total_unc is the total uncertainty computed from
+      `not_averaged_predictions`.
+    :param not_averaged_predictions: A tuple of tensors as in the output of
+    `FNNEIV.predict` with `take_average_of_prediction` set to `False`, i.e.:
+    the predictions of the neural net not averaged over the first dimension
+    (the repetition dimension in `FNNEIV.predict`) and
+    the aleatoric uncertainty with a batch dimension and a feature dimension.
+    :param y: A `torch.tensor` of the same shape as the second component
+    of `not_averaged_predictions`. If the feature dimension is missing, it is added.
+    :param q: A float between 0 and 1. Defaults to 0.95.
+    :returns: coverage
+    """
+    out, sigmas = not_averaged_predictions
+    # add an output axis if necessary
+    if len(y.shape) <= 1:
+        y = y[...,None]
+    if len(sigmas.shape) <= 1:
+        sigmas = sigmas[...,None]
+    # squeeze last dimensions into one
+    y = y.view((y.shape[0], -1))
+    sigmas = sigmas.view((sigmas.shape[0], -1))
+    out = out.view((*out.shape[:2], -1))
+    # compute epistemic uncertainty
+    epis_unc = torch.std(out, dim=1)
+    out = torch.mean(out, dim=1)
+    # check if dimensions are consistent
+    assert y.shape == sigmas.shape
+    assert y.shape == out.shape
+    assert epis_unc.shape == sigmas.shape
+    # compute total uncertainty
+    total_unc = torch.sqrt(epis_unc**2 + sigmas**2)
+    # fix interval based on total_unc
+    out_dim = y.shape[1]
+    interval_length = multivariate_interval_length(dim=out_dim, q=q) \
+            * total_unc
+    # numerical computation
+    errors = out - y
+    assert errors.shape == total_unc.shape
+    check_if_in_interval = logical_and_along_dimension(
+            torch.abs(errors) <= interval_length, dim=1)
+    coverage = torch.mean(
+            check_if_in_interval.to(torch.float32)).cpu().detach().item()
+    return coverage
+
 def normalized_std(not_averaged_predictions, y):
     """
     Returns the standard deviation of normalized residuals, averaged over the
diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py
index 6912c013b2fa24ed916632321303f616f0d7c767..06f1b37bc770112d0c4a71d395a650d535e8b226 100644
--- a/Experiments/evaluate_metrics.py
+++ b/Experiments/evaluate_metrics.py
@@ -15,7 +15,8 @@ from tqdm import tqdm
 
 from EIVArchitectures import Networks
 from EIVTrainingRoutines import train_and_store
-from EIVGeneral.coverage_metrics import epistemic_coverage, normalized_std
+from EIVGeneral.coverage_metrics import epistemic_coverage, normalized_std,\
+        total_coverage
 from EIVData.repeated_sampling import repeated_sampling
 
 # read in data via --data option
@@ -111,8 +112,11 @@ def collect_metrics(x_y_pairs, seed=0,
             f'_p_{p:.2f}_seed_{seed}.pkl')
     net = Networks.FNNBer(p=p, init_std_y=init_std_y,
             h=[input_dim, *hidden_layers, output_dim]).to(device)
-    train_and_store.open_stored_training(saved_file=saved_file,
-            net=net, device=device)
+
+    # load network and extract std_y
+    noneiv_std_y = train_and_store.open_stored_training(saved_file=saved_file,
+            net=net, device=device)[3]
+    noneiv_metrics['std_y'] = noneiv_std_y.cpu()[-1].item()
 
 
     # RMSE
@@ -137,6 +141,8 @@ def collect_metrics(x_y_pairs, seed=0,
     noneiv_metrics['coverage_numerical'], noneiv_metrics['coverage_theory'] =\
             epistemic_coverage(not_averaged_predictions, y,\
             normalize_errors=False)
+    noneiv_metrics['total_coverage'] =\
+            total_coverage(not_averaged_predictions, y)
     noneiv_metrics['coverage_normalized'],_ =\
             epistemic_coverage(not_averaged_predictions, y,\
             normalize_errors=True)
@@ -182,8 +188,11 @@ def collect_metrics(x_y_pairs, seed=0,
     net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
             h=[input_dim, *hidden_layers, output_dim],
             fixed_std_x=fixed_std_x).to(device)
-    train_and_store.open_stored_training(saved_file=saved_file,
-            net=net)
+
+    # load network and extract std_y
+    eiv_std_y = train_and_store.open_stored_training(saved_file=saved_file,
+            net=net, device=device)[3]
+    eiv_metrics['std_y'] = eiv_std_y.cpu()[-1].item()
 
     # RMSE
     training_state = net.training
@@ -207,6 +216,8 @@ def collect_metrics(x_y_pairs, seed=0,
     eiv_metrics['bias' ]= np.mean(scaled_res)
     eiv_metrics['coverage_numerical'], eiv_metrics['coverage_theory'] =\
             epistemic_coverage(not_averaged_predictions, y, normalize_errors=False)
+    eiv_metrics['total_coverage'] =\
+            total_coverage(not_averaged_predictions, y)
     eiv_metrics['coverage_normalized'],_ =\
             epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
     eiv_metrics['res_std' ]= normalized_std(not_averaged_predictions, y)
diff --git a/Experiments/plot_summary.py b/Experiments/plot_summary.py
new file mode 100644
index 0000000000000000000000000000000000000000..953ec8dc9c9d8beb9226056f5366a985403457a8
--- /dev/null
+++ b/Experiments/plot_summary.py
@@ -0,0 +1,13 @@
+"""
+Plot summary quantities in bar plots, that is
+- the RMSE (w.r.t. noisy data)
+- the total coverage
+- the learned std_y
+by reading the results produced by `evaluate_metrics.py`
+"""
+import os
+import glob
+import json
+
+
+## include evaluate_metrics content here and adapt
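Usage note (not part of the patch): the sketch below shows how the new `total_coverage` metric could be exercised on synthetic tensors shaped like the output of `FNNEIV.predict` with `take_average_of_prediction=False`. Only `total_coverage` and its module path come from the patch; the standalone script and the names `batch`, `repetitions`, `epis_std`, `aleat_std` are illustrative assumptions. If the simulated spreads are calibrated as below, the reported coverage should land roughly near q = 0.95.

import torch
from EIVGeneral.coverage_metrics import total_coverage

torch.manual_seed(0)
batch, repetitions, features = 1000, 50, 1
epis_std, aleat_std = 0.5, 1.0

# ensemble means and repeated predictions scattering around them
# (mimics the repetition dimension of FNNEIV.predict)
mean_prediction = torch.randn(batch, features)
predictions = mean_prediction[:, None, :] \
        + epis_std * torch.randn(batch, repetitions, features)
# ground truth drawn within the epistemic spread, then corrupted by
# aleatoric noise of known standard deviation
ground_truth = mean_prediction + epis_std * torch.randn(batch, features)
y = ground_truth + aleat_std * torch.randn(batch, features)
sigmas = aleat_std * torch.ones(batch, features)

coverage = total_coverage((predictions, sigmas), y, q=0.95)
print(f'total coverage: {coverage:.3f}')  # should lie close to 0.95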