Commit aca7bfdd authored by Jörg Martin

Added total_coverage and std_y to evaluate_metrics

parent 9efa18aa
@@ -119,6 +119,57 @@ def epistemic_coverage(not_averaged_predictions, y, q=0.95,
    theoretical_coverage = q
    return numerical_coverage, theoretical_coverage
+def total_coverage(not_averaged_predictions, y, q=0.95):
+    """
+    Returns the total coverage of (noisy) `y` by the interval
+    "predictions + total_unc * q-Interval", where
+    - "q-Interval" is the interval of measure `q` under the standard normal,
+    - "predictions" are the entries of the first component of the tuple
+      `not_averaged_predictions` averaged over their second dimension,
+    - "total_unc" is the total uncertainty computed from
+      `not_averaged_predictions`.
+    :param not_averaged_predictions: A tuple of tensors as in the output of
+    `FNNEIV.predict` with `take_average_of_prediction` set to `False`, i.e.:
+    the predictions of the neural net not averaged over the second dimension
+    (the repetition dimension in `FNNEIV.predict`) and
+    the aleatoric uncertainty with a batch dimension and a feature dimension.
+    :param y: A `torch.tensor` of the same shape as the second component of
+    `not_averaged_predictions`. If the feature dimension is missing, it is
+    added.
+    :param q: A float between 0 and 1. Defaults to 0.95.
+    :returns: coverage
+    """
+    out, sigmas = not_averaged_predictions
+    # add an output axis if necessary
+    if len(y.shape) <= 1:
+        y = y[..., None]
+    if len(sigmas.shape) <= 1:
+        sigmas = sigmas[..., None]
+    # squeeze all remaining dimensions into one
+    y = y.view((y.shape[0], -1))
+    sigmas = sigmas.view((sigmas.shape[0], -1))
+    out = out.view((*out.shape[:2], -1))
+    # compute epistemic uncertainty and average over the repetition dimension
+    epis_unc = torch.std(out, dim=1)
+    out = torch.mean(out, dim=1)
+    # check that dimensions are consistent
+    assert y.shape == sigmas.shape
+    assert y.shape == out.shape
+    assert epis_unc.shape == sigmas.shape
+    # compute total uncertainty
+    total_unc = torch.sqrt(epis_unc**2 + sigmas**2)
+    # fix interval length based on total_unc
+    out_dim = y.shape[1]
+    interval_length = multivariate_interval_length(dim=out_dim, q=q) \
+            * total_unc
+    # numerical computation of the coverage
+    errors = out - y
+    assert errors.shape == total_unc.shape
+    check_if_in_interval = logical_and_along_dimension(
+            torch.abs(errors) <= interval_length, dim=1)
+    coverage = torch.mean(
+        check_if_in_interval.to(torch.float32)).cpu().detach().item()
+    return coverage

def normalized_std(not_averaged_predictions, y):
    """
    Returns the standard deviation of normalized residuals, averaged over the
...
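
For illustration, a minimal usage sketch of the new metric (not part of the commit); following the docstring, the first tuple entry is assumed to have shape (batch, repetitions, features) and the aleatoric uncertainty shape (batch, features).

import torch
from EIVGeneral.coverage_metrics import total_coverage

# fake predictions: 100 data points, 20 repetitions, 1 output feature
out = torch.randn(100, 20, 1)
# fake aleatoric uncertainty, one value per data point and feature
sigmas = 0.5 * torch.ones(100, 1)
# fake (noisy) observations
y = torch.randn(100, 1)

# fraction of observations covered by the q=0.95 total-uncertainty interval
coverage = total_coverage((out, sigmas), y, q=0.95)
print(coverage)  # a single float between 0 and 1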
@@ -15,7 +15,8 @@ from tqdm import tqdm
from EIVArchitectures import Networks
from EIVTrainingRoutines import train_and_store
-from EIVGeneral.coverage_metrics import epistemic_coverage, normalized_std
+from EIVGeneral.coverage_metrics import epistemic_coverage, normalized_std,\
+        total_coverage
from EIVData.repeated_sampling import repeated_sampling
# read in data via --data option
@@ -111,8 +112,11 @@ def collect_metrics(x_y_pairs, seed=0,
            f'_p_{p:.2f}_seed_{seed}.pkl')
    net = Networks.FNNBer(p=p, init_std_y=init_std_y,
            h=[input_dim, *hidden_layers, output_dim]).to(device)
-    train_and_store.open_stored_training(saved_file=saved_file,
-            net=net, device=device)
+    # load network and extract std_y
+    noneiv_std_y = train_and_store.open_stored_training(saved_file=saved_file,
+            net=net, device=device)[3]
+    noneiv_metrics['std_y'] = noneiv_std_y.cpu()[-1].item()
    # RMSE
@@ -137,6 +141,8 @@ def collect_metrics(x_y_pairs, seed=0,
    noneiv_metrics['coverage_numerical'], noneiv_metrics['coverage_theory'] =\
            epistemic_coverage(not_averaged_predictions, y,\
            normalize_errors=False)
+    noneiv_metrics['total_coverage'] =\
+            total_coverage(not_averaged_predictions, y)
    noneiv_metrics['coverage_normalized'], _ =\
            epistemic_coverage(not_averaged_predictions, y,\
            normalize_errors=True)
@@ -182,8 +188,11 @@ def collect_metrics(x_y_pairs, seed=0,
    net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
            h=[input_dim, *hidden_layers, output_dim],
            fixed_std_x=fixed_std_x).to(device)
-    train_and_store.open_stored_training(saved_file=saved_file,
-            net=net)
+    # load network and extract std_y
+    eiv_std_y = train_and_store.open_stored_training(saved_file=saved_file,
+            net=net, device=device)[3]
+    eiv_metrics['std_y'] = eiv_std_y.cpu()[-1].item()
    # RMSE
    training_state = net.training
@@ -207,6 +216,8 @@ def collect_metrics(x_y_pairs, seed=0,
    eiv_metrics['bias'] = np.mean(scaled_res)
    eiv_metrics['coverage_numerical'], eiv_metrics['coverage_theory'] =\
            epistemic_coverage(not_averaged_predictions, y, normalize_errors=False)
+    eiv_metrics['total_coverage'] =\
+            total_coverage(not_averaged_predictions, y)
    eiv_metrics['coverage_normalized'], _ =\
            epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
    eiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y)
...
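
Two remarks on the pattern above (editorial, not part of the commit): the indexing suggests that `train_and_store.open_stored_training` returns a tuple whose fourth component (index 3) holds the history of the learned std_y, so that `[-1]` picks the value at the end of training. Once both branches have run, the newly collected quantities could be inspected side by side with a snippet like the following; the only assumption is that `noneiv_metrics` and `eiv_metrics` are the dicts populated above.

# hypothetical comparison of the metrics added by this commit
for key in ('total_coverage', 'std_y'):
    print(f"non-EIV {key}: {noneiv_metrics[key]:.3f}, "
          f"EIV {key}: {eiv_metrics[key]:.3f}")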
"""
Plot summary quantities in bar plots, that is
- the RMSE (w.r.t. noisy data)
- the total coverage
- the learned std_y
by reading the results produced by `evaluate_metrics.py`
"""
import os
import glob
import json
## include evaluate_metrics content here and adapt
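
A minimal sketch of how this stub might be filled in, assuming the results of `evaluate_metrics.py` are stored as one JSON dict of metrics per dataset under `results/` and that the keys match those introduced above (`rmse` in particular is an assumed name; `total_coverage` and `std_y` are the keys added by this commit):

import os
import glob
import json

import matplotlib.pyplot as plt

# assumed key names of the quantities to summarize
metrics_to_plot = ['rmse', 'total_coverage', 'std_y']

# assumed location and naming of the result files
results = {}
for filename in glob.glob(os.path.join('results', '*.json')):
    data_name = os.path.splitext(os.path.basename(filename))[0]
    with open(filename, 'r') as f:
        results[data_name] = json.load(f)

# one bar plot per metric, one bar per dataset
for metric in metrics_to_plot:
    names = sorted(results.keys())
    values = [results[name][metric] for name in names]
    plt.figure()
    plt.bar(names, values)
    plt.title(metric)
    plt.savefig(f'{metric}_summary.png')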