diff --git a/EIVPackage/EIVGeneral/coverage_metrics.py b/EIVPackage/EIVGeneral/coverage_metrics.py index 9fd3ff5dd92a580c976e6cbe658f0f21655b39b1..7515497349f2e9636bd44b9c8955a9937a9e2224 100644 --- a/EIVPackage/EIVGeneral/coverage_metrics.py +++ b/EIVPackage/EIVGeneral/coverage_metrics.py @@ -34,15 +34,13 @@ def multivariate_interval_length(dim, q=0.95): def epistemic_coverage(not_averaged_predictions, y, q=0.95, normalize_errors=False, - average_predictions=True, noisy_y=True): """ Returns the average coverage of `y` by the interval "predictions + prefactor * q-Interval", where - "q-Interval" is the interval of measure `q` under the standard normal, - where - "predictions" are the entries of the first component of the tuple - `not_averaged_predictions`, averaged if `average_predictions` is True. + `not_averaged_predictions` averaged over their second dimension. - "prefactor either equals the epistemic uncertainty, computed from the first component of `not_averaged_predictions`,if `normalize_errors` is set to False, or 1 if it is true. @@ -63,34 +61,27 @@ def epistemic_coverage(not_averaged_predictions, y, q=0.95, :param normalize_errors: If True, the deviations between predictions and `y` are normalized by the total uncertainty, computed from the aleatoric and epistemic uncertainty and the coverage w.r.t. q-interval is computed. - :param average_predictions: If True, average the predictions before - computing the coverage. Defaults to False. :param noisy_y: Boolean. If True (the default), `y` is treated as noisy and the total uncertainty is considered. If False, `y` is treated as the unnoisy ground truth. :returns: numerical_coverage, theoretical_coverage """ out, sigmas = not_averaged_predictions - # add repetition axis - y = y[:,None,...] - sigmas = sigmas[:,None,...] # add an output axis if necessary - if len(y.shape) <= 2: + if len(y.shape) <= 1: y = y[...,None] - if len(sigmas.shape) <= 2: + if len(sigmas.shape) <= 1: sigmas = sigmas[...,None] # squeeze last dimensions into one - y = y.view((*y.shape[:2], -1)) - sigmas = sigmas.view((*sigmas.shape[:2], -1)) + y = y.view((y.shape[0], -1)) + sigmas = sigmas.view((sigmas.shape[0], -1)) out = out.view((*out.shape[:2], -1)) # check if dimensions are consistent - assert y.shape == sigmas.shape - assert y.shape[0] == out.shape[0] - assert y.shape[2] == out.shape[2] # compute epistemic uncertainty - epis_unc = torch.std(out, dim=1, keepdim=True) - if average_predictions: - out = torch.mean(out, dim=1, keepdim=True) + epis_unc = torch.std(out, dim=1) + out = torch.mean(out, dim=1) + assert y.shape == sigmas.shape + assert y.shape == out.shape assert epis_unc.shape == sigmas.shape # compute total uncertainty if noisy_y: @@ -99,7 +90,7 @@ def epistemic_coverage(not_averaged_predictions, y, q=0.95, # for unnoisy y, the aleatoric uncertainty is treated as 0 total_unc = epis_unc # fix interval based on epis_unc - out_dim = y.shape[2] + out_dim = y.shape[1] if not normalize_errors: interval_length = multivariate_interval_length(dim=out_dim, q=q) \ * epis_unc @@ -108,11 +99,10 @@ def epistemic_coverage(not_averaged_predictions, y, q=0.95, # numerical computation errors = out - y if normalize_errors: - assert errors.shape[0] == total_unc.shape[0] - assert errors.shape[2] == total_unc.shape[2] + assert errors.shape == total_unc.shape errors /= total_unc check_if_in_interval = logical_and_along_dimension( - torch.abs(errors) <= interval_length, dim=2) + torch.abs(errors) <= interval_length, dim=1) numerical_coverage = torch.mean( check_if_in_interval.to(torch.float32) ).cpu().detach().item() @@ -121,11 +111,10 @@ def epistemic_coverage(not_averaged_predictions, y, q=0.95, cdf_args = (interval_length/total_unc).detach().cpu().numpy() cdf_values = scipy.stats.norm.cdf(cdf_args) prob_values = 2*cdf_values -1 - assert len(cdf_values.shape) == 3 - assert cdf_values.shape[1] == 1 + assert len(cdf_values.shape) == 2 # take product over feature dimension # and average over batch dimension - theoretical_coverage = np.mean(np.prod(prob_values, axis=2)).item() + theoretical_coverage = np.mean(np.prod(prob_values, axis=1)).item() else: theoretical_coverage = q return numerical_coverage, theoretical_coverage diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py index b415792857ac162211827b13f5e7a567a08f8fc6..e72dc1180fd7bcd041a58ac5ff895524e7198457 100644 --- a/Experiments/evaluate_metrics.py +++ b/Experiments/evaluate_metrics.py @@ -19,7 +19,7 @@ from EIVGeneral.coverage_metrics import epistemic_coverage, normalized_std # read in data via --data option parser = argparse.ArgumentParser() -parser.add_argument("--data", help="Loads data", default='quadratic') +parser.add_argument("--data", help="Loads data", default='linear') parser.add_argument("--no-autoindent", help="", action="store_true") # to avoid conflics in IPython args = parser.parse_args() @@ -127,7 +127,7 @@ def collect_metrics(x_y_pairs, seed=0, noneiv_metrics['rmse'] = np.sqrt(np.mean(scaled_res**2)) noneiv_metrics['bias'] = np.mean(scaled_res) noneiv_metrics['coverage_numerical'], noneiv_metrics['coverage_theory'] =\ - epistemic_coverage(not_averaged_predictions, y, normalize_errors=False, average_predictions=True) + epistemic_coverage(not_averaged_predictions, y, normalize_errors=False) noneiv_metrics['coverage_normalized'],_ =\ epistemic_coverage(not_averaged_predictions, y, normalize_errors=True) noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y) @@ -137,10 +137,9 @@ def collect_metrics(x_y_pairs, seed=0, noneiv_metrics['true_coverage_numerical'],\ noneiv_metrics['true_coverage_theory'] =\ epistemic_coverage(not_averaged_predictions, true_y, - average_predictions=True, normalize_errors=False, noisy_y=False) - true_res = true_y - noneiv_mean - noneiv_metrics['true_rmse'] = np.sqrt(np.mean(scaled_res**2)) + true_res = true_y - noneiv_mean + noneiv_metrics['true_rmse'] = np.sqrt(np.mean(scaled_res**2)) # NLL @@ -197,7 +196,7 @@ def collect_metrics(x_y_pairs, seed=0, eiv_metrics['rmse' ]= np.sqrt(np.mean(scaled_res**2)) eiv_metrics['bias' ]= np.mean(scaled_res) eiv_metrics['coverage_numerical'], eiv_metrics['coverage_theory'] =\ - epistemic_coverage(not_averaged_predictions, y, normalize_errors=False, average_predictions=True) + epistemic_coverage(not_averaged_predictions, y, normalize_errors=False) eiv_metrics['coverage_normalized'],_ =\ epistemic_coverage(not_averaged_predictions, y, normalize_errors=True) eiv_metrics['res_std' ]= normalized_std(not_averaged_predictions, y) @@ -207,11 +206,10 @@ def collect_metrics(x_y_pairs, seed=0, eiv_metrics['true_coverage_numerical'],\ eiv_metrics['true_coverage_theory'] =\ epistemic_coverage(not_averaged_predictions, true_y, - average_predictions=True, normalize_errors=False, noisy_y=False) - true_res = true_y - eiv_mean - eiv_metrics['true_rmse'] = np.sqrt(np.mean(scaled_res**2)) + true_res = true_y - eiv_mean + eiv_metrics['true_rmse'] = np.sqrt(np.mean(scaled_res**2)) # NLL if scale_outputs: