diff --git a/EIVPackage/EIVArchitectures/Networks.py b/EIVPackage/EIVArchitectures/Networks.py
index ccae84842e6a29979c0a72156606ddcfaf8ca3e0..6acace59b7abef559bd631bdfaae915e6d481129 100644
--- a/EIVPackage/EIVArchitectures/Networks.py
+++ b/EIVPackage/EIVArchitectures/Networks.py
@@ -158,8 +158,6 @@ class FNNEIV(nn.Module):
             regularization += self.main[0].neg_x_evidence(x)
         return regularization
 
-    # TODO: repetition
-    # + Test via breakpoints
     def predict(self, x, number_of_draws=[100,5],
             number_of_parameter_chunks = None, remove_graph=True
             , take_average_of_prediction=True):
@@ -217,17 +215,16 @@ class FNNEIV(nn.Module):
             sigma = torch.mean(sigma, dim=1)
         return pred, sigma
 
-    # TODO: repetition
-    # + Implement
-    # + Test via breakpoints
-    def predictive_logdensity(self, x, y, number_of_draws=[100, 5], number_of_parameter_chunks = None, remove_graph=True,
+    def predictive_logdensity(self, x_or_predictions, y, number_of_draws=[100, 5], number_of_parameter_chunks = None, remove_graph=True,
             average_batch_dimension=True, scale_labels=None,
             decouple_dimensions=False):
         """
         Computes the logarithm of the predictive density evaluated at `y`. If
         `average_batch_dimension` is `True` these values will be averaged over
         the batch dimension.
-        :param x: A torch.tensor, the input
+        :param x_or_predictions: Either a torch.tensor or a tuple. If
+        `x_or_predictions` is a tensor, it will be used as input. If it is a
+        tuple, it will be interpreted as the output of `predict` with `take_average_of_prediction` set to False.
         :param y: A torch.tensor, labels on which to evaluate the density
         :param number_of_draws: An integer or a list. If an integer
         `number_of_draws`, will be converted internally to
@@ -250,10 +247,13 @@ class FNNEIV(nn.Module):
         average, to make results comparable with the literature. Defaults to
         False.
         """
-        out, sigmas = self.predict(x, number_of_draws=number_of_draws,
+        if isinstance(x_or_predictions, torch.Tensor):
+            out, sigmas = self.predict(x_or_predictions, number_of_draws=number_of_draws,
                 number_of_parameter_chunks=number_of_parameter_chunks,
                 remove_graph=remove_graph,
                 take_average_of_prediction=False)
+        else:
+            out, sigmas = x_or_predictions
         # Add "repetition" dimension to y and sigmas
         y = y[:,None,...]
         sigmas = sigmas[:,None,...]
@@ -427,14 +427,16 @@ class FNNBer(nn.Module):
             sigma = torch.mean(sigma, dim=1)
         return pred, sigma
 
-    def predictive_logdensity(self, x, y, number_of_draws=100, remove_graph=True,
+    def predictive_logdensity(self, x_or_predictions, y, number_of_draws=100, remove_graph=True,
             average_batch_dimension=True, scale_labels=None,
             decouple_dimensions=False):
         """
         Computes the logarithm of the predictive density evaluated at `y`. If
         `average_batch_dimension` is `True` these values will be averaged over
         the batch dimension.
-        :param x: A torch.tensor, the input
+        :param x_or_predictions: Either a torch.tensor or a tuple. If
+        `x_or_predictions` is a tensor, it will be used as input. If it is a
+        tuple, it will be interpreted as the output of `predict` with `take_average_of_prediction` set to False.
         :param y: A torch.tensor, labels on which to evaluate the density
         :param number_of_draws: Number of draws to obtain from x
         :param remove_graph: If True (default) the output will
@@ -448,14 +450,20 @@ class FNNBer(nn.Module):
         average, to make results comparable with the literature. Defaults to
         False.
""" - out, sigmas = self.predict(x, number_of_draws=number_of_draws, - take_average_of_prediction=False, remove_graph=remove_graph) - # Add "repetition" dimension to y and out + if type(x_or_predictions) is torch.tensor: + out, sigmas = self.predict(x_or_predictions, + number_of_draws=number_of_draws, + take_average_of_prediction=False, remove_graph=remove_graph) + else: + out, sigmas = x_or_predictions + # Add "repetition" dimension to y and sigmas y = y[:,None,...] sigmas = sigmas[:,None,...] if len(y.shape) <= 2: # add an output axis if necessary y = y[...,None] + if len(sigmas.shape) <= 2: + # add an output axis if necessary sigmas = sigmas[...,None] # squeeze last dimensions into one y = y.view((*y.shape[:2], -1)) diff --git a/EIVPackage/EIVGeneral/coverage_metrices.py b/EIVPackage/EIVGeneral/coverage_metrices.py deleted file mode 100644 index a984e77b0a8ae980b03117faa9ad2981dc59b775..0000000000000000000000000000000000000000 --- a/EIVPackage/EIVGeneral/coverage_metrices.py +++ /dev/null @@ -1,118 +0,0 @@ -""" -Computes the coverage of observations by predictions and uncertainty and -similar quantities. This module contains three functions -- epistemic_coverage: Computes the coverage of observations by predictions and - their epistemic uncertainty and the averaged theoretical ground truth. If - `normalized` is set to True, residuals are normalized and a fixed interval - length is considered. -- normalized_std: Computes the standard deviation of the normalized residuals. -""" -import numpy as np -import scipy.stats -import torch - -def logical_and_along_dimension(x, dim=-1, keepdim=False): - """ - For a boolean tensor `x` perform a logical AND along the axis `dim`. - If `keepdim` is true the axis `dim` will be kept (defaults to False). - """ - return torch.prod(x.to(torch.bool), dim=dim, keepdim=keepdim).to(torch.bool) - - -def multivariate_interval_length(dim, q=0.95): - """ - Returns the half side length of multivariate cube, symmetrically centered - around 0 such that its measure under a standard normal distribution equals - `q`. - :param dim: A non-negative integer, the dimension. - :param q: Float, should be between 0 and 1. Defaults to 0.95. - """ - # use independence of components to reduce to a univariate quantile - univariate_quantile = 0.5 * (1+0.95**(1/dim)) - return scipy.stats.norm.ppf(univariate_quantile) - - -def epistemic_coverage(prediction_triple, y, q=0.95, normalize_errors=False): - """ - Returns the average coverage of `y` by the interval - "prefactor * (predictions + q-Interval)", - where "q-Interval" is the interval of measure `q` under the standard normal, - "predictions" is the first component of `prediction_triple` and prefactor is either - the epistemic uncertainty, given by the second component of `prediction_triple`, if - `normalize_errors` is False, or 1 if it is true. The coverage is returned - as given by `y` and as a theoretical_coverage computed from the epistemic - uncertainty (second component of `prediction_triple`) and the aleatoric uncertainty - (third component of `prediction_triple`) - :param prediction_triple: A triple of tensors containing (in this order): the - predictions of the neural net (the average under the posterior), the - epistemic uncertainty (the standard deviation under the posterior) and - the aleatoric uncertainty. All tensors are expected to have two dimensions: - a batch and a feature dimension. - :param y: A `torch.tensor` of the same shape then the first two components - of `prediction_triple`. 
-    :param q: A float between 0 and 1. Defaults to 0.95.
-    :param normalize_errors: If True, the deviations between predictions and
-    `y` are normalized by the total uncertainty, computed from the aleatoric
-    and epistemic uncertainty and the coverage w.r.t. q-interval is computed.
-    :returns: numerical_coverage, theoretical_coverage
-    """
-    mean, epis_unc, aleat_unc = prediction_triple
-    assert epis_unc.shape == aleat_unc.shape
-    assert mean.shape == epis_unc.shape
-    # Add feature dimension to y if missing
-    if len(y.shape) <= 1:
-        y = y.view((-1,1))
-    assert y.shape == mean.shape
-    # fix interval based on epis_unc
-    if not normalize_errors:
-        interval_length = multivariate_interval_length(dim=y.shape[1], q=q) \
-                * epis_unc
-    else:
-        interval_length = multivariate_interval_length(dim=y.shape[1], q=q)
-    total_unc = torch.sqrt(epis_unc**2 + aleat_unc **2)
-    # numerical computation
-    errors = mean - y
-    if normalize_errors:
-        assert errors.shape == total_unc.shape
-        errors /= total_unc
-    check_if_in_interval = logical_and_along_dimension(
-            torch.abs(errors) <= interval_length, dim=1)
-    numerical_coverage = torch.mean(
-            check_if_in_interval.to(torch.float32)
-            ).cpu().detach().item()
-    # theoretical computation
-    if not normalize_errors:
-        cdf_args = (interval_length/total_unc).detach().cpu().numpy()
-        cdf_values = scipy.stats.norm.cdf(cdf_args)
-        prob_values = 2*cdf_values -1
-        assert len(cdf_values.shape) == 2
-        theoretical_coverage = np.mean(np.prod(prob_values, axis=1)).item()
-    else:
-        theoretical_coverage = q
-    return numerical_coverage, theoretical_coverage
-
-def normalized_std(prediction_triple, y):
-    """
-    Returns the standard deviation of normalized residuals. In theory this
-    number should be equal to 1.0.
-    :param prediction_triple: A triple of tensors containing (in this order): the
-    predictions of the neural net (the average under the posterior), the
-    epistemic uncertainty (the standard deviation under the posterior) and
-    the aleatoric uncertainty.
-    :param y: A `torch.tensor` of the same shape then the first two components
-    of `prediction_triple`. If the feature dimension is missing, it is added.
-    :returns: numerical_coverage, theoretical_coverage
-    """
-    mean, epis_unc, aleat_unc = prediction_triple
-    assert epis_unc.shape == aleat_unc.shape
-    assert mean.shape == epis_unc.shape
-    # Add feature dimension to y if missing
-    if len(y.shape) <= 1:
-        y = y.view((-1,1))
-    assert y.shape == mean.shape
-    total_unc = torch.sqrt(epis_unc**2 + aleat_unc **2)
-    # numerical computation
-    errors = mean - y
-    assert errors.shape == total_unc.shape
-    errors /= total_unc
-    return torch.mean(torch.std(errors, dim=0)).item()
diff --git a/EIVPackage/EIVGeneral/coverage_metrics.py b/EIVPackage/EIVGeneral/coverage_metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..3bafa96fcf3cb72e17ac075dff300309759dd9e9
--- /dev/null
+++ b/EIVPackage/EIVGeneral/coverage_metrics.py
@@ -0,0 +1,154 @@
+"""
+Computes the coverage of observations by predictions and uncertainty and
+similar quantities. This module contains the following functions
+- epistemic_coverage: Computes the coverage of observations by predictions and
+  their epistemic uncertainty and the averaged theoretical ground truth. If
+  `normalized` is set to True, residuals are normalized and a fixed interval
+  length is considered.
+- normalized_std: Computes the standard deviation of the normalized residuals.
+""" +import numpy as np +import scipy.stats +import torch + +def logical_and_along_dimension(x, dim=-1, keepdim=False): + """ + For a boolean tensor `x` perform a logical AND along the axis `dim`. + If `keepdim` is true the axis `dim` will be kept (defaults to False). + """ + return torch.prod(x.to(torch.bool), dim=dim, keepdim=keepdim).to(torch.bool) + + +def multivariate_interval_length(dim, q=0.95): + """ + Returns the half side length of multivariate cube, symmetrically centered + around 0 such that its measure under a standard normal distribution equals + `q`. + :param dim: A non-negative integer, the dimension. + :param q: Float, should be between 0 and 1. Defaults to 0.95. + """ + # use independence of components to reduce to a univariate quantile + univariate_quantile = 0.5 * (1+q**(1/dim)) + return scipy.stats.norm.ppf(univariate_quantile) + + +def epistemic_coverage(not_averaged_predictions, y, q=0.95, normalize_errors=False): + """ + Returns the average coverage of `y` by the interval + "prefactor * (predictions + q-Interval)", + where + - "q-Interval" is the interval of measure `q` under the standard normal, + where + - "predictions" are the entries of the first component of the tuple + `not_averaged_predictions`, + - "prefactor either equals the epistemic uncertainty, computed from the + first component of `not_averaged_predictions`,if + `normalize_errors` is set to False, or 1 if it is true. + The coverage is returned as given by the `y` and as a theoretical_coverage + computed from the epistemic uncertainty and the aleatoric uncertainty + (second component of `not_averaged_predictions`). + :param not_averaged_predictions: A tuple of tensors as in the output of + `FNNEIV.predict` with `take_average_of_prediction` set to `False`, i.e.: + the predictions of the neural net not averaged over the first dimension + (the repetition dimension in `FNNEIV.predict`) and + the aleatoric uncertainty with a batch dimension and a feature dimension. + :param y: A `torch.tensor` of the same shape then the second components + of `not_averaged_predictions`. If the feature dimension is missing, it is added. + :param q: A float between 0 and 1. Defaults to 0.95. + :param normalize_errors: If True, the deviations between predictions and + `y` are normalized by the total uncertainty, computed from the aleatoric + and epistemic uncertainty and the coverage w.r.t. q-interval is computed. + :returns: numerical_coverage, theoretical_coverage + """ + out, sigmas = not_averaged_predictions + y = y[:,None,...] + sigmas = sigmas[:,None,...] 
+    # add an output axis if necessary
+    if len(y.shape) <= 2:
+        y = y[...,None]
+    if len(sigmas.shape) <= 2:
+        sigmas = sigmas[...,None]
+    # squeeze last dimensions into one
+    y = y.view((*y.shape[:2], -1))
+    sigmas = sigmas.view((*sigmas.shape[:2], -1))
+    out = out.view((*out.shape[:2], -1))
+    # check if dimensions consistent
+    assert y.shape == sigmas.shape
+    assert y.shape[0] == out.shape[0]
+    assert y.shape[2] == out.shape[2]
+    # compute epistemic uncertainty
+    epis_unc = torch.std(out, dim=1, keepdim=True)
+    # compute total uncertainty
+    assert epis_unc.shape == sigmas.shape
+    total_unc = torch.sqrt(epis_unc**2 + sigmas **2)
+    # fix interval based on epis_unc
+    if not normalize_errors:
+        interval_length = multivariate_interval_length(dim=y.shape[2], q=q) \
+                * epis_unc
+    else:
+        interval_length = multivariate_interval_length(dim=y.shape[2], q=q)
+    # numerical computation
+    errors = out - y
+    if normalize_errors:
+        assert errors.shape[0] == total_unc.shape[0]
+        assert errors.shape[2] == total_unc.shape[2]
+        errors /= total_unc
+    check_if_in_interval = logical_and_along_dimension(
+            torch.abs(errors) <= interval_length, dim=2)
+    numerical_coverage = torch.mean(
+            check_if_in_interval.to(torch.float32)
+            ).cpu().detach().item()
+    # theoretical computation
+    if not normalize_errors:
+        cdf_args = (interval_length/total_unc).detach().cpu().numpy()
+        cdf_values = scipy.stats.norm.cdf(cdf_args)
+        prob_values = 2*cdf_values -1
+        assert len(cdf_values.shape) == 3
+        assert cdf_values.shape[1] == 1
+        # take product over feature dimension
+        # and average over batch dimension
+        theoretical_coverage = np.mean(np.prod(prob_values, axis=2)).item()
+    else:
+        theoretical_coverage = q
+    return numerical_coverage, theoretical_coverage
+
+def normalized_std(not_averaged_predictions, y):
+    """
+    Returns the standard deviation of normalized residuals, averaged over the
+    feature dimension. In theory this number should be equal to 1.0.
+    :param not_averaged_predictions: A tuple of tensors as in the output of
+    `FNNEIV.predict` with `take_average_of_prediction` set to `False`, i.e.:
+    the predictions of the neural net not averaged over the repetition
+    dimension and the aleatoric uncertainty.
+    :param y: A `torch.tensor` of the same shape as the second component
+    of `not_averaged_predictions`. If the feature dimension is missing, it is added.
+    :returns: The standard deviation of the normalized residuals (a float).
+    """
+    out = not_averaged_predictions[0]
+    sigmas = not_averaged_predictions[1]
+    # add repetition dimension
+    y = y[:,None,...]
+    sigmas = sigmas[:,None,...]
+    # add an output axis if necessary
+    if len(y.shape) <= 2:
+        y = y[...,None]
+    if len(sigmas.shape) <= 2:
+        sigmas = sigmas[...,None]
+    # squeeze last dimensions into one
+    y = y.view((*y.shape[:2], -1))
+    sigmas = sigmas.view((*sigmas.shape[:2], -1))
+    out = out.view((*out.shape[:2], -1))
+    # check if dimensions consistent
+    assert y.shape == sigmas.shape
+    assert y.shape[0] == out.shape[0]
+    assert y.shape[2] == out.shape[2]
+    # compute epistemic uncertainty
+    epis_unc = torch.std(out, dim=1, keepdim=True)
+    assert epis_unc.shape == sigmas.shape
+    total_unc = torch.sqrt(epis_unc**2 + sigmas **2)
+    # numerical computation
+    errors = out - y
+    errors /= total_unc
+    # average over feature dimension
+    return torch.mean(torch.std(errors, dim=(0,1))).item()
diff --git a/Experiments/evaluate_tabular.py b/Experiments/evaluate_tabular.py
index 6a738bdaae782541924d8c474efb90c7eac6b8b0..40b3f85726306a48a85863a88d1e03e9e3f57f77 100644
--- a/Experiments/evaluate_tabular.py
+++ b/Experiments/evaluate_tabular.py
@@ -67,13 +67,13 @@ def collect_metrics(x,y, seed=0,
     # RMSE
     training_state = net.training
     net.train()
-    ######
-    prediction_triple =\
-            net.predict_mean_and_unc(x, number_of_draws=noneiv_number_of_draws)
+    not_averaged_predictions = net.predict(x, number_of_draws=noneiv_number_of_draws,
+            take_average_of_prediction=False)
+    noneiv_mean = torch.mean(not_averaged_predictions[0], dim=1)
     if len(y.shape) <= 1:
         y = y.view((-1,1))
-    assert y.shape == prediction_triple[0].shape
-    res = y-prediction_triple[0]
+    assert y.shape == noneiv_mean.shape
+    res = y-noneiv_mean
     if scale_outputs:
         scale = train_data.dataset.std_labels.to(device)
         scaled_res = res * scale.view((1,-1))
@@ -83,10 +83,10 @@ def collect_metrics(x,y, seed=0,
     noneiv_metrics['rmse'] = np.sqrt(np.mean(scaled_res**2))
     noneiv_metrics['bias'] = np.mean(scaled_res)
     noneiv_metrics['coverage_numerical'], noneiv_metrics['coverage_theory'] =\
-            epistemic_coverage(prediction_triple, y, normalize_errors=False)
+            epistemic_coverage(not_averaged_predictions, y, normalize_errors=False)
     noneiv_metrics['coverage_normalized'],_ =\
-            epistemic_coverage(prediction_triple, y, normalize_errors=True)
-    noneiv_metrics['res_std'] = normalized_std(prediction_triple, y)
+            epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
+    noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y)
 
 
 
@@ -95,7 +95,8 @@ def collect_metrics(x,y, seed=0,
         scale_labels = train_data.dataset.std_labels.view((-1,)).to(device)
     else:
         scale_labels = None
-    noneiv_metrics['logdens' ]= net.predictive_logdensity(x, y,
+    noneiv_metrics['logdens' ]= net.predictive_logdensity(
+            not_averaged_predictions, y,
             number_of_draws=100,
             decouple_dimensions=decouple_dimensions,
             scale_labels=scale_labels).mean().detach().cpu().numpy()
@@ -127,12 +128,13 @@ def collect_metrics(x,y, seed=0,
     noise_state = net.noise_is_on
     net.train()
     net.noise_on()
-    prediction_triple =\
-            net.predict_mean_and_unc(x, number_of_draws=eiv_number_of_draws)
+    not_averaged_predictions = net.predict(x, number_of_draws=eiv_number_of_draws,
+            take_average_of_prediction=False)
+    eiv_mean = torch.mean(not_averaged_predictions[0], dim=1)
     if len(y.shape) <= 1:
         y = y.view((-1,1))
-    assert y.shape == prediction_triple[0].shape
-    res = y-prediction_triple[0]
+    assert y.shape == eiv_mean.shape
+    res = y-eiv_mean
     scale = train_data.dataset.std_labels.to(device)
     if scale_outputs:
         scale = train_data.dataset.std_labels.to(device)
@@ -143,10 +145,10 @@ def collect_metrics(x,y, seed=0,
     eiv_metrics['rmse' ]= np.sqrt(np.mean(scaled_res**2))
     eiv_metrics['bias' ]= np.mean(scaled_res)
     eiv_metrics['coverage_numerical'], eiv_metrics['coverage_theory'] =\
-            epistemic_coverage(prediction_triple, y, normalize_errors=False)
+            epistemic_coverage(not_averaged_predictions, y, normalize_errors=False)
     eiv_metrics['coverage_normalized'],_ =\
-            epistemic_coverage(prediction_triple, y, normalize_errors=True)
-    eiv_metrics['res_std' ]= normalized_std(prediction_triple, y)
+            epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
+    eiv_metrics['res_std' ]= normalized_std(not_averaged_predictions, y)
 
 
     # NLL
@@ -154,7 +156,8 @@ def collect_metrics(x,y, seed=0,
         scale_labels = train_data.dataset.std_labels.view((-1,)).to(device)
     else:
         scale_labels = None
-    eiv_metrics['logdens' ]= net.predictive_logdensity(x, y,
+    eiv_metrics['logdens' ]= net.predictive_logdensity(
+            not_averaged_predictions, y,
             number_of_draws=eiv_number_of_draws,
             decouple_dimensions=decouple_dimensions,
             scale_labels=scale_labels).mean().detach().cpu().numpy()
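
Usage sketch (not part of the diff): after this change, a single call to `predict` with `take_average_of_prediction=False` can feed the point prediction, both coverage metrics, and `predictive_logdensity`, instead of sampling the network separately for each metric. The helper below mirrors the calls in `Experiments/evaluate_tabular.py`; the names `net`, `x`, `y` (a trained `FNNBer`/`FNNEIV` instance with matching input and label tensors), the `coverage_summary` function, and the `EIVGeneral.coverage_metrics` import path are illustrative assumptions, not part of the patch.

```python
import torch

from EIVGeneral.coverage_metrics import epistemic_coverage, normalized_std


def coverage_summary(net, x, y, number_of_draws=100):
    """Hypothetical helper: collect coverage metrics and the predictive
    log-density from one set of posterior draws."""
    # evaluate_tabular.py samples in train mode (MC dropout); do the same here
    net.train()
    # draw predictions once, keeping the repetition dimension (dim=1)
    not_averaged_predictions = net.predict(x,
            number_of_draws=number_of_draws,
            take_average_of_prediction=False)
    # point prediction: average over the repetition dimension
    prediction = torch.mean(not_averaged_predictions[0], dim=1)
    # observed and theoretical coverage of y by the epistemic uncertainty
    num_cov, theo_cov = epistemic_coverage(not_averaged_predictions, y,
            normalize_errors=False)
    # standard deviation of normalized residuals, ideally close to 1.0
    res_std = normalized_std(not_averaged_predictions, y)
    # the same tuple can replace x in predictive_logdensity
    logdens = net.predictive_logdensity(not_averaged_predictions, y,
            number_of_draws=number_of_draws).mean().detach().cpu().item()
    return prediction, num_cov, theo_cov, res_std, logdens
```

Reusing the same `not_averaged_predictions` tuple keeps the metrics mutually consistent, since they are all computed from the same posterior draws, and avoids one extra forward pass per metric.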