Commit 952d77ad authored by Jörg Martin

Coverage metrics now based on parameter sampling

parents 84a30b5a 08722085
@@ -158,8 +158,6 @@ class FNNEIV(nn.Module):
         regularization += self.main[0].neg_x_evidence(x)
         return regularization
-    # TODO: repetition
-    # + Test via breakpoints
     def predict(self, x, number_of_draws=[100,5], number_of_parameter_chunks = None,
             remove_graph=True
             , take_average_of_prediction=True):
@@ -217,17 +215,16 @@ class FNNEIV(nn.Module):
         sigma = torch.mean(sigma, dim=1)
         return pred, sigma

-    # TODO: repetition
-    # + Implement
-    # + Test via breakpoints
-    def predictive_logdensity(self, x, y, number_of_draws=[100, 5], number_of_parameter_chunks = None, remove_graph=True,
+    def predictive_logdensity(self, x_or_predictions, y, number_of_draws=[100, 5], number_of_parameter_chunks = None, remove_graph=True,
             average_batch_dimension=True, scale_labels=None,
             decouple_dimensions=False):
         """
         Computes the logarithm of the predictive density evaluated at `y`. If
         `average_batch_dimension` is `True` these values will be averaged over
         the batch dimension.
-        :param x: A torch.tensor, the input
+        :param x_or_predictions: Either a torch.tensor or a tuple. If
+        `x_or_predictions` is a tensor, it will be used as input. If it is a
+        tuple, it will be interpreted as the output of `predict` with `take_average_of_prediction` set to False.
         :param y: A torch.tensor, labels on which to evaluate the density
         :param number_of_draws: An integer or a list. If an integer
         `number_of_draws`, will be converted internally to
@@ -250,10 +247,13 @@ class FNNEIV(nn.Module):
         average, to make results comparable with the literature. Defaults to
         False.
         """
-        out, sigmas = self.predict(x, number_of_draws=number_of_draws,
+        if isinstance(x_or_predictions, torch.Tensor):
+            out, sigmas = self.predict(x_or_predictions, number_of_draws=number_of_draws,
                 number_of_parameter_chunks=number_of_parameter_chunks,
                 remove_graph=remove_graph,
                 take_average_of_prediction=False)
+        else:
+            out, sigmas = x_or_predictions
         # Add "repetition" dimension to y and sigmas
         y = y[:,None,...]
         sigmas = sigmas[:,None,...]
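With this change, `predictive_logdensity` accepts either the raw network input or predictions that were already drawn via `predict`. A minimal usage sketch of the two call forms, assuming a trained `FNNEIV` instance `net` and tensors `x`, `y` (these names are illustrative, not part of the commit):

# Form 1: pass the input tensor, sampling happens inside predictive_logdensity
logdens = net.predictive_logdensity(x, y, number_of_draws=[100, 5])

# Form 2: sample once and reuse the non-averaged predictions, e.g. for
# coverage metrics and the predictive log density, without sampling twice
not_averaged_predictions = net.predict(x, number_of_draws=[100, 5],
        take_average_of_prediction=False)
logdens = net.predictive_logdensity(not_averaged_predictions, y,
        number_of_draws=[100, 5])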
@@ -427,14 +427,16 @@ class FNNBer(nn.Module):
         sigma = torch.mean(sigma, dim=1)
         return pred, sigma

-    def predictive_logdensity(self, x, y, number_of_draws=100, remove_graph=True,
+    def predictive_logdensity(self, x_or_predictions, y, number_of_draws=100, remove_graph=True,
             average_batch_dimension=True, scale_labels=None,
             decouple_dimensions=False):
         """
         Computes the logarithm of the predictive density evaluated at `y`. If
         `average_batch_dimension` is `True` these values will be averaged over
         the batch dimension.
-        :param x: A torch.tensor, the input
+        :param x_or_predictions: Either a torch.tensor or a tuple. If
+        `x_or_predictions` is a tensor, it will be used as input. If it is a
+        tuple, it will be interpreted as the output of `predict` with `take_average_of_prediction` set to False.
         :param y: A torch.tensor, labels on which to evaluate the density
         :param number_of_draws: Number of draws to obtain from x
         :param remove_graph: If True (default) the output will
@@ -448,14 +450,20 @@ class FNNBer(nn.Module):
         average, to make results comparable with the literature. Defaults to
         False.
         """
-        out, sigmas = self.predict(x, number_of_draws=number_of_draws,
-                take_average_of_prediction=False, remove_graph=remove_graph)
-        # Add "repetition" dimension to y and out
+        if isinstance(x_or_predictions, torch.Tensor):
+            out, sigmas = self.predict(x_or_predictions,
+                    number_of_draws=number_of_draws,
+                    take_average_of_prediction=False, remove_graph=remove_graph)
+        else:
+            out, sigmas = x_or_predictions
+        # Add "repetition" dimension to y and sigmas
         y = y[:,None,...]
         sigmas = sigmas[:,None,...]
         if len(y.shape) <= 2:
             # add an output axis if necessary
             y = y[...,None]
+        if len(sigmas.shape) <= 2:
+            # add an output axis if necessary
             sigmas = sigmas[...,None]
         # squeeze last dimensions into one
         y = y.view((*y.shape[:2], -1))
...
@@ -28,55 +28,73 @@ def multivariate_interval_length(dim, q=0.95):
     :param q: Float, should be between 0 and 1. Defaults to 0.95.
     """
     # use independence of components to reduce to a univariate quantile
-    univariate_quantile = 0.5 * (1+0.95**(1/dim))
+    univariate_quantile = 0.5 * (1+q**(1/dim))
     return scipy.stats.norm.ppf(univariate_quantile)

-def epistemic_coverage(prediction_triple, y, q=0.95, normalize_errors=False):
+def epistemic_coverage(not_averaged_predictions, y, q=0.95, normalize_errors=False):
     """
     Returns the average coverage of `y` by the interval
     "prefactor * (predictions + q-Interval)",
-    where "q-Interval" is the interval of measure `q` under the standard normal,
-    "predictions" is the first component of `prediction_triple` and prefactor is either
-    the epistemic uncertainty, given by the second component of `prediction_triple`, if
-    `normalize_errors` is False, or 1 if it is true. The coverage is returned
-    as given by `y` and as a theoretical_coverage computed from the epistemic
-    uncertainty (second component of `prediction_triple`) and the aleatoric uncertainty
-    (third component of `prediction_triple`)
-    :param prediction_triple: A triple of tensors containing (in this order): the
-    predictions of the neural net (the average under the posterior), the
-    epistemic uncertainty (the standard deviation under the posterior) and
-    the aleatoric uncertainty. All tensors are expected to have two dimensions:
-    a batch and a feature dimension.
-    :param y: A `torch.tensor` of the same shape then the first two components
-    of `prediction_triple`. If the feature dimension is missing, it is added.
+    where
+    - "q-Interval" is the interval of measure `q` under the standard normal,
+    - "predictions" are the entries of the first component of the tuple
+      `not_averaged_predictions`,
+    - "prefactor" either equals the epistemic uncertainty, computed from the
+      first component of `not_averaged_predictions`, if
+      `normalize_errors` is set to False, or 1 if it is true.
+    The coverage is returned as given by `y` and as a theoretical_coverage
+    computed from the epistemic uncertainty and the aleatoric uncertainty
+    (second component of `not_averaged_predictions`).
+    :param not_averaged_predictions: A tuple of tensors as in the output of
+    `FNNEIV.predict` with `take_average_of_prediction` set to `False`, i.e.:
+    the predictions of the neural net not averaged over the repetition
+    dimension (dimension 1 in the output of `FNNEIV.predict`) and
+    the aleatoric uncertainty with a batch dimension and a feature dimension.
+    :param y: A `torch.tensor` of the same shape as the second component
+    of `not_averaged_predictions`. If the feature dimension is missing, it is added.
     :param q: A float between 0 and 1. Defaults to 0.95.
     :param normalize_errors: If True, the deviations between predictions and
     `y` are normalized by the total uncertainty, computed from the aleatoric
     and epistemic uncertainty and the coverage w.r.t. q-interval is computed.
     :returns: numerical_coverage, theoretical_coverage
     """
-    mean, epis_unc, aleat_unc = prediction_triple
-    assert epis_unc.shape == aleat_unc.shape
-    assert mean.shape == epis_unc.shape
-    # Add feature dimension to y if missing
-    if len(y.shape) <= 1:
-        y = y.view((-1,1))
-    assert y.shape == mean.shape
+    out, sigmas = not_averaged_predictions
+    y = y[:,None,...]
+    sigmas = sigmas[:,None,...]
+    # add an output axis if necessary
+    if len(y.shape) <= 2:
+        y = y[...,None]
+    if len(sigmas.shape) <= 2:
+        sigmas = sigmas[...,None]
+    # squeeze last dimensions into one
+    y = y.view((*y.shape[:2], -1))
+    sigmas = sigmas.view((*sigmas.shape[:2], -1))
+    out = out.view((*out.shape[:2], -1))
+    # check if dimensions consistent
+    assert y.shape == sigmas.shape
+    assert y.shape[0] == out.shape[0]
+    assert y.shape[2] == out.shape[2]
+    # compute epistemic uncertainty
+    epis_unc = torch.std(out, dim=1, keepdim=True)
+    # compute total uncertainty
+    assert epis_unc.shape == sigmas.shape
+    total_unc = torch.sqrt(epis_unc**2 + sigmas **2)
     # fix interval based on epis_unc
     if not normalize_errors:
         interval_length = multivariate_interval_length(dim=y.shape[1], q=q) \
                 * epis_unc
     else:
         interval_length = multivariate_interval_length(dim=y.shape[1], q=q)
-        total_unc = torch.sqrt(epis_unc**2 + aleat_unc **2)
     # numerical computation
-    errors = mean - y
+    errors = out - y
     if normalize_errors:
-        assert errors.shape == total_unc.shape
+        assert errors.shape[0] == total_unc.shape[0]
+        assert errors.shape[2] == total_unc.shape[2]
         errors /= total_unc
     check_if_in_interval = logical_and_along_dimension(
-            torch.abs(errors) <= interval_length, dim=1)
+            torch.abs(errors) <= interval_length, dim=2)
     numerical_coverage = torch.mean(
             check_if_in_interval.to(torch.float32)
             ).cpu().detach().item()
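The first change in this hunk replaces the hard-coded 0.95 in `multivariate_interval_length` by the requested coverage `q`: for `dim` independent standard-normal components, joint coverage q requires (2*Phi(z) - 1)**dim == q, hence Phi(z) = 0.5*(1 + q**(1/dim)) for the per-component half-width z. A small self-contained check of that formula (illustrative only, not part of the commit):

import numpy as np
import scipy.stats

def interval_half_width(dim, q=0.95):
    # per-component quantile so that `dim` independent components are
    # jointly covered with probability q (same formula as in the diff above)
    univariate_quantile = 0.5 * (1 + q**(1/dim))
    return scipy.stats.norm.ppf(univariate_quantile)

dim, q = 3, 0.9
z = interval_half_width(dim, q)
joint_coverage = (2 * scipy.stats.norm.cdf(z) - 1)**dim
assert np.isclose(joint_coverage, q)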
@@ -85,34 +103,52 @@ def epistemic_coverage(prediction_triple, y, q=0.95, normalize_errors=False):
         cdf_args = (interval_length/total_unc).detach().cpu().numpy()
         cdf_values = scipy.stats.norm.cdf(cdf_args)
         prob_values = 2*cdf_values -1
-        assert len(cdf_values.shape) == 2
-        theoretical_coverage = np.mean(np.prod(prob_values, axis=1)).item()
+        assert len(cdf_values.shape) == 3
+        assert cdf_values.shape[1] == 1
+        # take product over feature dimension
+        # and average over batch dimension
+        theoretical_coverage = np.mean(np.prod(prob_values, axis=2)).item()
     else:
         theoretical_coverage = q
     return numerical_coverage, theoretical_coverage

-def normalized_std(prediction_triple, y):
+def normalized_std(not_averaged_predictions, y):
     """
-    Returns the standard deviation of normalized residuals. In theory this
+    Returns the standard deviation of normalized residuals, averaged over the
+    feature dimension. In theory this
     number should be equal to 1.0.
-    :param prediction_triple: A triple of tensors containing (in this order): the
+    :param not_averaged_predictions: A triple of tensors containing (in this order): the
     predictions of the neural net (the average under the posterior), the
     epistemic uncertainty (the standard deviation under the posterior) and
     the aleatoric uncertainty.
     :param y: A `torch.tensor` of the same shape then the first two components
-    of `prediction_triple`. If the feature dimension is missing, it is added.
+    of `not_averaged_predictions`. If the feature dimension is missing, it is added.
     :returns: numerical_coverage, theoretical_coverage
     """
-    mean, epis_unc, aleat_unc = prediction_triple
-    assert epis_unc.shape == aleat_unc.shape
-    assert mean.shape == epis_unc.shape
-    # Add feature dimension to y if missing
-    if len(y.shape) <= 1:
-        y = y.view((-1,1))
-    assert y.shape == mean.shape
-    total_unc = torch.sqrt(epis_unc**2 + aleat_unc **2)
+    out = not_averaged_predictions[0]
+    sigmas = not_averaged_predictions[1]
+    # add repetition dimension
+    y = y[:,None,...]
+    sigmas = sigmas[:,None,...]
+    # add an output axis if necessary
+    if len(y.shape) <= 2:
+        y = y[...,None]
+    if len(sigmas.shape) <= 2:
+        sigmas = sigmas[...,None]
+    # squeeze last dimensions into one
+    y = y.view((*y.shape[:2], -1))
+    sigmas = sigmas.view((*sigmas.shape[:2], -1))
+    out = out.view((*out.shape[:2], -1))
+    # check if dimensions consistent
+    assert y.shape == sigmas.shape
+    assert y.shape[0] == out.shape[0]
+    assert y.shape[2] == out.shape[2]
+    # compute epistemic uncertainty
+    epis_unc = torch.std(out, dim=1, keepdim=True)
+    assert epis_unc.shape == sigmas.shape
+    total_unc = torch.sqrt(epis_unc**2 + sigmas **2)
     # numerical computation
-    errors = mean - y
-    assert errors.shape == total_unc.shape
+    errors = out - y
     errors /= total_unc
-    return torch.mean(torch.std(errors, dim=0)).item()
+    # average over feature dimension
+    return torch.mean(torch.std(errors, dim=(0,1))).item()
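Both `epistemic_coverage` and `normalized_std` now take the sampled predictions directly and derive the epistemic uncertainty as the standard deviation over the repetition dimension, instead of receiving it as a separate tensor. A toy sketch of the expected shapes, assuming predictions of shape (batch, repetitions, features) and aleatoric uncertainties of shape (batch, features), with `epistemic_coverage` and `normalized_std` imported from the module changed above (all names illustrative):

import torch

batch, repetitions, features = 10, 5, 2
out = torch.randn(batch, repetitions, features)   # sampled predictions
sigmas = torch.ones(batch, features)              # aleatoric uncertainty
y = torch.randn(batch, features)                  # labels

# epistemic uncertainty, as computed inside both metrics
epis_unc = torch.std(out, dim=1, keepdim=True)             # (batch, 1, features)
total_unc = torch.sqrt(epis_unc**2 + sigmas[:, None, :]**2)

numerical_cov, theoretical_cov = epistemic_coverage((out, sigmas), y, q=0.95)
residual_std = normalized_std((out, sigmas), y)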
@@ -67,13 +67,13 @@ def collect_metrics(x,y, seed=0,
     # RMSE
     training_state = net.training
     net.train()
-    ######
-    prediction_triple =\
-            net.predict_mean_and_unc(x, number_of_draws=noneiv_number_of_draws)
+    not_averaged_predictions = net.predict(x, number_of_draws=noneiv_number_of_draws,
+            take_average_of_prediction=False)
+    noneiv_mean = torch.mean(not_averaged_predictions[0], dim=1)
     if len(y.shape) <= 1:
         y = y.view((-1,1))
-    assert y.shape == prediction_triple[0].shape
-    res = y-prediction_triple[0]
+    assert y.shape == noneiv_mean.shape
+    res = y-noneiv_mean
     if scale_outputs:
         scale = train_data.dataset.std_labels.to(device)
         scaled_res = res * scale.view((1,-1))
@@ -83,10 +83,10 @@ def collect_metrics(x,y, seed=0,
     noneiv_metrics['rmse'] = np.sqrt(np.mean(scaled_res**2))
     noneiv_metrics['bias'] = np.mean(scaled_res)
     noneiv_metrics['coverage_numerical'], noneiv_metrics['coverage_theory'] =\
-            epistemic_coverage(prediction_triple, y, normalize_errors=False)
+            epistemic_coverage(not_averaged_predictions, y, normalize_errors=False)
     noneiv_metrics['coverage_normalized'],_ =\
-            epistemic_coverage(prediction_triple, y, normalize_errors=True)
-    noneiv_metrics['res_std'] = normalized_std(prediction_triple, y)
+            epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
+    noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y)
@@ -95,7 +95,8 @@ def collect_metrics(x,y, seed=0,
         scale_labels = train_data.dataset.std_labels.view((-1,)).to(device)
     else:
         scale_labels = None
-    noneiv_metrics['logdens' ]= net.predictive_logdensity(x, y,
+    noneiv_metrics['logdens' ]= net.predictive_logdensity(
+            not_averaged_predictions, y,
             number_of_draws=100,
             decouple_dimensions=decouple_dimensions,
             scale_labels=scale_labels).mean().detach().cpu().numpy()
@@ -127,12 +128,13 @@ def collect_metrics(x,y, seed=0,
     noise_state = net.noise_is_on
     net.train()
     net.noise_on()
-    prediction_triple =\
-            net.predict_mean_and_unc(x, number_of_draws=eiv_number_of_draws)
+    not_averaged_predictions = net.predict(x, number_of_draws=eiv_number_of_draws,
+            take_average_of_prediction=False)
+    eiv_mean = torch.mean(not_averaged_predictions[0], dim=1)
     if len(y.shape) <= 1:
         y = y.view((-1,1))
-    assert y.shape == prediction_triple[0].shape
-    res = y-prediction_triple[0]
+    assert y.shape == eiv_mean.shape
+    res = y-eiv_mean
     scale = train_data.dataset.std_labels.to(device)
     if scale_outputs:
         scale = train_data.dataset.std_labels.to(device)
@@ -143,10 +145,10 @@ def collect_metrics(x,y, seed=0,
     eiv_metrics['rmse' ]= np.sqrt(np.mean(scaled_res**2))
     eiv_metrics['bias' ]= np.mean(scaled_res)
     eiv_metrics['coverage_numerical'], eiv_metrics['coverage_theory'] =\
-            epistemic_coverage(prediction_triple, y, normalize_errors=False)
+            epistemic_coverage(not_averaged_predictions, y, normalize_errors=False)
     eiv_metrics['coverage_normalized'],_ =\
-            epistemic_coverage(prediction_triple, y, normalize_errors=True)
-    eiv_metrics['res_std' ]= normalized_std(prediction_triple, y)
+            epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
+    eiv_metrics['res_std' ]= normalized_std(not_averaged_predictions, y)

     # NLL
@@ -154,7 +156,8 @@ def collect_metrics(x,y, seed=0,
         scale_labels = train_data.dataset.std_labels.view((-1,)).to(device)
     else:
         scale_labels = None
-    eiv_metrics['logdens' ]= net.predictive_logdensity(x, y,
+    eiv_metrics['logdens' ]= net.predictive_logdensity(
+            not_averaged_predictions, y,
             number_of_draws=eiv_number_of_draws,
             decouple_dimensions=decouple_dimensions,
             scale_labels=scale_labels).mean().detach().cpu().numpy()
...
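Taken together, `collect_metrics` now samples the non-averaged predictions once per network and reuses them for the point prediction, the coverage metrics, the residual standard deviation and the predictive log density. A condensed sketch of that pattern, with `net`, `x`, `y` and `number_of_draws` standing in for the actual arguments:

import torch

not_averaged_predictions = net.predict(x, number_of_draws=number_of_draws,
        take_average_of_prediction=False)
mean_prediction = torch.mean(not_averaged_predictions[0], dim=1)
residuals = y.view(mean_prediction.shape) - mean_prediction

coverage_numerical, coverage_theory = epistemic_coverage(
        not_averaged_predictions, y, normalize_errors=False)
coverage_normalized, _ = epistemic_coverage(
        not_averaged_predictions, y, normalize_errors=True)
residual_std = normalized_std(not_averaged_predictions, y)
logdens = net.predictive_logdensity(not_averaged_predictions, y,
        number_of_draws=number_of_draws).mean()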