Commit 952d77ad authored by Jörg Martin

Coverage metrics now based on parameter sampling

parents 84a30b5a 08722085
......@@ -158,8 +158,6 @@ class FNNEIV(nn.Module):
regularization += self.main[0].neg_x_evidence(x)
return regularization
# TODO: repetition
# + Test via breakpoints
def predict(self, x, number_of_draws=[100,5], number_of_parameter_chunks = None,
remove_graph=True
, take_average_of_prediction=True):
......@@ -217,17 +215,16 @@ class FNNEIV(nn.Module):
sigma = torch.mean(sigma, dim=1)
return pred, sigma
# TODO: repetition
# + Implement
# + Test via breakpoints
def predictive_logdensity(self, x, y, number_of_draws=[100, 5], number_of_parameter_chunks = None, remove_graph=True,
def predictive_logdensity(self, x_or_predictions, y, number_of_draws=[100, 5], number_of_parameter_chunks = None, remove_graph=True,
average_batch_dimension=True, scale_labels=None,
decouple_dimensions=False):
"""
Computes the logarithm of the predictive density evaluated at `y`. If
`average_batch_dimension` is `True` these values will be averaged over
the batch dimension.
:param x: A torch.tensor, the input
:param x_or_predictions: Either a torch.tensor or a tuple. If
`x_or_predictions` is a tensor, it will be used as input. If it is a
tuple, it will be interpreted as the output of `predict` with
`take_average_of_prediction` set to False.
:param y: A torch.tensor, labels on which to evaluate the density
:param number_of_draws: An integer or a list. If an integer
`number_of_draws`, will be converted internally to
......@@ -250,10 +247,13 @@ class FNNEIV(nn.Module):
average, to make results comparable with the literature. Defaults to
False.
"""
out, sigmas = self.predict(x, number_of_draws=number_of_draws,
if isinstance(x_or_predictions, torch.Tensor):
out, sigmas = self.predict(x_or_predictions, number_of_draws=number_of_draws,
number_of_parameter_chunks=number_of_parameter_chunks,
remove_graph=remove_graph,
take_average_of_prediction=False)
else:
out, sigmas = x_or_predictions
# Add "repetition" dimension to y and sigmas
y = y[:,None,...]
sigmas = sigmas[:,None,...]
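A minimal usage sketch for the reworked signature, assuming `net` is a trained FNNEIV instance and `x`, `y` are matching input and label tensors (these three names are placeholders, not part of this commit). The sampled predictions are computed once with `predict` and then reused for the log-density, so no extra forward passes are needed:

    # `net`, `x`, `y` are assumed: a trained FNNEIV instance and matching tensors.
    not_averaged_predictions = net.predict(x,
            number_of_draws=[100, 5],
            take_average_of_prediction=False)
    # Pass the precomputed tuple instead of `x`; `predict` is then not called again.
    logdens = net.predictive_logdensity(not_averaged_predictions, y)
    print(logdens.mean().item())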
......@@ -427,14 +427,16 @@ class FNNBer(nn.Module):
sigma = torch.mean(sigma, dim=1)
return pred, sigma
def predictive_logdensity(self, x, y, number_of_draws=100, remove_graph=True,
def predictive_logdensity(self, x_or_predictions, y, number_of_draws=100, remove_graph=True,
average_batch_dimension=True, scale_labels=None,
decouple_dimensions=False):
"""
Computes the logarithm of the predictive density evaluated at `y`. If
`average_batch_dimension` is `True` these values will be averaged over
the batch dimension.
:param x: A torch.tensor, the input
:param x_or_predictions: Either a torch.tensor or a tuple. If
`x_or_predictions` is a tensor, it will be used as input. If it is a
tuple, it will be interpreted as the output of `predict` with
`take_average_of_prediction` set to False.
:param y: A torch.tensor, labels on which to evaluate the density
:param number_of_draws: Number of draws to obtain from x
:param remove_graph: If True (default) the output will
......@@ -448,14 +450,20 @@ class FNNBer(nn.Module):
average, to make results comparable with the literature. Defaults to
False.
"""
out, sigmas = self.predict(x, number_of_draws=number_of_draws,
take_average_of_prediction=False, remove_graph=remove_graph)
# Add "repetition" dimension to y and out
if isinstance(x_or_predictions, torch.Tensor):
out, sigmas = self.predict(x_or_predictions,
number_of_draws=number_of_draws,
take_average_of_prediction=False, remove_graph=remove_graph)
else:
out, sigmas = x_or_predictions
# Add "repetition" dimension to y and sigmas
y = y[:,None,...]
sigmas = sigmas[:,None,...]
if len(y.shape) <= 2:
# add an output axis if necessary
y = y[...,None]
if len(sigmas.shape) <= 2:
# add an output axis if necessary
sigmas = sigmas[...,None]
# squeeze last dimensions into one
y = y.view((*y.shape[:2], -1))
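The reshaping above (mirrored in `FNNEIV.predictive_logdensity`) only adds a broadcastable "repetition" axis to `y` and `sigmas` and flattens all trailing axes into a single feature axis. A standalone sketch with made-up shapes (batch 8, 100 draws, 2 outputs), purely for illustration:

    import torch

    out = torch.randn(8, 100, 2)   # sampled predictions: batch x draws x features
    sigmas = torch.rand(8, 2)      # aleatoric std: batch x features
    y = torch.randn(8, 2)          # labels: batch x features
    y = y[:, None, ...]            # (8, 1, 2): add "repetition" dimension
    sigmas = sigmas[:, None, ...]  # (8, 1, 2)
    # squeeze all trailing axes into one feature axis
    y = y.view((*y.shape[:2], -1))
    sigmas = sigmas.view((*sigmas.shape[:2], -1))
    assert y.shape == sigmas.shape == (8, 1, 2)
    assert (out - y).shape == (8, 100, 2)   # broadcasts against the draws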
......
......@@ -28,55 +28,73 @@ def multivariate_interval_length(dim, q=0.95):
:param q: Float, should be between 0 and 1. Defaults to 0.95.
"""
# use independence of components to reduce to a univariate quantile
univariate_quantile = 0.5 * (1+0.95**(1/dim))
univariate_quantile = 0.5 * (1+q**(1/dim))
return scipy.stats.norm.ppf(univariate_quantile)
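The reduction works because the components of a standard normal vector are independent: a centered cube has probability mass `q` exactly when each coordinate interval has mass `q**(1/dim)`. A quick numerical check (values rounded, for illustration):

    import scipy.stats

    # dim=1, q=0.95 recovers the familiar two-sided 95% quantile, about 1.96
    print(scipy.stats.norm.ppf(0.5 * (1 + 0.95**(1/1))))
    # dim=2: each coordinate must cover sqrt(0.95), about 0.9747, giving roughly 2.24
    print(scipy.stats.norm.ppf(0.5 * (1 + 0.95**(1/2))))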
def epistemic_coverage(prediction_triple, y, q=0.95, normalize_errors=False):
def epistemic_coverage(not_averaged_predictions, y, q=0.95, normalize_errors=False):
"""
Returns the average coverage of `y` by the interval
"prefactor * (predictions + q-Interval)",
where "q-Interval" is the interval of measure `q` under the standard normal,
"predictions" is the first component of `prediction_triple` and prefactor is either
the epistemic uncertainty, given by the second component of `prediction_triple`, if
`normalize_errors` is False, or 1 if it is true. The coverage is returned
as given by `y` and as a theoretical_coverage computed from the epistemic
uncertainty (second component of `prediction_triple`) and the aleatoric uncertainty
(third component of `prediction_triple`)
:param prediction_triple: A triple of tensors containing (in this order): the
predictions of the neural net (the average under the posterior), the
epistemic uncertainty (the standard deviation under the posterior) and
the aleatoric uncertainty. All tensors are expected to have two dimensions:
a batch and a feature dimension.
:param y: A `torch.tensor` of the same shape then the first two components
of `prediction_triple`. If the feature dimension is missing, it is added.
where
- "q-Interval" is the interval of measure `q` under the standard normal,
- "predictions" are the entries of the first component of the tuple
`not_averaged_predictions`, and
- "prefactor" either equals the epistemic uncertainty, computed from the
first component of `not_averaged_predictions`, if
`normalize_errors` is set to False, or 1 if it is True.
The coverage is returned as given by `y` and as a theoretical_coverage
computed from the epistemic uncertainty and the aleatoric uncertainty
(second component of `not_averaged_predictions`).
:param not_averaged_predictions: A tuple of tensors as in the output of
`FNNEIV.predict` with `take_average_of_prediction` set to `False`, i.e.:
the predictions of the neural net, not averaged over the repetition
dimension (dim 1 in the output of `FNNEIV.predict`), and
the aleatoric uncertainty with a batch dimension and a feature dimension.
:param y: A `torch.tensor` of the same shape as the second component
of `not_averaged_predictions`. If the feature dimension is missing, it is added.
:param q: A float between 0 and 1. Defaults to 0.95.
:param normalize_errors: If True, the deviations between predictions and
`y` are normalized by the total uncertainty, computed from the aleatoric
and epistemic uncertainty, and the coverage w.r.t. the q-interval is computed.
:returns: numerical_coverage, theoretical_coverage
"""
mean, epis_unc, aleat_unc = prediction_triple
assert epis_unc.shape == aleat_unc.shape
assert mean.shape == epis_unc.shape
# Add feature dimension to y if missing
if len(y.shape) <= 1:
y = y.view((-1,1))
assert y.shape == mean.shape
out, sigmas = not_averaged_predictions
y = y[:,None,...]
sigmas = sigmas[:,None,...]
# add an output axis if necessary
if len(y.shape) <= 2:
y = y[...,None]
if len(sigmas.shape) <= 2:
sigmas = sigmas[...,None]
# squeeze last dimensions into one
y = y.view((*y.shape[:2], -1))
sigmas = sigmas.view((*sigmas.shape[:2], -1))
out = out.view((*out.shape[:2], -1))
# check if dimensions consistent
assert y.shape == sigmas.shape
assert y.shape[0] == out.shape[0]
assert y.shape[2] == out.shape[2]
# compute epistemic uncertainty
epis_unc = torch.std(out, dim=1, keepdim=True)
# compute total uncertainty
assert epis_unc.shape == sigmas.shape
total_unc = torch.sqrt(epis_unc**2 + sigmas **2)
# fix interval based on epis_unc
if not normalize_errors:
interval_length = multivariate_interval_length(dim=y.shape[2], q=q) \
* epis_unc
else:
interval_length = multivariate_interval_length(dim=y.shape[2], q=q)
total_unc = torch.sqrt(epis_unc**2 + aleat_unc **2)
# numerical computation
errors = mean - y
errors = out - y
if normalize_errors:
assert errors.shape == total_unc.shape
assert errors.shape[0] == total_unc.shape[0]
assert errors.shape[2] == total_unc.shape[2]
errors /= total_unc
check_if_in_interval = logical_and_along_dimension(
torch.abs(errors) <= interval_length, dim=1)
torch.abs(errors) <= interval_length, dim=2)
numerical_coverage = torch.mean(
check_if_in_interval.to(torch.float32)
).cpu().detach().item()
......@@ -85,34 +103,52 @@ def epistemic_coverage(prediction_triple, y, q=0.95, normalize_errors=False):
cdf_args = (interval_length/total_unc).detach().cpu().numpy()
cdf_values = scipy.stats.norm.cdf(cdf_args)
prob_values = 2*cdf_values -1
assert len(cdf_values.shape) == 2
theoretical_coverage = np.mean(np.prod(prob_values, axis=1)).item()
assert len(cdf_values.shape) == 3
assert cdf_values.shape[1] == 1
# take product over feature dimension
# and average over batch dimension
theoretical_coverage = np.mean(np.prod(prob_values, axis=2)).item()
else:
theoretical_coverage = q
return numerical_coverage, theoretical_coverage
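A usage sketch for the new calling convention, assuming `net`, `x`, `y` are placeholder names for a trained network and matching test tensors, and that `epistemic_coverage` is imported from this module (for `FNNEIV`, `number_of_draws` would be a list such as `[100, 5]`). The theoretical value is, as computed above, the batch average of the product over features of 2*Phi(interval_length/total_unc) - 1:

    # `net`, `x`, `y` are assumed: a trained network and matching test tensors.
    not_averaged_predictions = net.predict(x, number_of_draws=100,
            take_average_of_prediction=False)
    numerical, theoretical = epistemic_coverage(not_averaged_predictions, y, q=0.95)
    normalized, _ = epistemic_coverage(not_averaged_predictions, y, q=0.95,
            normalize_errors=True)
    print(numerical, theoretical, normalized)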
def normalized_std(prediction_triple, y):
def normalized_std(not_averaged_predictions, y):
"""
Returns the standard deviation of normalized residuals. In theory this
Returns the standard deviation of normalized residuals, averaged over the
feature dimension. In theory this
number should be equal to 1.0.
:param prediction_triple: A triple of tensors containing (in this order): the
:param not_averaged_predictions: A tuple of tensors as in the output of
`FNNEIV.predict` with `take_average_of_prediction` set to `False`, i.e.:
the predictions of the neural net, not averaged over the repetition
dimension, and the aleatoric uncertainty.
:param y: A `torch.tensor` of the same shape as the second component
of `prediction_triple`. If the feature dimension is missing, it is added.
of `not_averaged_predictions`. If the feature dimension is missing, it is added.
:returns: The standard deviation of the normalized residuals, averaged over the feature dimension.
"""
mean, epis_unc, aleat_unc = prediction_triple
assert epis_unc.shape == aleat_unc.shape
assert mean.shape == epis_unc.shape
# Add feature dimension to y if missing
if len(y.shape) <= 1:
y = y.view((-1,1))
assert y.shape == mean.shape
total_unc = torch.sqrt(epis_unc**2 + aleat_unc **2)
out = not_averaged_predictions[0]
sigmas = not_averaged_predictions[1]
# add repetition dimension
y = y[:,None,...]
sigmas = sigmas[:,None,...]
# add an output axis if necessary
if len(y.shape) <= 2:
y = y[...,None]
if len(sigmas.shape) <= 2:
sigmas = sigmas[...,None]
# squeeze last dimensions into one
y = y.view((*y.shape[:2], -1))
sigmas = sigmas.view((*sigmas.shape[:2], -1))
out = out.view((*out.shape[:2], -1))
# check if dimensions consistent
assert y.shape == sigmas.shape
assert y.shape[0] == out.shape[0]
assert y.shape[2] == out.shape[2]
# compute epistemic uncertainty
epis_unc = torch.std(out, dim=1, keepdim=True)
assert epis_unc.shape == sigmas.shape
total_unc = torch.sqrt(epis_unc**2 + sigmas **2)
# numerical computation
errors = mean - y
assert errors.shape == total_unc.shape
errors = out - y
errors /= total_unc
return torch.mean(torch.std(errors, dim=0)).item()
# average over feature dimension
return torch.mean(torch.std(errors, dim=(0,1))).item()
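A synthetic self-check for the rewritten `normalized_std`, under the assumption that it is importable from this module: if the sampled predictions scatter around the ground truth with the stated epistemic spread and the labels carry the stated aleatoric noise, the normalized residual standard deviation should come out close to 1.

    import torch

    torch.manual_seed(0)
    batch, draws, features = 2000, 50, 1
    truth = torch.randn(batch, features)
    epis_std, aleat_std = 0.3, 0.5
    out = truth[:, None, :] + epis_std * torch.randn(batch, draws, features)
    sigmas = aleat_std * torch.ones(batch, features)
    y = truth + aleat_std * torch.randn(batch, features)
    print(normalized_std((out, sigmas), y))   # expected to be close to 1.0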
......@@ -67,13 +67,13 @@ def collect_metrics(x,y, seed=0,
# RMSE
training_state = net.training
net.train()
######
prediction_triple =\
net.predict_mean_and_unc(x, number_of_draws=noneiv_number_of_draws)
not_averaged_predictions = net.predict(x, number_of_draws=noneiv_number_of_draws,
take_average_of_prediction=False)
noneiv_mean = torch.mean(not_averaged_predictions[0], dim=1)
if len(y.shape) <= 1:
y = y.view((-1,1))
assert y.shape == prediction_triple[0].shape
res = y-prediction_triple[0]
assert y.shape == noneiv_mean.shape
res = y-noneiv_mean
if scale_outputs:
scale = train_data.dataset.std_labels.to(device)
scaled_res = res * scale.view((1,-1))
......@@ -83,10 +83,10 @@ def collect_metrics(x,y, seed=0,
noneiv_metrics['rmse'] = np.sqrt(np.mean(scaled_res**2))
noneiv_metrics['bias'] = np.mean(scaled_res)
noneiv_metrics['coverage_numerical'], noneiv_metrics['coverage_theory'] =\
epistemic_coverage(prediction_triple, y, normalize_errors=False)
epistemic_coverage(not_averaged_predictions, y, normalize_errors=False)
noneiv_metrics['coverage_normalized'],_ =\
epistemic_coverage(prediction_triple, y, normalize_errors=True)
noneiv_metrics['res_std'] = normalized_std(prediction_triple, y)
epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y)
......@@ -95,7 +95,8 @@ def collect_metrics(x,y, seed=0,
scale_labels = train_data.dataset.std_labels.view((-1,)).to(device)
else:
scale_labels = None
noneiv_metrics['logdens' ]= net.predictive_logdensity(x, y,
noneiv_metrics['logdens' ]= net.predictive_logdensity(
not_averaged_predictions, y,
number_of_draws=100,
decouple_dimensions=decouple_dimensions,
scale_labels=scale_labels).mean().detach().cpu().numpy()
......@@ -127,12 +128,13 @@ def collect_metrics(x,y, seed=0,
noise_state = net.noise_is_on
net.train()
net.noise_on()
prediction_triple =\
net.predict_mean_and_unc(x, number_of_draws=eiv_number_of_draws)
not_averaged_predictions = net.predict(x, number_of_draws=eiv_number_of_draws,
take_average_of_prediction=False)
eiv_mean = torch.mean(not_averaged_predictions[0], dim=1)
if len(y.shape) <= 1:
y = y.view((-1,1))
assert y.shape == prediction_triple[0].shape
res = y-prediction_triple[0]
assert y.shape == eiv_mean.shape
res = y-eiv_mean
scale = train_data.dataset.std_labels.to(device)
if scale_outputs:
scale = train_data.dataset.std_labels.to(device)
......@@ -143,10 +145,10 @@ def collect_metrics(x,y, seed=0,
eiv_metrics['rmse' ]= np.sqrt(np.mean(scaled_res**2))
eiv_metrics['bias' ]= np.mean(scaled_res)
eiv_metrics['coverage_numerical'], eiv_metrics['coverage_theory'] =\
epistemic_coverage(prediction_triple, y, normalize_errors=False)
epistemic_coverage(not_averaged_predictions, y, normalize_errors=False)
eiv_metrics['coverage_normalized'],_ =\
epistemic_coverage(prediction_triple, y, normalize_errors=True)
eiv_metrics['res_std' ]= normalized_std(prediction_triple, y)
epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
eiv_metrics['res_std' ]= normalized_std(not_averaged_predictions, y)
# NLL
......@@ -154,7 +156,8 @@ def collect_metrics(x,y, seed=0,
scale_labels = train_data.dataset.std_labels.view((-1,)).to(device)
else:
scale_labels = None
eiv_metrics['logdens' ]= net.predictive_logdensity(x, y,
eiv_metrics['logdens' ]= net.predictive_logdensity(
not_averaged_predictions, y,
number_of_draws=eiv_number_of_draws,
decouple_dimensions=decouple_dimensions,
scale_labels=scale_labels).mean().detach().cpu().numpy()
......