diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py index e72dc1180fd7bcd041a58ac5ff895524e7198457..2f8f6c849c90ea279fd0e90984dea3188715e1f9 100644 --- a/Experiments/evaluate_metrics.py +++ b/Experiments/evaluate_metrics.py @@ -111,7 +111,8 @@ def collect_metrics(x_y_pairs, seed=0, # RMSE training_state = net.training net.train() - not_averaged_predictions = net.predict(x, number_of_draws=noneiv_number_of_draws, + not_averaged_predictions = net.predict(x,\ + number_of_draws=noneiv_number_of_draws, take_average_of_prediction=False) noneiv_mean = torch.mean(not_averaged_predictions[0], dim=1) if len(y.shape) <= 1: @@ -127,9 +128,11 @@ def collect_metrics(x_y_pairs, seed=0, noneiv_metrics['rmse'] = np.sqrt(np.mean(scaled_res**2)) noneiv_metrics['bias'] = np.mean(scaled_res) noneiv_metrics['coverage_numerical'], noneiv_metrics['coverage_theory'] =\ - epistemic_coverage(not_averaged_predictions, y, normalize_errors=False) + epistemic_coverage(not_averaged_predictions, y,\ + normalize_errors=False) noneiv_metrics['coverage_normalized'],_ =\ - epistemic_coverage(not_averaged_predictions, y, normalize_errors=True) + epistemic_coverage(not_averaged_predictions, y,\ + normalize_errors=True) noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y) # metrics that need a ground truth @@ -138,8 +141,8 @@ def collect_metrics(x_y_pairs, seed=0, noneiv_metrics['true_coverage_theory'] =\ epistemic_coverage(not_averaged_predictions, true_y, normalize_errors=False, noisy_y=False) - true_res = true_y - noneiv_mean - noneiv_metrics['true_rmse'] = np.sqrt(np.mean(scaled_res**2)) + true_res = (true_y - noneiv_mean).detach().cpu().numpy().flatten() + noneiv_metrics['true_rmse'] = np.sqrt(np.mean(true_res**2)) # NLL @@ -208,8 +211,8 @@ def collect_metrics(x_y_pairs, seed=0, epistemic_coverage(not_averaged_predictions, true_y, normalize_errors=False, noisy_y=False) - true_res = true_y - eiv_mean - eiv_metrics['true_rmse'] = np.sqrt(np.mean(scaled_res**2)) + true_res = (true_y - eiv_mean).detach().cpu().numpy().flatten() + eiv_metrics['true_rmse'] = np.sqrt(np.mean(true_res**2)) # NLL if scale_outputs: