diff --git a/EIVPackage/EIVData/linear.py b/EIVPackage/EIVData/linear.py
new file mode 100644
index 0000000000000000000000000000000000000000..5dc363baa5a62d0a5a8858c2e5d169eb5b8e8d55
--- /dev/null
+++ b/EIVPackage/EIVData/linear.py
@@ -0,0 +1,68 @@
+import torch
+import sys
+from torch.utils.data import TensorDataset, random_split
+
+total_number_of_datapoints = 10000
+input_range = [-1,1]
+slope = 1.0
+intercept = 0.0
+x_noise_strength = 0.1
+y_noise_strength = 0.1
+
+def get_normalization(*args):
+    """
+    Returns the means and standard deviations (in tuples) of the tensors in *args.
+    """
+    normalization_collection = []
+    for t in args:
+        t_mean = torch.mean(t, dim=0, keepdim=True)
+        t_std = torch.std(t, dim=0, keepdim=True)
+        normalization_collection.append((t_mean, t_std))
+    return tuple(normalization_collection)
+
+def load_data(seed=0, splitting_part=0.8, normalize=True,
+        return_ground_truth=False):
+    """
+    Loads one-dimensional data from a noisy linear model.
+    :param seed: Seed for drawing and splitting the data.
+    :param splitting_part: Which fraction of the data to use as training
+    data. Defaults to 0.8.
+    :param normalize: Whether to normalize the data, defaults to True.
+    :param return_ground_truth: Boolean. If True, the noise-free ground truth
+    will also be returned. Defaults to False.
+    :returns: linear_trainset, linear_testset if return_ground_truth is False,
+    else linear_trainset, linear_testset, (true_x, true_y)
+    """
+    random_generator = torch.Generator().manual_seed(seed)
+    # draw different seeds for data drawing, noise and splitting
+    seeds = torch.randint(0, sys.maxsize, (4,), generator=random_generator)
+    # create new generators from tensor seeds
+    create_generator = lambda tensor_seed:\
+            torch.Generator().manual_seed(tensor_seed.item())
+    true_x = input_range[0] + (input_range[1]-input_range[0])\
+            * torch.rand((total_number_of_datapoints,1),
+                    generator=create_generator(seeds[0]))
+    true_y = slope * true_x + intercept
+    noisy_x = true_x + x_noise_strength *\
+            torch.randn((total_number_of_datapoints,1),
+                    generator=create_generator(seeds[1]))
+    noisy_y = true_y + y_noise_strength *\
+            torch.randn((total_number_of_datapoints,1),
+                    generator=create_generator(seeds[2]))
+    if normalize:
+        normalization_x, normalization_y = get_normalization(noisy_x, noisy_y)
+        noisy_x = (noisy_x - normalization_x[0]) / normalization_x[1]
+        true_x = (true_x - normalization_x[0]) / normalization_x[1]
+        noisy_y = (noisy_y - normalization_y[0]) / normalization_y[1]
+        true_y = (true_y - normalization_y[0]) / normalization_y[1]
+    linear_dataset = TensorDataset(noisy_x, noisy_y)
+    dataset_len = len(linear_dataset)
+    train_len = int(dataset_len*splitting_part)
+    test_len = dataset_len - train_len
+    linear_trainset, linear_testset = random_split(linear_dataset,
+            lengths=[train_len, test_len],
+            generator=create_generator(seeds[3]))
+    if not return_ground_truth:
+        return linear_trainset, linear_testset
+    else:
+        return linear_trainset, linear_testset, (true_x, true_y)
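
A minimal usage sketch for the new loader (assuming EIVPackage/EIVData is on the import path; everything else relies only on the code added above):

    from torch.utils.data import DataLoader
    from EIVData.linear import load_data

    # with the default split of 0.8 this yields 8000 training and 2000 test points
    train, test, (true_x, true_y) = load_data(seed=0, return_ground_truth=True)
    noisy_x, noisy_y = train[0]  # a single noisy (x, y) pair, each of shape (1,)
    loader = DataLoader(train, batch_size=100, shuffle=True)
    x_batch, y_batch = next(iter(loader))  # both of shape (100, 1)
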
diff --git a/EIVPackage/EIVData/power_plant.py b/EIVPackage/EIVData/power_plant.py
index 6f40c65d0ac2b201f2549a347f9823cba9a10035..0b6e2c57cde9f7898dba7f656a944356189975cf
--- a/EIVPackage/EIVData/power_plant.py
+++ b/EIVPackage/EIVData/power_plant.py
@@ -4,24 +4,24 @@ from torch.utils.data import random_split
 
 def load_data(seed=0, splitting_part=0.8, normalize=True):
     """
-    Loads the naval propulsion dataset
+    Loads the power plant dataset
     :param seed: Seed for splitting and shuffling the data. Defaults to 0.
     :param splitting_part: Which fraction of the data to use as training
     data. Defaults to 0.8.
     :param normalize: Whether to normalize the data, defaults to True.
-    :returns: naval_trainset, naval_testset
+    :returns: power_trainset, power_testset
     """
-    naval_dataset = CSVData('~/SharedData/AI/datasets/combined_cycle_power_plant/Folds5x2_pp_single_sheet.csv',
+    power_dataset = CSVData('~/SharedData/AI/datasets/combined_cycle_power_plant/Folds5x2_pp_single_sheet.csv',
             class_name="PE",
             shuffle_seed=seed,
             normalize=normalize,
             delimiter=",")
-    dataset_len = len(naval_dataset)
+    dataset_len = len(power_dataset)
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
-    naval_trainset, naval_testset = random_split(naval_dataset,
+    power_trainset, power_testset = random_split(power_dataset,
             lengths=[train_len, test_len],
             generator=torch.Generator().manual_seed(seed))
-    return naval_trainset, naval_testset
+    return power_trainset, power_testset
diff --git a/Experiments/configurations/eiv_linear.json b/Experiments/configurations/eiv_linear.json
new file mode 100644
index 0000000000000000000000000000000000000000..92c292ef49d22cfbd7ffc561e6f64c015f3e35f3
--- /dev/null
+++ b/Experiments/configurations/eiv_linear.json
@@ -0,0 +1,21 @@
+{
+    "long_dataname": "linear",
+    "short_dataname": "linear",
+    "lr": 1e-3,
+    "batch_size": 100,
+    "test_batch_size": 800,
+    "number_of_epochs": 100,
+    "unscaled_reg": 10,
+    "report_point": 5,
+    "p": 0.1,
+    "lr_update": 20,
+    "std_y_update_points": [1,40],
+    "eiv_prediction_number_of_draws": [100,5],
+    "eiv_prediction_number_of_batches": 10,
+    "init_std_y_list": [0.5],
+    "gamma": 0.5,
+    "hidden_layers": [1024, 1024, 1024, 1024],
+    "fixed_std_x": 0.1,
+    "seed_range": [0,10],
+    "gpu_number": 1
+}
diff --git a/Experiments/configurations/noneiv_linear.json b/Experiments/configurations/noneiv_linear.json
new file mode 100644
index 0000000000000000000000000000000000000000..67c6ea438f7a6fbead0e0a350e69560fd86fa1ff
--- /dev/null
+++ b/Experiments/configurations/noneiv_linear.json
@@ -0,0 +1,20 @@
+{
+    "long_dataname": "linear",
+    "short_dataname": "linear",
+    "lr": 1e-3,
+    "batch_size": 100,
+    "test_batch_size": 800,
+    "number_of_epochs": 100,
+    "unscaled_reg": 10,
+    "report_point": 5,
+    "p": 0.1,
+    "lr_update": 20,
+    "std_y_update_points": [1,40],
+    "noneiv_prediction_number_of_draws": 100,
+    "noneiv_prediction_number_of_batches": 10,
+    "init_std_y_list": [0.5],
+    "gamma": 0.5,
+    "hidden_layers": [1024, 1024, 1024, 1024],
+    "seed_range": [0,10],
+    "gpu_number": 1
+}
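
A hedged sketch of how these configuration files could be consumed; the path pattern and the plain json.load call are assumptions, since the loading code is not part of this diff:

    import json

    data = 'linear'
    with open(f'Experiments/configurations/eiv_{data}.json') as f:
        eiv_config = json.load(f)
    with open(f'Experiments/configurations/noneiv_{data}.json') as f:
        noneiv_config = json.load(f)
    # "fixed_std_x" only exists in the EiV configuration; it matches the
    # x_noise_strength used to generate the linear dataset above.
    assert eiv_config["fixed_std_x"] == 0.1
    print(noneiv_config["lr"], noneiv_config["hidden_layers"])
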
diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py
index 5dbc16fcc5277e95db69d61b5decaee1a17fc0df..d694558ba35333831660c26edab47bc0d3d014f1
--- a/Experiments/evaluate_metrics.py
+++ b/Experiments/evaluate_metrics.py
@@ -1,3 +1,7 @@
+"""
+Compute metrics for datasets for which there is not necessarily a ground
+truth. Results will be stored in the results folder.
+"""
 import importlib
 import os
 import argparse
@@ -15,7 +19,7 @@ from EIVGeneral.coverage_metrics import epistemic_coverage, normalized_std
 
 # read in data via --data option
 parser = argparse.ArgumentParser()
-parser.add_argument("--data", help="Loads data", default='california')
+parser.add_argument("--data", help="Loads data", default='linear')
 parser.add_argument("--no-autoindent", help="", action="store_true") # to avoid conflicts in IPython
 args = parser.parse_args()
@@ -73,6 +77,7 @@ def collect_metrics(x,y, seed=0,
     of Gal et al. is followed where, in the evaluation of the
     log-posterior-predictive, each dimension is treated independently and then
     averaged. If False (default), a multivariate distribution is used.
+    :param device: The device to use for the computations.
     :param scale_outputs: Boolean, scale the outputs for the RMSE, the bias and
     the log-dens to make them comparable with the literature.
     :returns: Dictionaries noneiv_metrics, eiv_metrics
@@ -167,7 +172,6 @@ def collect_metrics(x,y, seed=0,
     y = y.view((-1,1))
     assert y.shape == eiv_mean.shape
     res = y-eiv_mean
-    scale = train_data.dataset.std_labels.to(device)
     if scale_outputs:
         scale = train_data.dataset.std_labels.to(device)
         scaled_res = res * scale.view((1,-1))
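
The script selects the dataset named by --data at runtime; a hedged sketch of that dispatch (evaluate_metrics.py imports importlib for this purpose, but the exact module path 'EIVData.<name>' is an assumption based on the package layout above):

    import importlib

    data = 'linear'  # in the script this comes from args.data
    load_data = importlib.import_module(f'EIVData.{data}').load_data
    train_data, test_data = load_data(seed=0)
    print(len(train_data), len(test_data))  # 8000 2000 for the linear dataset
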