# Settings of the synthetic cubic dataset, read by load_data below.
total_number_of_datapoints = 2000
input_range = [-4,4]
slope = 1.0
intercept = 0.0
# x noise: 5 percent of half the width of the input range
x_noise_strength = 0.05 * (input_range[1] - input_range[0])/2
y_noise_strength = 3


def load_data(seed=0, splitting_part=0.8, normalize=True,
        return_ground_truth=False):
    """
    Builds the one-dimensional cubic dataset (as in Hernandez-Lobato,
    Adams 2015) and splits it into a training and a test part.

    The inputs are drawn with `torch.rand` and mapped to `input_range`, the
    targets are `slope * x**3 + intercept`; both are then handed to
    `add_noise`. The split is not shuffled: the leading rows form the
    training set, the remaining rows the test set.
    :param seed: Master seed. It fixes the seed used for drawing the inputs
    and the two seeds that are passed on to `add_noise`.
    :param splitting_part: Which fraction of the data to use as training
    data. Defaults to 0.8. The training length is
    `int(number_of_rows * splitting_part)`.
    :param normalize: Forwarded to `add_noise` as `normalize`, defaults to
    True.
    :param return_ground_truth: Boolean. If True, two datasets containing the
    ground truth are returned in addition. Defaults to False.
    :returns: cubic_trainset, cubic_testset if return_ground_truth is False,
    else cubic_trainset, cubic_testset, true_cubic_trainset,
    true_cubic_testset. The latter two yield **four tensors** per item: the
    true x, y followed by their noisy counterparts.
    """
    # One master generator yields three sub-seeds: the first one is used for
    # drawing x, the remaining two are handed to add_noise.
    master_generator = torch.Generator().manual_seed(seed)
    x_seed, *noise_seeds = torch.randint(
            0, sys.maxsize, (3,), generator=master_generator).tolist()

    # map the draws from torch.rand onto the input range
    lower, upper = input_range[0], input_range[1]
    unit_draws = torch.rand((total_number_of_datapoints,1),
            generator=torch.Generator().manual_seed(x_seed))
    true_x = lower + (upper - lower) * unit_draws
    true_y = slope * true_x**3 + intercept

    # add_noise hands back a noisy pair and a replacement for the true pair
    # (presumably the normalized ground truth when normalize is set -- see
    # EIVGeneral.manipulate_tensors to confirm)
    (noisy_x, noisy_y), (true_x, true_y) = add_noise(
            tensor_list=(true_x, true_y),
            noise_strength_list=(x_noise_strength, y_noise_strength),
            seed_list=noise_seeds,
            normalize=normalize)

    # split every tensor at the same row into a train and a test part
    number_of_rows = noisy_x.shape[0]
    train_rows = int(number_of_rows*splitting_part)
    section_sizes = [train_rows, number_of_rows - train_rows]
    # order inside each tuple: true x, true y, noisy x, noisy y
    train_parts, test_parts = zip(*(
        torch.split(tensor, section_sizes)
        for tensor in (true_x, true_y, noisy_x, noisy_y)))

    # the plain datasets only carry the noisy pair
    cubic_trainset = TensorDataset(*train_parts[2:])
    cubic_testset = TensorDataset(*test_parts[2:])
    if not return_ground_truth:
        return cubic_trainset, cubic_testset

    true_cubic_trainset = TensorDataset(*train_parts)
    true_cubic_testset = TensorDataset(*test_parts)
    return (cubic_trainset, cubic_testset,
            true_cubic_trainset, true_cubic_testset)
# Settings of the synthetic sine dataset, read by load_data below.
input_range = [-0.2,0.8]
# not referenced by load_data in this module
intercept = 0.0
x_noise_strength = 0.02
y_noise_strength = 0.05


def load_data(seed=0, splitting_part=0.8, normalize=True,
        return_ground_truth=False):
    """
    Builds the one-dimensional, sine shaped dataset (as in Blundell et al.
    2014) and splits it into a training and a test part.

    The inputs are drawn with `torch.rand` and mapped to `input_range`, the
    targets are `x + sin(2*pi*x) + sin(4*pi*x)`; both are then handed to
    `add_noise`. The split is not shuffled: the leading rows form the
    training set, the remaining rows the test set.
    :param seed: Master seed. It fixes the seed used for drawing the inputs
    and the two seeds that are passed on to `add_noise`.
    :param splitting_part: Which fraction of the data to use as training
    data. Defaults to 0.8. The training length is
    `int(number_of_rows * splitting_part)`.
    :param normalize: Forwarded to `add_noise` as `normalize`, defaults to
    True.
    :param return_ground_truth: Boolean. If True, two datasets containing the
    ground truth are returned in addition. Defaults to False.
    :returns: sine_trainset, sine_testset if return_ground_truth is False,
    else sine_trainset, sine_testset, true_sine_trainset,
    true_sine_testset. The latter two yield **four tensors** per item: the
    true x, y followed by their noisy counterparts.
    """
    # One master generator yields three sub-seeds: the first one is used for
    # drawing x, the remaining two are handed to add_noise.
    master_generator = torch.Generator().manual_seed(seed)
    x_seed, *noise_seeds = torch.randint(
            0, sys.maxsize, (3,), generator=master_generator).tolist()

    # map the draws from torch.rand onto the input range
    lower, upper = input_range[0], input_range[1]
    unit_draws = torch.rand((total_number_of_datapoints,1),
            generator=torch.Generator().manual_seed(x_seed))
    true_x = lower + (upper - lower) * unit_draws

    # linear trend plus two sine waves, summed from left to right
    slow_wave = torch.sin(2 * torch.pi * true_x)
    fast_wave = torch.sin(4 * torch.pi * true_x)
    true_y = true_x + slow_wave + fast_wave

    # add_noise hands back a noisy pair and a replacement for the true pair
    # (presumably the normalized ground truth when normalize is set -- see
    # EIVGeneral.manipulate_tensors to confirm)
    (noisy_x, noisy_y), (true_x, true_y) = add_noise(
            tensor_list=(true_x, true_y),
            noise_strength_list=(x_noise_strength, y_noise_strength),
            seed_list=noise_seeds,
            normalize=normalize)

    # split every tensor at the same row into a train and a test part
    number_of_rows = noisy_x.shape[0]
    train_rows = int(number_of_rows*splitting_part)
    section_sizes = [train_rows, number_of_rows - train_rows]
    # order inside each tuple: true x, true y, noisy x, noisy y
    train_parts, test_parts = zip(*(
        torch.split(tensor, section_sizes)
        for tensor in (true_x, true_y, noisy_x, noisy_y)))

    # the plain datasets only carry the noisy pair
    sine_trainset = TensorDataset(*train_parts[2:])
    sine_testset = TensorDataset(*test_parts[2:])
    if not return_ground_truth:
        return sine_trainset, sine_testset

    true_sine_trainset = TensorDataset(*train_parts)
    true_sine_testset = TensorDataset(*test_parts)
    return (sine_trainset, sine_testset,
            true_sine_trainset, true_sine_testset)
+ "gpu_number": 1 +} diff --git a/Experiments/configurations/noneiv_cubic.json b/Experiments/configurations/noneiv_cubic.json new file mode 100644 index 0000000000000000000000000000000000000000..00fdce9b442f206a4fb9d6c28d7446a177c909a8 --- /dev/null +++ b/Experiments/configurations/noneiv_cubic.json @@ -0,0 +1,20 @@ +{ + "long_dataname": "cubic", + "short_dataname": "cubic", + "lr": 1e-3, + "batch_size": 64, + "test_batch_size": 800, + "number_of_epochs": 100, + "unscaled_reg": 10, + "report_point": 5, + "p": 0.1, + "lr_update": 20, + "std_y_update_points": [1,40] , + "noneiv_prediction_number_of_draws": 100, + "noneiv_prediction_number_of_batches": 10, + "init_std_y_list": [0.5], + "gamma": 0.5, + "hidden_layers": [128, 128, 128, 128], + "seed_range": [0,10], + "gpu_number": 1 +} diff --git a/Experiments/configurations/noneiv_sine.json b/Experiments/configurations/noneiv_sine.json new file mode 100644 index 0000000000000000000000000000000000000000..c94c589df94fbbb1ef263f8ba943a6521507fa84 --- /dev/null +++ b/Experiments/configurations/noneiv_sine.json @@ -0,0 +1,20 @@ +{ + "long_dataname": "sine", + "short_dataname": "sine", + "lr": 1e-3, + "batch_size": 64, + "test_batch_size": 800, + "number_of_epochs": 100, + "unscaled_reg": 10, + "report_point": 5, + "p": 0.1, + "lr_update": 20, + "std_y_update_points": [1,40] , + "noneiv_prediction_number_of_draws": 100, + "noneiv_prediction_number_of_batches": 10, + "init_std_y_list": [0.1], + "gamma": 0.5, + "hidden_layers": [128, 128, 128, 128], + "seed_range": [0,10], + "gpu_number": 1 +}