From 20b8dad97a38172b9b76f983dda185c6cc830d03 Mon Sep 17 00:00:00 2001
From: Joerg Martin <joerg.martin@ptb.de>
Date: Tue, 25 Jan 2022 13:30:52 +0000
Subject: [PATCH] cubic and sine dataset added

---
 EIVPackage/EIVData/cubic.py                  | 62 +++++++++++++++++++
 EIVPackage/EIVData/sine.py                   | 63 ++++++++++++++++++++
 Experiments/configurations/eiv_cubic.json    | 21 +++++++
 Experiments/configurations/eiv_sine.json     | 21 +++++++
 Experiments/configurations/noneiv_cubic.json | 20 +++++++
 Experiments/configurations/noneiv_sine.json  | 20 +++++++
 6 files changed, 207 insertions(+)
 create mode 100644 EIVPackage/EIVData/cubic.py
 create mode 100644 EIVPackage/EIVData/sine.py
 create mode 100644 Experiments/configurations/eiv_cubic.json
 create mode 100644 Experiments/configurations/eiv_sine.json
 create mode 100644 Experiments/configurations/noneiv_cubic.json
 create mode 100644 Experiments/configurations/noneiv_sine.json

diff --git a/EIVPackage/EIVData/cubic.py b/EIVPackage/EIVData/cubic.py
new file mode 100644
index 0000000..5d5c63e
--- /dev/null
+++ b/EIVPackage/EIVData/cubic.py
@@ -0,0 +1,62 @@
+import torch
+import sys
+from torch.utils.data import TensorDataset
+
+from EIVGeneral.manipulate_tensors import add_noise
+
+total_number_of_datapoints = 2000
+input_range = [-4,4]
+slope = 1.0
+intercept = 0.0
+x_noise_strength = 0.05 * (input_range[1] - input_range[0])/2
+y_noise_strength = 3
+
+def load_data(seed=0, splitting_part=0.8, normalize=True,
+        return_ground_truth=False):
+    """
+    Loads one-dimensional, cubic data as in Hernandez-Lobato, Adams 2015.
+    :param seed: Seed for drawing the inputs and the noise.
+    :param splitting_part: Which fraction of the data to use as training
+    data. Defaults to 0.8.
+    :param normalize: Whether to normalize the data, defaults to True.
+    :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
+    also be returned. Defaults to False.
+    :returns: cubic_trainset, cubic_testset if return_ground_truth is False,
+    else cubic_trainset, cubic_testset,  true_cubic_trainset,
+    true_cubic_testset. The latter two return **four tensors**: The true x,y and
+    their noisy counterparts.
+    """
+    random_generator = torch.Generator().manual_seed(seed)
+    # draw different seeds for the inputs and for the x and y noise
+    seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\
+            generator=random_generator)]
+    # draw the true inputs uniformly from input_range
+    true_x = input_range[0] + (input_range[1]-input_range[0])\
+                  * torch.rand((total_number_of_datapoints,1),
+                          generator=torch.Generator().manual_seed(seeds[0]))
+    true_y = slope * true_x**3 + intercept 
+    # add noise and normalize x and y
+    (noisy_x, noisy_y), (true_x, true_y) = add_noise(
+            tensor_list=(true_x, true_y),
+            noise_strength_list=(x_noise_strength, y_noise_strength),
+            seed_list=seeds[1:3],
+            normalize=normalize)
+    # create datasets
+    dataset_len = noisy_x.shape[0]
+    train_len = int(dataset_len*splitting_part)
+    test_len = dataset_len - train_len
+    true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
+    true_train_y, true_test_y = torch.split(true_y, [train_len, test_len])
+    noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len])
+    noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
+    cubic_trainset = TensorDataset(noisy_train_x, noisy_train_y)
+    cubic_testset = TensorDataset(noisy_test_x, noisy_test_y)
+    true_cubic_trainset = TensorDataset(true_train_x, true_train_y,
+            noisy_train_x, noisy_train_y)
+    true_cubic_testset = TensorDataset(true_test_x, true_test_y,
+            noisy_test_x, noisy_test_y)
+    if not return_ground_truth:
+        return cubic_trainset, cubic_testset
+    else:
+        return cubic_trainset, cubic_testset, true_cubic_trainset,\
+            true_cubic_testset
diff --git a/EIVPackage/EIVData/sine.py b/EIVPackage/EIVData/sine.py
new file mode 100644
index 0000000..d9ca0b8
--- /dev/null
+++ b/EIVPackage/EIVData/sine.py
@@ -0,0 +1,63 @@
+import torch
+import sys
+from torch.utils.data import TensorDataset
+
+from EIVGeneral.manipulate_tensors import add_noise
+
+total_number_of_datapoints = 2000
+input_range = [-0.2,0.8]
+intercept = 0.0
+x_noise_strength = 0.02 
+y_noise_strength = 0.05
+
+def load_data(seed=0, splitting_part=0.8, normalize=True,
+        return_ground_truth=False):
+    """
+    Loads one-dimensional, sine shaped data as in Blundell et al. 2014.
+    :param seed: Seed for drawing the inputs and the noise.
+    :param splitting_part: Which fraction of the data to use as training
+    data. Defaults to 0.8.
+    :param normalize: Whether to normalize the data, defaults to True.
+    :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
+    also be returned. Defaults to False.
+    :returns: sine_trainset, sine_testset if return_ground_truth is False,
+    else sine_trainset, sine_testset,  true_sine_trainset,
+    true_sine_testset. The latter two return **four tensors**: The true x,y and
+    their noisy counterparts.
+    """
+    random_generator = torch.Generator().manual_seed(seed)
+    # draw different seeds for the inputs and for the x and y noise
+    seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\
+            generator=random_generator)]
+    # draw the true inputs uniformly from input_range
+    true_x = input_range[0] + (input_range[1]-input_range[0])\
+                  * torch.rand((total_number_of_datapoints,1),
+                          generator=torch.Generator().manual_seed(seeds[0]))
+    true_y = true_x +\
+            torch.sin(2 * torch.pi * true_x) +\
+            torch.sin(4 * torch.pi * true_x)
+    # add noise and normalize x and y
+    (noisy_x, noisy_y), (true_x, true_y) = add_noise(
+            tensor_list=(true_x, true_y),
+            noise_strength_list=(x_noise_strength, y_noise_strength),
+            seed_list=seeds[1:3],
+            normalize=normalize)
+    # create datasets
+    dataset_len = noisy_x.shape[0]
+    train_len = int(dataset_len*splitting_part)
+    test_len = dataset_len - train_len
+    true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
+    true_train_y, true_test_y = torch.split(true_y, [train_len, test_len])
+    noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len])
+    noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
+    sine_trainset = TensorDataset(noisy_train_x, noisy_train_y)
+    sine_testset = TensorDataset(noisy_test_x, noisy_test_y)
+    true_sine_trainset = TensorDataset(true_train_x, true_train_y,
+            noisy_train_x, noisy_train_y)
+    true_sine_testset = TensorDataset(true_test_x, true_test_y,
+            noisy_test_x, noisy_test_y)
+    if not return_ground_truth:
+        return sine_trainset, sine_testset
+    else:
+        return sine_trainset, sine_testset, true_sine_trainset,\
+            true_sine_testset
diff --git a/Experiments/configurations/eiv_cubic.json b/Experiments/configurations/eiv_cubic.json
new file mode 100644
index 0000000..6b81fef
--- /dev/null
+++ b/Experiments/configurations/eiv_cubic.json
@@ -0,0 +1,21 @@
+{
+	"long_dataname": "cubic",
+	"short_dataname": "cubic",
+	"lr": 1e-3,
+	"batch_size": 64,
+	"test_batch_size": 800,
+	"number_of_epochs": 100,
+	"unscaled_reg": 10,
+	"report_point": 5,
+	"p": 0.1,
+	"lr_update": 20,
+	"std_y_update_points": [1,40],
+	"eiv_prediction_number_of_draws": [100,5],
+	"eiv_prediction_number_of_batches": 10,
+	"init_std_y_list": [0.5],
+	"gamma": 0.5,
+	"hidden_layers": [128, 128, 128, 128],
+	"fixed_std_x": 0.05,
+	"seed_range": [0,10],
+	"gpu_number": 1
+}
diff --git a/Experiments/configurations/eiv_sine.json b/Experiments/configurations/eiv_sine.json
new file mode 100644
index 0000000..b632e1d
--- /dev/null
+++ b/Experiments/configurations/eiv_sine.json
@@ -0,0 +1,21 @@
+{
+	"long_dataname": "sine",
+	"short_dataname": "sine",
+	"lr": 1e-3,
+	"batch_size": 64,
+	"test_batch_size": 800,
+	"number_of_epochs": 100,
+	"unscaled_reg": 10,
+	"report_point": 5,
+	"p": 0.1,
+	"lr_update": 20,
+	"std_y_update_points": [1,40],
+	"eiv_prediction_number_of_draws": [100,5],
+	"eiv_prediction_number_of_batches": 10,
+	"init_std_y_list": [0.1],
+	"gamma": 0.5,
+	"hidden_layers": [128, 128, 128, 128],
+	"fixed_std_x": 0.02,
+	"seed_range": [0,10],
+	"gpu_number": 1
+}
diff --git a/Experiments/configurations/noneiv_cubic.json b/Experiments/configurations/noneiv_cubic.json
new file mode 100644
index 0000000..00fdce9
--- /dev/null
+++ b/Experiments/configurations/noneiv_cubic.json
@@ -0,0 +1,20 @@
+{
+	"long_dataname": "cubic",
+	"short_dataname": "cubic",
+	"lr": 1e-3,
+	"batch_size": 64,
+	"test_batch_size": 800,
+	"number_of_epochs": 100,
+	"unscaled_reg": 10,
+	"report_point": 5,
+	"p": 0.1,
+	"lr_update": 20,
+	"std_y_update_points": [1,40] ,
+	"noneiv_prediction_number_of_draws": 100,
+	"noneiv_prediction_number_of_batches": 10,
+	"init_std_y_list": [0.5],
+	"gamma": 0.5,
+	"hidden_layers": [128, 128, 128, 128],
+	"seed_range": [0,10],
+	"gpu_number": 1
+}
diff --git a/Experiments/configurations/noneiv_sine.json b/Experiments/configurations/noneiv_sine.json
new file mode 100644
index 0000000..c94c589
--- /dev/null
+++ b/Experiments/configurations/noneiv_sine.json
@@ -0,0 +1,20 @@
+{
+	"long_dataname": "sine",
+	"short_dataname": "sine",
+	"lr": 1e-3,
+	"batch_size": 64,
+	"test_batch_size": 800,
+	"number_of_epochs": 100,
+	"unscaled_reg": 10,
+	"report_point": 5,
+	"p": 0.1,
+	"lr_update": 20,
+	"std_y_update_points": [1,40] ,
+	"noneiv_prediction_number_of_draws": 100,
+	"noneiv_prediction_number_of_batches": 10,
+	"init_std_y_list": [0.1],
+	"gamma": 0.5,
+	"hidden_layers": [128, 128, 128, 128],
+	"seed_range": [0,10],
+	"gpu_number": 1
+}
-- 
GitLab