Added quadratic data

9d701ff4 · Jörg Martin · cfc67cc6 · 9d701ff4 · 9d701ff4 · 9d701ff4
Commit 9d701ff4 authored 3 years ago by Jörg Martin
--- a/EIVPackage/EIVData/quadratic.py
+++ b/EIVPackage/EIVData/quadratic.py
+import torch
+import sys
+from torch.utils.data import TensorDataset, random_split
+# Settings of the synthetic quadratic dataset; all of them are read by
+# load_data below.
+# Number of (x, y) pairs that are drawn before the train/test split.
+total_number_of_datapoints = 2000
+# [lower, upper] bounds between which the noise-free inputs are drawn uniformly.
+input_range = [-1,1]
+# Ground truth is slope * x**2 + intercept.
+slope = 1.0
+intercept = 0.0
+# Factors multiplying the standard normal noise added to the inputs (x) and
+# to the targets (y), respectively.
+x_noise_strength = 0.05
+y_noise_strength = 0.1
+def get_normalization(*args):
+    """
+    Computes normalization statistics for each of the given tensors.
+    :param args: The tensors for which the statistics are computed.
+    :returns: A tuple containing one (mean, std) pair per tensor in *args, in
+    the order in which the tensors were passed. Both statistics are taken
+    along dimension 0 and keep that dimension (keepdim=True).
+    """
+    return tuple(
+            (torch.mean(tensor, dim=0, keepdim=True),
+                torch.std(tensor, dim=0, keepdim=True))
+            for tensor in args)
+def load_data(seed=0, splitting_part=0.8, normalize=True,
+        return_ground_truth=False):
+    """
+    Draws a noisy, one-dimensional quadratic dataset and splits it randomly
+    into a training and a test set. The noise-free inputs are drawn uniformly
+    from `input_range`, the noise-free targets are
+    `slope * x**2 + intercept`, and Gaussian noise scaled by
+    `x_noise_strength` and `y_noise_strength` is added to inputs and targets.
+    :param seed: Seed for drawing and splitting the data.
+    :param splitting_part: Which fraction of the data to use as training
+    data. Defaults to 0.8.
+    :param normalize: Whether to normalize the data, defaults to True. The
+    mean and standard deviation are computed from the noisy data and are
+    applied to the noisy data and to the ground truth alike.
+    :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
+    also be returned. Defaults to False.
+    :returns: trainset, testset if return_ground_truth is False,
+    else trainset, testset, (true_x, true_y)
+    """
+    def generator_from(tensor_seed):
+        # fresh generator seeded with the integer stored in `tensor_seed`
+        return torch.Generator().manual_seed(tensor_seed.item())
+    # a master generator yields separate seeds for the inputs, the two
+    # noise draws and the train/test split
+    master_generator = torch.Generator().manual_seed(seed)
+    input_seed, x_noise_seed, y_noise_seed, split_seed = torch.randint(
+            0, sys.maxsize, (4,), generator=master_generator)
+    sample_shape = (total_number_of_datapoints, 1)
+    # noise-free inputs, uniform in input_range, and their quadratic image
+    lower, upper = input_range[0], input_range[1]
+    uniform_draws = torch.rand(sample_shape,
+            generator=generator_from(input_seed))
+    true_x = lower + (upper - lower) * uniform_draws
+    true_y = slope * true_x**2 + intercept
+    # perturb inputs and targets with independent Gaussian noise
+    x_noise = torch.randn(sample_shape,
+            generator=generator_from(x_noise_seed))
+    noisy_x = true_x + x_noise_strength * x_noise
+    y_noise = torch.randn(sample_shape,
+            generator=generator_from(y_noise_seed))
+    noisy_y = true_y + y_noise_strength * y_noise
+    if normalize:
+        # statistics come from the noisy data, the ground truth is mapped
+        # with the same transformation
+        (x_mean, x_std), (y_mean, y_std) = get_normalization(noisy_x, noisy_y)
+        noisy_x = (noisy_x - x_mean) / x_std
+        true_x = (true_x - x_mean) / x_std
+        noisy_y = (noisy_y - y_mean) / y_std
+        true_y = (true_y - y_mean) / y_std
+    quadratic_dataset = TensorDataset(noisy_x, noisy_y)
+    number_of_samples = len(quadratic_dataset)
+    train_len = int(number_of_samples * splitting_part)
+    test_len = number_of_samples - train_len
+    trainset, testset = random_split(quadratic_dataset,
+            lengths=[train_len, test_len],
+            generator=generator_from(split_seed))
+    if return_ground_truth:
+        return trainset, testset, (true_x, true_y)
+    return trainset, testset
--- a/Experiments/configurations/eiv_quadratic.json
+++ b/Experiments/configurations/eiv_quadratic.json
+{
+	"long_dataname": "quadratic",
+	"short_dataname": "quadratic",
+	"lr": 1e-3,
+	"batch_size": 64,
+	"test_batch_size": 800,
+	"number_of_epochs": 100,
+	"unscaled_reg": 10,
+	"report_point": 5,
+	"p": 0.1,
+	"lr_update": 20,
+	"std_y_update_points": [1,40],
+	"eiv_prediction_number_of_draws": [100,5],
+	"eiv_prediction_number_of_batches": 10,
+	"init_std_y_list": [0.5],
+	"gamma": 0.5,
+	"hidden_layers": [128, 128, 128, 128],
+	"fixed_std_x": 0.05,
+	"seed_range": [0,10],
+	"gpu_number": 1
+}
--- a/Experiments/configurations/noneiv_quadratic.json
+++ b/Experiments/configurations/noneiv_quadratic.json
+{
+	"long_dataname": "quadratic",
+	"short_dataname": "quadratic",
+	"lr": 1e-3,
+	"batch_size": 64,
+	"test_batch_size": 800,
+	"number_of_epochs": 100,
+	"unscaled_reg": 10,
+	"report_point": 5,
+	"p": 0.1,
+	"lr_update": 20,
+	"std_y_update_points": [1,40],
+	"noneiv_prediction_number_of_draws": 100,
+	"noneiv_prediction_number_of_batches": 10,
+	"init_std_y_list": [0.5],
+	"gamma": 0.5,
+	"hidden_layers": [128, 128, 128, 128],
+	"seed_range": [0,10],
+	"gpu_number": 1
+}