Simplified quadratic and linear

4d96c6dd · Jörg Martin · 667997a2 · 4d96c6dd · 4d96c6dd · 4d96c6dd
Commit 4d96c6dd authored 3 years ago by Jörg Martin
--- a/EIVPackage/EIVData/linear.py
+++ b/EIVPackage/EIVData/linear.py
@@ -2,6 +2,8 @@ import torch
 import sys
 from torch.utils.data import TensorDataset

+from EIVGeneral.manipulate_tensors import add_noise
+
 total_number_of_datapoints = 2000
 input_range = [-1,1]
 slope = 1.0
@@ -9,17 +11,6 @@ intercept = 0.0
 x_noise_strength = 0.05
 y_noise_strength = 0.1

-def get_normalization(*args):
-    """
-    Returns the mean and standard deviations (in tuples) of the tensors in *args.
-    """
-    normalization_collection = []
-    for t in args:
-        t_mean = torch.mean(t, dim=0, keepdim=True)
-        t_std = torch.std(t, dim=0, keepdim=True)
-        normalization_collection.append((t_mean, t_std))
-    return tuple(normalization_collection)
-
 def load_data(seed=0, splitting_part=0.8, normalize=True,
        return_ground_truth=False):
    """
@@ -37,26 +28,20 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
    """
    random_generator = torch.Generator().manual_seed(seed)
    # draw different seeds for noise and splitting
-    seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator)
+    seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\
+            generator=random_generator)]
    # create new generators from tensor seeds
-    create_generator = lambda tensor_seed:\
-            torch.Generator().manual_seed(tensor_seed.item())
    true_x = input_range[0] + (input_range[1]-input_range[0])\
                  * torch.rand((total_number_of_datapoints,1),
-                          generator=create_generator(seeds[0]))
+                          generator=torch.Generator().manual_seed(seeds[0]))
    true_y = slope * true_x + intercept 
-    noisy_x = true_x + x_noise_strength * \
-            torch.randn((total_number_of_datapoints,1),
-            generator=create_generator(seeds[1]))
-    noisy_y = true_y + y_noise_strength * \
-            torch.randn((total_number_of_datapoints,1),
-            generator=create_generator(seeds[2]))
-    if normalize:
-        normalization_x, normalization_y = get_normalization(noisy_x, noisy_y)
-        noisy_x = (noisy_x-normalization_x[0])/normalization_x[1]
-        true_x = (true_x-normalization_x[0])/normalization_x[1]
-        noisy_y = (noisy_y-normalization_y[0])/normalization_y[1]
-        true_y = (true_y-normalization_y[0])/normalization_y[1]
+    # add noise and normalize x and y
+    (noisy_x, noisy_y), (true_x, true_y) = add_noise(
+            tensor_list=(true_x, true_y),
+            noise_strength_list=(x_noise_strength, y_noise_strength),
+            seed_list=seeds[1:3],
+            normalize=normalize)
+    # create datasets
    dataset_len = noisy_x.shape[0]
    train_len = int(dataset_len*splitting_part)
    test_len = dataset_len - train_len
@@ -75,4 +60,3 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
    else:
        return linear_trainset, linear_testset, true_linear_trainset,\
            true_linear_testset
-
--- a/EIVPackage/EIVData/quadratic.py
+++ b/EIVPackage/EIVData/quadratic.py
@@ -2,6 +2,8 @@ import torch
 import sys
 from torch.utils.data import TensorDataset

+from EIVGeneral.manipulate_tensors import add_noise
+
 total_number_of_datapoints = 2000
 input_range = [-1,1]
 slope = 1.0
@@ -9,18 +11,6 @@ intercept = 0.0
 x_noise_strength = 0.05
 y_noise_strength = 0.1

-def get_normalization(*args):
-    """
-    Returns the mean and standard deviations (in tuples) of the tensors in
-    *args.
-    """
-    normalization_collection = []
-    for t in args:
-        t_mean = torch.mean(t, dim=0, keepdim=True)
-        t_std = torch.std(t, dim=0, keepdim=True)
-        normalization_collection.append((t_mean, t_std))
-    return tuple(normalization_collection)
-
 def load_data(seed=0, splitting_part=0.8, normalize=True,
        return_ground_truth=False):
    """
@@ -38,26 +28,20 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
    """
    random_generator = torch.Generator().manual_seed(seed)
    # draw different seeds for noise and splitting
-    seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator)
+    seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\
+            generator=random_generator)]
    # create new generators from tensor seeds
-    create_generator = lambda tensor_seed:\
-            torch.Generator().manual_seed(tensor_seed.item())
    true_x = input_range[0] + (input_range[1]-input_range[0])\
                  * torch.rand((total_number_of_datapoints,1),
-                          generator=create_generator(seeds[0]))
+                          generator=torch.Generator().manual_seed(seeds[0]))
    true_y = slope * true_x**2 + intercept 
-    noisy_x = true_x + x_noise_strength * \
-            torch.randn((total_number_of_datapoints,1),
-            generator=create_generator(seeds[1]))
-    noisy_y = true_y + y_noise_strength * \
-            torch.randn((total_number_of_datapoints,1),
-            generator=create_generator(seeds[2]))
-    if normalize:
-        normalization_x, normalization_y = get_normalization(noisy_x, noisy_y)
-        noisy_x = (noisy_x-normalization_x[0])/normalization_x[1]
-        true_x = (true_x-normalization_x[0])/normalization_x[1]
-        noisy_y = (noisy_y-normalization_y[0])/normalization_y[1]
-        true_y = (true_y-normalization_y[0])/normalization_y[1]
+    # add noise and normalize x and y
+    (noisy_x, noisy_y), (true_x, true_y) = add_noise(
+            tensor_list=(true_x, true_y),
+            noise_strength_list=(x_noise_strength, y_noise_strength),
+            seed_list=seeds[1:3],
+            normalize=normalize)
+    # create datasets
    dataset_len = noisy_x.shape[0]
    train_len = int(dataset_len*splitting_part)
    test_len = dataset_len - train_len
@@ -76,4 +60,3 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
    else:
        return quadratic_trainset, quadratic_testset, true_quadratic_trainset,\
            true_quadratic_testset
-
--- a/EIVPackage/EIVData/repeated_sampling.py
+++ b/EIVPackage/EIVData/repeated_sampling.py
+class repeated_sampling():
+    """
+    Wraps a data source `dataclass` in order to reuse the ground truth of its
+    test set, which is always loaded with the same seed `fixed_seed`.
+    :param dataclass: Object providing `load_data`, `x_noise_strength` and
+    `y_noise_strength` (presumably a data module such as the linear or
+    quadratic one -- TODO confirm).
+    :param fixed_seed: Integer seed handed to `dataclass.load_data`,
+    defaults to 0.
+    """
+    def __init__(self, dataclass, fixed_seed=0):
+        self.dataclass = dataclass
+        self.fixed_seed = fixed_seed
+        # expose the noise levels of the wrapped data source
+        self.x_noise_strength = dataclass.x_noise_strength
+        self.y_noise_strength = dataclass.y_noise_strength
+
+    def __call__(self, seed=0, splitting_part=0.8, normalize=True,
+            return_ground_truth=False):
+        """
+        Loads the data of `dataclass` using `fixed_seed` and extracts the
+        first two tensors of the ground truth test set.
+        NOTE: unfinished -- `seed` and `return_ground_truth` are not used
+        yet and nothing is returned.
+        :param seed: Integer, currently unused.
+        :param splitting_part: Passed on to `dataclass.load_data`.
+        :param normalize: Passed on to `dataclass.load_data`.
+        :param return_ground_truth: Boolean, currently unused.
+        """
+        # The four-fold unpacking needs the ground truth datasets, so they
+        # are always requested here; forwarding the default
+        # `return_ground_truth=False` would presumably yield fewer datasets
+        # and make the unpacking fail.
+        _, _, _, true_testset = self.dataclass.load_data(
+                        seed=self.fixed_seed, splitting_part=splitting_part,
+                        normalize=normalize,
+                        return_ground_truth=True)
+        true_x, true_y = true_testset.tensors[:2]
+
+
+
+
--- a/EIVPackage/EIVGeneral/manipulate_tensors.py
+++ b/EIVPackage/EIVGeneral/manipulate_tensors.py
+"""
+Collection of functions to manipulate tensors
+"""
+import torch
+
+def get_normalization(t):
+    """
+    Computes the statistics needed to normalize the tensor `t`. Mean and
+    standard deviation are both taken along dimension 0 with `keepdim=True`.
+    :param t: A torch.tensor
+    :returns: The tuple (mean, standard deviation)
+    """
+    reduce_kwargs = {'dim': 0, 'keepdim': True}
+    return torch.mean(t, **reduce_kwargs), torch.std(t, **reduce_kwargs)
+
+def normalize_tensor(t, mean_std):
+    """
+    Shifts and scales the tensor `t`: the mean `mean_std[0]` is subtracted
+    and the result is divided by the standard deviation `mean_std[1]`.
+    :param t: The tensor to normalize
+    :param mean_std: A pair (mean, standard deviation), e.g. as returned by
+    `get_normalization`
+    :returns: The normalized tensor
+    """
+    mean = mean_std[0]
+    std = mean_std[1]
+    centered = t - mean
+    return centered / std
+
+
+def add_noise(tensor_list, noise_strength_list, seed_list, normalize=True):
+    """
+    Adds seeded Gaussian noise to every tensor in `tensor_list` and, if
+    requested, normalizes the results.
+
+    The i-th tensor receives noise drawn with `torch.randn` from a fresh
+    generator seeded with `seed_list[i]` and scaled by
+    `noise_strength_list[i]`. If `normalize` is True (default), the mean and
+    standard deviation of each noisy tensor are computed and used to
+    normalize both the noisy tensor and its unnoisy counterpart. If
+    `normalize` is False, no normalization is performed and the second
+    returned list contains the tensors of `tensor_list` unchanged.
+    :param tensor_list: A list of torch.tensors
+    :param noise_strength_list: A list of positive floats, one per tensor
+    :param seed_list: A list of integers, one per tensor
+    :param normalize: A Boolean, defaults to True.
+    :returns: noisy_tensor_list, unnoisy_tensor_list (both normalized only
+    if `normalize` is True)
+    :raises ValueError: If the three lists differ in length.
+    """
+    tensor_list = list(tensor_list)
+    noise_strength_list = list(noise_strength_list)
+    seed_list = list(seed_list)
+    if not (len(tensor_list) == len(noise_strength_list) == len(seed_list)):
+        # zip would otherwise silently drop the surplus entries
+        raise ValueError(
+                'tensor_list, noise_strength_list and seed_list must have'
+                ' the same length, got %d, %d and %d'
+                % (len(tensor_list), len(noise_strength_list),
+                    len(seed_list)))
+    noisy_t_list = []
+    unnoisy_t_list = []
+    for t, noise, seed in zip(tensor_list, noise_strength_list, seed_list):
+        # a separate generator per tensor keeps the noise reproducible
+        noisy_t = t + noise * torch.randn(t.shape,
+                generator=torch.Generator().manual_seed(seed))
+        if normalize:
+            # the statistics of the noisy tensor are applied to both tensors
+            noisy_t_normalization = get_normalization(noisy_t)
+            noisy_t = normalize_tensor(noisy_t, noisy_t_normalization)
+            t = normalize_tensor(t, noisy_t_normalization)
+        noisy_t_list.append(noisy_t)
+        unnoisy_t_list.append(t)
+    return noisy_t_list, unnoisy_t_list
+