diff --git a/EIVPackage/EIVData/linear.py b/EIVPackage/EIVData/linear.py index 9b69b6b310cf61382e8d9f5204f6ae18a2d1c2fc..75c7e400e558b3825b700ce9d4108786e4a9bc62 100644 --- a/EIVPackage/EIVData/linear.py +++ b/EIVPackage/EIVData/linear.py @@ -2,6 +2,8 @@ import torch import sys from torch.utils.data import TensorDataset +from EIVGeneral.manipulate_tensors import add_noise + total_number_of_datapoints = 2000 input_range = [-1,1] slope = 1.0 @@ -9,17 +11,6 @@ intercept = 0.0 x_noise_strength = 0.05 y_noise_strength = 0.1 -def get_normalization(*args): - """ - Returns the mean and standard deviations (in tuples) of the tensors in *args. - """ - normalization_collection = [] - for t in args: - t_mean = torch.mean(t, dim=0, keepdim=True) - t_std = torch.std(t, dim=0, keepdim=True) - normalization_collection.append((t_mean, t_std)) - return tuple(normalization_collection) - def load_data(seed=0, splitting_part=0.8, normalize=True, return_ground_truth=False): """ @@ -37,26 +28,20 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, """ random_generator = torch.Generator().manual_seed(seed) # draw different seeds for noise and splitting - seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator) + seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\ + generator=random_generator)] # create new generators from tensor seeds - create_generator = lambda tensor_seed:\ - torch.Generator().manual_seed(tensor_seed.item()) true_x = input_range[0] + (input_range[1]-input_range[0])\ * torch.rand((total_number_of_datapoints,1), - generator=create_generator(seeds[0])) + generator=torch.Generator().manual_seed(seeds[0])) true_y = slope * true_x + intercept - noisy_x = true_x + x_noise_strength * \ - torch.randn((total_number_of_datapoints,1), - generator=create_generator(seeds[1])) - noisy_y = true_y + y_noise_strength * \ - torch.randn((total_number_of_datapoints,1), - generator=create_generator(seeds[2])) - if normalize: - normalization_x, normalization_y = 
get_normalization(noisy_x, noisy_y) - noisy_x = (noisy_x-normalization_x[0])/normalization_x[1] - true_x = (true_x-normalization_x[0])/normalization_x[1] - noisy_y = (noisy_y-normalization_y[0])/normalization_y[1] - true_y = (true_y-normalization_y[0])/normalization_y[1] + # add noise and normalize x and y + (noisy_x, noisy_y), (true_x, true_y) = add_noise( + tensor_list=(true_x, true_y), + noise_strength_list=(x_noise_strength, y_noise_strength), + seed_list=seeds[1:3], + normalize=normalize) + # create datasets dataset_len = noisy_x.shape[0] train_len = int(dataset_len*splitting_part) test_len = dataset_len - train_len @@ -75,4 +60,3 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, else: return linear_trainset, linear_testset, true_linear_trainset,\ true_linear_testset - diff --git a/EIVPackage/EIVData/quadratic.py b/EIVPackage/EIVData/quadratic.py index aa4f4605094822116d36e12f64e854c38849a406..a42148205f481665dc420172e5797b84ffaab4ad 100644 --- a/EIVPackage/EIVData/quadratic.py +++ b/EIVPackage/EIVData/quadratic.py @@ -2,6 +2,8 @@ import torch import sys from torch.utils.data import TensorDataset +from EIVGeneral.manipulate_tensors import add_noise + total_number_of_datapoints = 2000 input_range = [-1,1] slope = 1.0 @@ -9,18 +11,6 @@ intercept = 0.0 x_noise_strength = 0.05 y_noise_strength = 0.1 -def get_normalization(*args): - """ - Returns the mean and standard deviations (in tuples) of the tensors in - *args. 
- """ - normalization_collection = [] - for t in args: - t_mean = torch.mean(t, dim=0, keepdim=True) - t_std = torch.std(t, dim=0, keepdim=True) - normalization_collection.append((t_mean, t_std)) - return tuple(normalization_collection) - def load_data(seed=0, splitting_part=0.8, normalize=True, return_ground_truth=False): """ @@ -38,26 +28,20 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, """ random_generator = torch.Generator().manual_seed(seed) # draw different seeds for noise and splitting - seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator) + seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\ + generator=random_generator)] # create new generators from tensor seeds - create_generator = lambda tensor_seed:\ - torch.Generator().manual_seed(tensor_seed.item()) true_x = input_range[0] + (input_range[1]-input_range[0])\ * torch.rand((total_number_of_datapoints,1), - generator=create_generator(seeds[0])) + generator=torch.Generator().manual_seed(seeds[0])) true_y = slope * true_x**2 + intercept - noisy_x = true_x + x_noise_strength * \ - torch.randn((total_number_of_datapoints,1), - generator=create_generator(seeds[1])) - noisy_y = true_y + y_noise_strength * \ - torch.randn((total_number_of_datapoints,1), - generator=create_generator(seeds[2])) - if normalize: - normalization_x, normalization_y = get_normalization(noisy_x, noisy_y) - noisy_x = (noisy_x-normalization_x[0])/normalization_x[1] - true_x = (true_x-normalization_x[0])/normalization_x[1] - noisy_y = (noisy_y-normalization_y[0])/normalization_y[1] - true_y = (true_y-normalization_y[0])/normalization_y[1] + # add noise and normalize x and y + (noisy_x, noisy_y), (true_x, true_y) = add_noise( + tensor_list=(true_x, true_y), + noise_strength_list=(x_noise_strength, y_noise_strength), + seed_list=seeds[1:3], + normalize=normalize) + # create datasets dataset_len = noisy_x.shape[0] train_len = int(dataset_len*splitting_part) test_len = dataset_len - train_len @@ -76,4 
+60,3 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, else: return quadratic_trainset, quadratic_testset, true_quadratic_trainset,\ true_quadratic_testset - diff --git a/EIVPackage/EIVData/repeated_sampling.py b/EIVPackage/EIVData/repeated_sampling.py new file mode 100644 index 0000000000000000000000000000000000000000..360cd508b065b4ddb6c475d7c4e8142369ad4884 --- /dev/null +++ b/EIVPackage/EIVData/repeated_sampling.py @@ -0,0 +1,18 @@ +class repeated_sampling(): + def __init__(self, dataclass, fixed_seed=0): + self.dataclass = dataclass + self.fixed_seed = fixed_seed + self.x_noise_strength = dataclass.x_noise_strength + self.y_noise_strength = dataclass.y_noise_strength + + def __call__(self,seed=0, splitting_part=0.8, normalize=True, + return_ground_truth=False): + _, _, _, true_testset = self.dataclass.load_data( + seed=self.fixed_seed, splitting_part=splitting_part, + normalize=normalize, + return_ground_truth=return_ground_truth) + true_x, true_y = true_testset.tensors[:2] + + + + diff --git a/EIVPackage/EIVGeneral/manipulate_tensors.py b/EIVPackage/EIVGeneral/manipulate_tensors.py new file mode 100644 index 0000000000000000000000000000000000000000..8fa4fd8fee483f8ca250f55e067ff3a2a397e17a --- /dev/null +++ b/EIVPackage/EIVGeneral/manipulate_tensors.py @@ -0,0 +1,48 @@ +""" +Collection of functions to manipulate tensors +""" +import torch + +def get_normalization(t): + """ + Returns the mean and standard deviation (as a tuple) of the tensor `t` + """ + t_mean = torch.mean(t, dim=0, keepdim=True) + t_std = torch.std(t, dim=0, keepdim=True) + return (t_mean, t_std) + +def normalize_tensor(t, mean_std): + """ + Normalize the tensor `t` by the mean `mean_std[0]` and the standard + deviation `mean_std[1]` + """ + return (t-mean_std[0])/mean_std[1] + + +def add_noise(tensor_list, noise_strength_list, seed_list, normalize=True): + """ + Takes the tensors in `tensor_list`, adds random noise using the standard + deviations in `noise_strength_list` and the 
seeds in `seed_list`, then, if + normalize is True (default), computes the corresponding normalization and returns + the normalized noisy tensors and the normalized unnoisy tensors. If + `normalize` is False, no normalization is performed and the second returned + list will coincide with `tensor_list`. + :param tensor_list: A list of torch.tensors + :param noise_strength_list: A list of positive floats + :param seed_list: A list of integers. + :param normalize: A Boolean, defaults to True. + :returns: noisy_tensor_list, unnoisy_tensor_list, both normalized if `normalize` is True + """ + noisy_t_list = [] + unnoisy_t_list = [] + for t,noise,seed in zip(tensor_list, noise_strength_list, seed_list): + noisy_t = t + noise * torch.randn(t.shape, + generator=torch.Generator().manual_seed(seed)) + if normalize: + noisy_t_normalization = get_normalization(noisy_t) + noisy_t = normalize_tensor(noisy_t, noisy_t_normalization) + t = normalize_tensor(t, noisy_t_normalization) + noisy_t_list.append(noisy_t) + unnoisy_t_list.append(t) + return noisy_t_list, unnoisy_t_list +