Skip to content
Snippets Groups Projects
Commit 4d96c6dd authored by Jörg Martin's avatar Jörg Martin
Browse files

Simplified quadratic and linear

parent 667997a2
Branches
Tags
No related merge requests found
......@@ -2,6 +2,8 @@ import torch
import sys
from torch.utils.data import TensorDataset
from EIVGeneral.manipulate_tensors import add_noise
# Number of samples generated for the whole dataset (before the train/test split)
total_number_of_datapoints = 2000
# Lower and upper bound of the interval the noise-free inputs are drawn from
input_range = [-1,1]
# Factor multiplying the noise-free input in the ground-truth relation
slope = 1.0
......@@ -9,17 +11,6 @@ intercept = 0.0
# Standard deviation of the Gaussian noise added to the inputs
x_noise_strength = 0.05
# Standard deviation of the Gaussian noise added to the targets
y_noise_strength = 0.1
def get_normalization(*args):
    """
    Compute normalization statistics for every tensor in *args.

    Mean and standard deviation are both taken along dimension 0 and keep
    that dimension, so they broadcast against the tensor they came from.

    :param args: Any number of torch tensors
    :returns: A tuple holding one `(mean, std)` pair per tensor, in the
    order the tensors were passed (empty if no tensor was passed)
    """
    return tuple(
        (torch.mean(tensor, dim=0, keepdim=True),
         torch.std(tensor, dim=0, keepdim=True))
        for tensor in args
    )
def load_data(seed=0, splitting_part=0.8, normalize=True,
return_ground_truth=False):
"""
......@@ -37,26 +28,20 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
"""
random_generator = torch.Generator().manual_seed(seed)
# draw different seeds for noise and splitting
seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator)
seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\
generator=random_generator)]
# create new generators from tensor seeds
create_generator = lambda tensor_seed:\
torch.Generator().manual_seed(tensor_seed.item())
true_x = input_range[0] + (input_range[1]-input_range[0])\
* torch.rand((total_number_of_datapoints,1),
generator=create_generator(seeds[0]))
generator=torch.Generator().manual_seed(seeds[0]))
true_y = slope * true_x + intercept
noisy_x = true_x + x_noise_strength * \
torch.randn((total_number_of_datapoints,1),
generator=create_generator(seeds[1]))
noisy_y = true_y + y_noise_strength * \
torch.randn((total_number_of_datapoints,1),
generator=create_generator(seeds[2]))
if normalize:
normalization_x, normalization_y = get_normalization(noisy_x, noisy_y)
noisy_x = (noisy_x-normalization_x[0])/normalization_x[1]
true_x = (true_x-normalization_x[0])/normalization_x[1]
noisy_y = (noisy_y-normalization_y[0])/normalization_y[1]
true_y = (true_y-normalization_y[0])/normalization_y[1]
# add noise and normalize x and y
(noisy_x, noisy_y), (true_x, true_y) = add_noise(
tensor_list=(true_x, true_y),
noise_strength_list=(x_noise_strength, y_noise_strength),
seed_list=seeds[1:3],
normalize=normalize)
# create datasets
dataset_len = noisy_x.shape[0]
train_len = int(dataset_len*splitting_part)
test_len = dataset_len - train_len
......@@ -75,4 +60,3 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
else:
return linear_trainset, linear_testset, true_linear_trainset,\
true_linear_testset
......@@ -2,6 +2,8 @@ import torch
import sys
from torch.utils.data import TensorDataset
from EIVGeneral.manipulate_tensors import add_noise
# Number of samples generated for the whole dataset (before the train/test split)
total_number_of_datapoints = 2000
# Lower and upper bound of the interval the noise-free inputs are drawn from
input_range = [-1,1]
# Factor multiplying the squared noise-free input in the ground-truth relation
slope = 1.0
......@@ -9,18 +11,6 @@ intercept = 0.0
# Standard deviation of the Gaussian noise added to the inputs
x_noise_strength = 0.05
# Standard deviation of the Gaussian noise added to the targets
y_noise_strength = 0.1
def get_normalization(*args):
    """
    Compute normalization statistics for every tensor in *args.

    Mean and standard deviation are both taken along dimension 0 and keep
    that dimension, so they broadcast against the tensor they came from.

    :param args: Any number of torch tensors
    :returns: A tuple holding one `(mean, std)` pair per tensor, in the
    order the tensors were passed (empty if no tensor was passed)
    """
    return tuple(
        (torch.mean(tensor, dim=0, keepdim=True),
         torch.std(tensor, dim=0, keepdim=True))
        for tensor in args
    )
def load_data(seed=0, splitting_part=0.8, normalize=True,
return_ground_truth=False):
"""
......@@ -38,26 +28,20 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
"""
random_generator = torch.Generator().manual_seed(seed)
# draw different seeds for noise and splitting
seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator)
seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\
generator=random_generator)]
# create new generators from tensor seeds
create_generator = lambda tensor_seed:\
torch.Generator().manual_seed(tensor_seed.item())
true_x = input_range[0] + (input_range[1]-input_range[0])\
* torch.rand((total_number_of_datapoints,1),
generator=create_generator(seeds[0]))
generator=torch.Generator().manual_seed(seeds[0]))
true_y = slope * true_x**2 + intercept
noisy_x = true_x + x_noise_strength * \
torch.randn((total_number_of_datapoints,1),
generator=create_generator(seeds[1]))
noisy_y = true_y + y_noise_strength * \
torch.randn((total_number_of_datapoints,1),
generator=create_generator(seeds[2]))
if normalize:
normalization_x, normalization_y = get_normalization(noisy_x, noisy_y)
noisy_x = (noisy_x-normalization_x[0])/normalization_x[1]
true_x = (true_x-normalization_x[0])/normalization_x[1]
noisy_y = (noisy_y-normalization_y[0])/normalization_y[1]
true_y = (true_y-normalization_y[0])/normalization_y[1]
# add noise and normalize x and y
(noisy_x, noisy_y), (true_x, true_y) = add_noise(
tensor_list=(true_x, true_y),
noise_strength_list=(x_noise_strength, y_noise_strength),
seed_list=seeds[1:3],
normalize=normalize)
# create datasets
dataset_len = noisy_x.shape[0]
train_len = int(dataset_len*splitting_part)
test_len = dataset_len - train_len
......@@ -76,4 +60,3 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
else:
return quadratic_trainset, quadratic_testset, true_quadratic_trainset,\
true_quadratic_testset
class repeated_sampling():
def __init__(self, dataclass, fixed_seed=0):
self.dataclass = dataclass
self.fixed_seed = fixed_seed
self.x_noise_strength = dataclass.x_noise_strength
self.y_noise_strength = dataclass.y_noise_strength
def __call__(self,seed=0, splitting_part=0.8, normalize=True,
return_ground_truth=False):
_, _, _, true_testset = self.dataclass.load_data(
seed=self.fixed_seed, splitting_part=splitting_part,
normalize=normalize,
return_ground_truth=return_ground_truth)
true_x, true_y = true_testset.tensors[:2]
"""
Collection of functions to manipulate tensors
"""
import torch
def get_normalization(t):
    """
    Compute the statistics used to normalize the tensor `t`.

    Both statistics are reduced over dimension 0 and keep that dimension
    (with size 1), so they broadcast against `t`.

    :param t: A torch.tensor
    :returns: The tuple `(mean, standard deviation)` of `t`
    """
    return (t.mean(dim=0, keepdim=True), t.std(dim=0, keepdim=True))
def normalize_tensor(t, mean_std):
    """
    Shift and rescale the tensor `t` with precomputed statistics.

    :param t: The tensor to normalize
    :param mean_std: A pair whose first entry is the mean to subtract and
    whose second entry is the standard deviation to divide by
    :returns: `(t - mean) / std`
    """
    mean = mean_std[0]
    std = mean_std[1]
    centered = t - mean
    return centered / std
def add_noise(tensor_list, noise_strength_list, seed_list, normalize=True):
    """
    Takes the tensors in `tensor_list`, adds random noise using the standard
    deviations in `noise_strength_list` and the seeds in `seed_list`, then, if
    normalize is True (default), computes according normalization and returns
    the normalized noisy tensors and the normalized unnoisy tensors. If
    `normalize` is False, no normalization is performed and the second returned
    list will coincide with `tensor_list`.

    The normalization statistics are always computed from the *noisy* tensor
    and then applied to both the noisy and the unnoisy version, so both stay
    on the same scale.

    :param tensor_list: A list of torch.tensors
    :param noise_strength_list: A list of positive floats
    :param seed_list: A list of integers.
    :param normalize: A Boolean, defaults to True.
    :returns: noisy_tensor_list, unnoisy_tensor_list, both normalized
    :raises ValueError: If the three input lists differ in length.
    """
    # Materialize the inputs so their lengths can be compared even when an
    # arbitrary iterable was passed.
    tensor_list = list(tensor_list)
    noise_strength_list = list(noise_strength_list)
    seed_list = list(seed_list)
    # zip() would silently drop the surplus entries of the longer lists and
    # return fewer tensors than were passed in; fail loudly instead.
    if not (len(tensor_list) == len(noise_strength_list) == len(seed_list)):
        raise ValueError(
            "tensor_list, noise_strength_list and seed_list must have the "
            "same length, got %d, %d and %d" % (
                len(tensor_list), len(noise_strength_list), len(seed_list)))
    noisy_t_list = []
    unnoisy_t_list = []
    for t, noise, seed in zip(tensor_list, noise_strength_list, seed_list):
        # A fresh generator per tensor makes the noise depend only on `seed`
        generator = torch.Generator().manual_seed(seed)
        noisy_t = t + noise * torch.randn(t.shape, generator=generator)
        if normalize:
            noisy_t_normalization = get_normalization(noisy_t)
            noisy_t = normalize_tensor(noisy_t, noisy_t_normalization)
            t = normalize_tensor(t, noisy_t_normalization)
        noisy_t_list.append(noisy_t)
        unnoisy_t_list.append(t)
    return noisy_t_list, unnoisy_t_list
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment