Skip to content
Snippets Groups Projects
Commit b4f1f576 authored by Jörg Martin's avatar Jörg Martin
Browse files

Simulated datasets now use the same seed for creation

parent ca655781
No related branches found
No related tags found
No related merge requests found
...@@ -2,7 +2,8 @@ import torch ...@@ -2,7 +2,8 @@ import torch
import sys import sys
from torch.utils.data import TensorDataset from torch.utils.data import TensorDataset
from EIVGeneral.manipulate_tensors import add_noise from EIVGeneral.manipulate_tensors import add_noise, normalize_tensor,\
unnormalize_tensor
total_number_of_datapoints = 2000 total_number_of_datapoints = 2000
input_range = [-4,4] input_range = [-4,4]
...@@ -11,38 +12,63 @@ intercept = 0.0 ...@@ -11,38 +12,63 @@ intercept = 0.0
x_noise_strength = 0.05 * (input_range[1] - input_range[0])/2 x_noise_strength = 0.05 * (input_range[1] - input_range[0])/2
y_noise_strength = 3 y_noise_strength = 3
func = lambda true_x: slope * true_x**3 + intercept func = lambda true_x: slope * true_x**3 + intercept
def load_data(seed=0, splitting_part=0.8, normalize=True, def load_data(seed=0, splitting_part=0.8, normalize=True,
return_ground_truth=False): return_ground_truth=False,
return_normalized_func=False,
fixed_seed = 0):
""" """
Loads one-dimensional, cubic data as in Hernandez-Lobato, Adams 2015. Loads one-dimensional, cubic data as in Hernandez-Lobato, Adams 2015.
:param seed: Seed for drawing and splitting the data. :param seed: Seed for shuffling the data before splitting.
:param splitting_part: Which fraction of the data to use as training :param splitting_part: Which fraction of the data to use as training
data. Defaults to 0.8. data. Defaults to 0.8.
:param normalize: Whether to normalize the data, defaults to True. :param normalize: Whether to normalize the data, defaults to True.
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False. also be returned. Defaults to False.
:returns: cubic_trainset, cubic_testset if return_ground_truth is False, :param return_normalized_func: Boolean (default False). If True, the
normalized version of the used function is returned as a last element.
:param fixed_seed: Used to generate the full dataset (test and train).
Defaults to 0.
:returns: cubic_trainset, cubic_testset, (, normalized_func) if
return_ground_truth is False,
else cubic_trainset, cubic_testset, true_cubic_trainset, else cubic_trainset, cubic_testset, true_cubic_trainset,
true_cubic_testset. The later two return **four tensors**: The true x,y and true_cubic_testset, (, normalized_func). The "true" datasets each return
their noisy counterparts. **four tensors**: The true x,y and their noisy counterparts.
""" """
random_generator = torch.Generator().manual_seed(seed)
# draw different seeds for noise and splitting # draw different seeds for noise and splitting
random_generator = torch.Generator().manual_seed(fixed_seed)
seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\ seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\
generator=random_generator)] generator=random_generator)]
# create new generators from tensor seeds # create new generators from tensor seeds
true_x = input_range[0] + (input_range[1]-input_range[0])\ true_x = input_range[0] + (input_range[1]-input_range[0])\
* torch.rand((total_number_of_datapoints,1), * torch.rand((total_number_of_datapoints,1),
generator=torch.Generator().manual_seed(seeds[0])) generator=torch.Generator().manual_seed(seeds[0]))
true_y = func(true_x) true_y = func(true_x)
# add noise and normalize x and y # add noise and normalize x and y
(noisy_x, noisy_y), (true_x, true_y) = add_noise( (noisy_x, noisy_y), (true_x, true_y), normalization_list = add_noise(
tensor_list=(true_x, true_y), tensor_list=(true_x, true_y),
noise_strength_list=(x_noise_strength, y_noise_strength), noise_strength_list=(x_noise_strength, y_noise_strength),
seed_list=seeds[1:3], seed_list=seeds[1:3],
normalize=normalize) normalize=normalize,
# create datasets return_normalization=True)
def normalized_func(x):
unnormalized_x = unnormalize_tensor(x, normalization_list[0])
y = func(unnormalized_x)
normalized_y = normalize_tensor(y, normalization_list[1])
return normalized_y
dataset_len = noisy_x.shape[0] dataset_len = noisy_x.shape[0]
# shuffle via seed
new_order = torch.randperm(dataset_len,
generator=torch.Generator().manual_seed(seed))
true_x = true_x[new_order, ...]
true_y = true_y[new_order, ...]
noisy_x = noisy_x[new_order, ...]
noisy_y = noisy_y[new_order, ...]
# create datasets
train_len = int(dataset_len*splitting_part) train_len = int(dataset_len*splitting_part)
test_len = dataset_len - train_len test_len = dataset_len - train_len
true_train_x, true_test_x = torch.split(true_x, [train_len, test_len]) true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
...@@ -55,8 +81,19 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, ...@@ -55,8 +81,19 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
noisy_train_x, noisy_train_y) noisy_train_x, noisy_train_y)
true_cubic_testset = TensorDataset(true_test_x, true_test_y, true_cubic_testset = TensorDataset(true_test_x, true_test_y,
noisy_test_x, noisy_test_y) noisy_test_x, noisy_test_y)
# return different objects, depending on Booleans
if not return_ground_truth: if not return_ground_truth:
return cubic_trainset, cubic_testset if not return_normalized_func:
return cubic_trainset, cubic_testset
else:
return cubic_trainset, cubic_testset, normalized_func
else: else:
return cubic_trainset, cubic_testset, true_cubic_trainset,\ if not return_normalized_func:
true_cubic_testset return cubic_trainset, cubic_testset, true_cubic_trainset,\
true_cubic_testset
else:
return cubic_trainset, cubic_testset, true_cubic_trainset,\
true_cubic_testset, normalized_func
...@@ -2,7 +2,8 @@ import torch ...@@ -2,7 +2,8 @@ import torch
import sys import sys
from torch.utils.data import TensorDataset from torch.utils.data import TensorDataset
from EIVGeneral.manipulate_tensors import add_noise from EIVGeneral.manipulate_tensors import add_noise, normalize_tensor,\
unnormalize_tensor
total_number_of_datapoints = 2000 total_number_of_datapoints = 2000
input_range = [-1,1] input_range = [-1,1]
...@@ -13,37 +14,61 @@ y_noise_strength = 0.1 ...@@ -13,37 +14,61 @@ y_noise_strength = 0.1
func = lambda true_x: slope * true_x + intercept func = lambda true_x: slope * true_x + intercept
def load_data(seed=0, splitting_part=0.8, normalize=True, def load_data(seed=0, splitting_part=0.8, normalize=True,
return_ground_truth=False): return_ground_truth=False,
return_normalized_func=False,
fixed_seed = 0):
""" """
Loads one-dimensional data Loads one-dimensional data
:param seed: Seed for drawing and splitting the data. :param seed: Seed for shuffling the data before splitting.
:param splitting_part: Which fraction of the data to use as training :param splitting_part: Which fraction of the data to use as training
data. Defaults to 0.8. data. Defaults to 0.8.
:param normalize: Whether to normalize the data, defaults to True. :param normalize: Whether to normalize the data, defaults to True.
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False. also be returned. Defaults to False.
:returns: linear_trainset, linear_testset if return_ground_truth is False, :param return_normalized_func: Boolean (default False). If True, the
normalized version of the used function is returned as a last element.
:param fixed_seed: Used to generate the full dataset (test and train).
Defaults to 0.
:returns: linear_trainset, linear_testset, (, normalized_func) if
return_ground_truth is False,
else linear_trainset, linear_testset, true_linear_trainset, else linear_trainset, linear_testset, true_linear_trainset,
true_linear_testset. The later two return **four tensors**: The true x,y and true_linear_testset, (, normalized_func). The "true" datasets each return
their noisy counterparts. **four tensors**: The true x,y and their noisy counterparts.
""" """
random_generator = torch.Generator().manual_seed(seed)
# draw different seeds for noise and splitting # draw different seeds for noise and splitting
random_generator = torch.Generator().manual_seed(fixed_seed)
seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\ seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\
generator=random_generator)] generator=random_generator)]
# create new generators from tensor seeds # create new generators from tensor seeds
true_x = input_range[0] + (input_range[1]-input_range[0])\ true_x = input_range[0] + (input_range[1]-input_range[0])\
* torch.rand((total_number_of_datapoints,1), * torch.rand((total_number_of_datapoints,1),
generator=torch.Generator().manual_seed(seeds[0])) generator=torch.Generator().manual_seed(seeds[0]))
true_y = func(true_x) true_y = func(true_x)
# add noise and normalize x and y # add noise and normalize x and y
(noisy_x, noisy_y), (true_x, true_y) = add_noise( (noisy_x, noisy_y), (true_x, true_y), normalization_list = add_noise(
tensor_list=(true_x, true_y), tensor_list=(true_x, true_y),
noise_strength_list=(x_noise_strength, y_noise_strength), noise_strength_list=(x_noise_strength, y_noise_strength),
seed_list=seeds[1:3], seed_list=seeds[1:3],
normalize=normalize) normalize=normalize,
# create datasets return_normalization=True)
def normalized_func(x):
unnormalized_x = unnormalize_tensor(x, normalization_list[0])
y = func(unnormalized_x)
normalized_y = normalize_tensor(y, normalization_list[1])
return normalized_y
dataset_len = noisy_x.shape[0] dataset_len = noisy_x.shape[0]
# shuffle via seed
new_order = torch.randperm(dataset_len,
generator=torch.Generator().manual_seed(seed))
true_x = true_x[new_order, ...]
true_y = true_y[new_order, ...]
noisy_x = noisy_x[new_order, ...]
noisy_y = noisy_y[new_order, ...]
# create datasets
train_len = int(dataset_len*splitting_part) train_len = int(dataset_len*splitting_part)
test_len = dataset_len - train_len test_len = dataset_len - train_len
true_train_x, true_test_x = torch.split(true_x, [train_len, test_len]) true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
...@@ -56,8 +81,19 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, ...@@ -56,8 +81,19 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
noisy_train_x, noisy_train_y) noisy_train_x, noisy_train_y)
true_linear_testset = TensorDataset(true_test_x, true_test_y, true_linear_testset = TensorDataset(true_test_x, true_test_y,
noisy_test_x, noisy_test_y) noisy_test_x, noisy_test_y)
# return different objects, depending on Booleans
if not return_ground_truth: if not return_ground_truth:
return linear_trainset, linear_testset if not return_normalized_func:
return linear_trainset, linear_testset
else:
return linear_trainset, linear_testset, normalized_func
else: else:
return linear_trainset, linear_testset, true_linear_trainset,\ if not return_normalized_func:
true_linear_testset return linear_trainset, linear_testset, true_linear_trainset,\
true_linear_testset
else:
return linear_trainset, linear_testset, true_linear_trainset,\
true_linear_testset, normalized_func
...@@ -2,7 +2,8 @@ import torch ...@@ -2,7 +2,8 @@ import torch
import sys import sys
from torch.utils.data import TensorDataset from torch.utils.data import TensorDataset
from EIVGeneral.manipulate_tensors import add_noise from EIVGeneral.manipulate_tensors import add_noise, normalize_tensor,\
unnormalize_tensor
total_number_of_datapoints = 2000 total_number_of_datapoints = 2000
input_range = [-1,1] input_range = [-1,1]
...@@ -13,37 +14,61 @@ y_noise_strength = 0.1 ...@@ -13,37 +14,61 @@ y_noise_strength = 0.1
func = lambda true_x: slope * true_x**2 + intercept func = lambda true_x: slope * true_x**2 + intercept
def load_data(seed=0, splitting_part=0.8, normalize=True, def load_data(seed=0, splitting_part=0.8, normalize=True,
return_ground_truth=False): return_ground_truth=False,
return_normalized_func=False,
fixed_seed = 0):
""" """
Loads one-dimensional data Loads one-dimensional data
:param seed: Seed for drawing and splitting the data. :param seed: Seed for shuffling the data before splitting.
:param splitting_part: Which fraction of the data to use as training :param splitting_part: Which fraction of the data to use as training
data. Defaults to 0.8. data. Defaults to 0.8.
:param normalize: Whether to normalize the data, defaults to True. :param normalize: Whether to normalize the data, defaults to True.
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False. also be returned. Defaults to False.
:returns: quadratic_trainset, quadratic_testset if return_ground_truth is False, :param return_normalized_func: Boolean (default False). If True, the
normalized version of the used function is returned as a last element.
:param fixed_seed: Used to generate the full dataset (test and train).
Defaults to 0.
:returns: quadratic_trainset, quadratic_testset, (, normalized_func) if
return_ground_truth is False,
else quadratic_trainset, quadratic_testset, true_quadratic_trainset, else quadratic_trainset, quadratic_testset, true_quadratic_trainset,
true_quadratic_testset. The later two return **four tensors**: The true x,y and true_quadratic_testset, (, normalized_func). The "true" datasets each return
their noisy counterparts. **four tensors**: The true x,y and their noisy counterparts.
""" """
random_generator = torch.Generator().manual_seed(seed)
# draw different seeds for noise and splitting # draw different seeds for noise and splitting
random_generator = torch.Generator().manual_seed(fixed_seed)
seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\ seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\
generator=random_generator)] generator=random_generator)]
# create new generators from tensor seeds # create new generators from tensor seeds
true_x = input_range[0] + (input_range[1]-input_range[0])\ true_x = input_range[0] + (input_range[1]-input_range[0])\
* torch.rand((total_number_of_datapoints,1), * torch.rand((total_number_of_datapoints,1),
generator=torch.Generator().manual_seed(seeds[0])) generator=torch.Generator().manual_seed(seeds[0]))
true_y = func(true_x) true_y = func(true_x)
# add noise and normalize x and y # add noise and normalize x and y
(noisy_x, noisy_y), (true_x, true_y) = add_noise( (noisy_x, noisy_y), (true_x, true_y), normalization_list = add_noise(
tensor_list=(true_x, true_y), tensor_list=(true_x, true_y),
noise_strength_list=(x_noise_strength, y_noise_strength), noise_strength_list=(x_noise_strength, y_noise_strength),
seed_list=seeds[1:3], seed_list=seeds[1:3],
normalize=normalize) normalize=normalize,
# create datasets return_normalization=True)
def normalized_func(x):
unnormalized_x = unnormalize_tensor(x, normalization_list[0])
y = func(unnormalized_x)
normalized_y = normalize_tensor(y, normalization_list[1])
return normalized_y
dataset_len = noisy_x.shape[0] dataset_len = noisy_x.shape[0]
# shuffle via seed
new_order = torch.randperm(dataset_len,
generator=torch.Generator().manual_seed(seed))
true_x = true_x[new_order, ...]
true_y = true_y[new_order, ...]
noisy_x = noisy_x[new_order, ...]
noisy_y = noisy_y[new_order, ...]
# create datasets
train_len = int(dataset_len*splitting_part) train_len = int(dataset_len*splitting_part)
test_len = dataset_len - train_len test_len = dataset_len - train_len
true_train_x, true_test_x = torch.split(true_x, [train_len, test_len]) true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
...@@ -56,8 +81,19 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, ...@@ -56,8 +81,19 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
noisy_train_x, noisy_train_y) noisy_train_x, noisy_train_y)
true_quadratic_testset = TensorDataset(true_test_x, true_test_y, true_quadratic_testset = TensorDataset(true_test_x, true_test_y,
noisy_test_x, noisy_test_y) noisy_test_x, noisy_test_y)
# return different objects, depending on Booleans
if not return_ground_truth: if not return_ground_truth:
return quadratic_trainset, quadratic_testset if not return_normalized_func:
return quadratic_trainset, quadratic_testset
else:
return quadratic_trainset, quadratic_testset, normalized_func
else: else:
return quadratic_trainset, quadratic_testset, true_quadratic_trainset,\ if not return_normalized_func:
true_quadratic_testset return quadratic_trainset, quadratic_testset, true_quadratic_trainset,\
true_quadratic_testset
else:
return quadratic_trainset, quadratic_testset, true_quadratic_trainset,\
true_quadratic_testset, normalized_func
...@@ -2,7 +2,8 @@ import torch ...@@ -2,7 +2,8 @@ import torch
import sys import sys
from torch.utils.data import TensorDataset from torch.utils.data import TensorDataset
from EIVGeneral.manipulate_tensors import add_noise from EIVGeneral.manipulate_tensors import add_noise, normalize_tensor,\
unnormalize_tensor
total_number_of_datapoints = 2000 total_number_of_datapoints = 2000
input_range = [-0.2,0.8] input_range = [-0.2,0.8]
...@@ -14,37 +15,61 @@ func = lambda true_x: true_x +\ ...@@ -14,37 +15,61 @@ func = lambda true_x: true_x +\
torch.sin(4 * torch.pi * true_x) torch.sin(4 * torch.pi * true_x)
def load_data(seed=0, splitting_part=0.8, normalize=True, def load_data(seed=0, splitting_part=0.8, normalize=True,
return_ground_truth=False): return_ground_truth=False,
return_normalized_func=False,
fixed_seed = 0):
""" """
Loads one-dimensional, sine shaped data as in Blundell et al. 2014. Loads one-dimensional, sine shaped data as in Blundell et al. 2014.
:param seed: Seed for drawing and splitting the data. :param seed: Seed for shuffling the data before splitting.
:param splitting_part: Which fraction of the data to use as training :param splitting_part: Which fraction of the data to use as training
data. Defaults to 0.8. data. Defaults to 0.8.
:param normalize: Whether to normalize the data, defaults to True. :param normalize: Whether to normalize the data, defaults to True.
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False. also be returned. Defaults to False.
:returns: sine_trainset, sine_testset if return_ground_truth is False, :param return_normalized_func: Boolean (default False). If True, the
normalized version of the used function is returned as a last element.
:param fixed_seed: Used to generate the full dataset (test and train).
Defaults to 0.
:returns: sine_trainset, sine_testset, (, normalized_func) if
return_ground_truth is False,
else sine_trainset, sine_testset, true_sine_trainset, else sine_trainset, sine_testset, true_sine_trainset,
true_sine_testset. The later two return **four tensors**: The true x,y and true_sine_testset, (, normalized_func). The "true" datasets each return
their noisy counterparts. **four tensors**: The true x,y and their noisy counterparts.
""" """
random_generator = torch.Generator().manual_seed(seed)
# draw different seeds for noise and splitting # draw different seeds for noise and splitting
random_generator = torch.Generator().manual_seed(fixed_seed)
seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\ seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\
generator=random_generator)] generator=random_generator)]
# create new generators from tensor seeds # create new generators from tensor seeds
true_x = input_range[0] + (input_range[1]-input_range[0])\ true_x = input_range[0] + (input_range[1]-input_range[0])\
* torch.rand((total_number_of_datapoints,1), * torch.rand((total_number_of_datapoints,1),
generator=torch.Generator().manual_seed(seeds[0])) generator=torch.Generator().manual_seed(seeds[0]))
true_y = func(true_x) true_y = func(true_x)
# add noise and normalize x and y # add noise and normalize x and y
(noisy_x, noisy_y), (true_x, true_y) = add_noise( (noisy_x, noisy_y), (true_x, true_y), normalization_list = add_noise(
tensor_list=(true_x, true_y), tensor_list=(true_x, true_y),
noise_strength_list=(x_noise_strength, y_noise_strength), noise_strength_list=(x_noise_strength, y_noise_strength),
seed_list=seeds[1:3], seed_list=seeds[1:3],
normalize=normalize) normalize=normalize,
# create datasets return_normalization=True)
def normalized_func(x):
unnormalized_x = unnormalize_tensor(x, normalization_list[0])
y = func(unnormalized_x)
normalized_y = normalize_tensor(y, normalization_list[1])
return normalized_y
dataset_len = noisy_x.shape[0] dataset_len = noisy_x.shape[0]
# shuffle via seed
new_order = torch.randperm(dataset_len,
generator=torch.Generator().manual_seed(seed))
true_x = true_x[new_order, ...]
true_y = true_y[new_order, ...]
noisy_x = noisy_x[new_order, ...]
noisy_y = noisy_y[new_order, ...]
# create datasets
train_len = int(dataset_len*splitting_part) train_len = int(dataset_len*splitting_part)
test_len = dataset_len - train_len test_len = dataset_len - train_len
true_train_x, true_test_x = torch.split(true_x, [train_len, test_len]) true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
...@@ -57,8 +82,19 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, ...@@ -57,8 +82,19 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
noisy_train_x, noisy_train_y) noisy_train_x, noisy_train_y)
true_sine_testset = TensorDataset(true_test_x, true_test_y, true_sine_testset = TensorDataset(true_test_x, true_test_y,
noisy_test_x, noisy_test_y) noisy_test_x, noisy_test_y)
# return different objects, depending on Booleans
if not return_ground_truth: if not return_ground_truth:
return sine_trainset, sine_testset if not return_normalized_func:
return sine_trainset, sine_testset
else:
return sine_trainset, sine_testset, normalized_func
else: else:
return sine_trainset, sine_testset, true_sine_trainset,\ if not return_normalized_func:
true_sine_testset return sine_trainset, sine_testset, true_sine_trainset,\
true_sine_testset
else:
return sine_trainset, sine_testset, true_sine_trainset,\
true_sine_testset, normalized_func
...@@ -19,3 +19,7 @@ class VerticalCut(Dataset): ...@@ -19,3 +19,7 @@ class VerticalCut(Dataset):
def __len__(self): def __len__(self):
return len(self.dataset) return len(self.dataset)
@property
def tensors(self):
    """
    The tensors of the wrapped dataset, restricted to the components
    listed in `self.components_to_pick`.

    :returns: The entries of `self.dataset.tensors` at the positions given
    by `self.components_to_pick`.
    NOTE(review): assuming `itemgetter` is `operator.itemgetter`, a single
    picked component is returned as a bare item rather than a 1-tuple --
    confirm that callers can cope with that.
    """
    return itemgetter(*self.components_to_pick)(self.dataset.tensors)
...@@ -18,9 +18,17 @@ def normalize_tensor(t, mean_std): ...@@ -18,9 +18,17 @@ def normalize_tensor(t, mean_std):
""" """
return (t-mean_std[0])/mean_std[1] return (t-mean_std[0])/mean_std[1]
def unnormalize_tensor(t, mean_std):
    """
    Unnormalize the tensor `t` by the mean `mean_std[0]` and the standard
    deviation `mean_std[1]`, i.e. compute `t * mean_std[1] + mean_std[0]`.
    The inverse of `normalize_tensor`.

    :param t: The (normalized) tensor to map back.
    :param mean_std: Indexable pair; `mean_std[0]` is used as the mean and
    `mean_std[1]` as the standard deviation.
    :returns: `t` scaled by `mean_std[1]` and shifted by `mean_std[0]`.
    """
    return t * mean_std[1] + mean_std[0]
def add_noise(tensor_list, noise_strength_list, seed_list, normalize=True, def add_noise(tensor_list, noise_strength_list, seed_list, normalize=True,
normalization_list = None): normalization_list = None, return_normalization = False):
""" """
Takes the tensors in `tensor_list`, adds random noise using the standard Takes the tensors in `tensor_list`, adds random noise using the standard
deviations in `noise_strength_list` and the seeds in `seed_list`, then, if deviations in `noise_strength_list` and the seeds in `seed_list`, then, if
...@@ -35,10 +43,14 @@ def add_noise(tensor_list, noise_strength_list, seed_list, normalize=True, ...@@ -35,10 +43,14 @@ def add_noise(tensor_list, noise_strength_list, seed_list, normalize=True,
:param normalization_list: Either None (default) or a list of tensors. :param normalization_list: Either None (default) or a list of tensors.
If the latter, these tensors will be used for normalization and `normalize` If the latter, these tensors will be used for normalization and `normalize`
is assumed to be True. is assumed to be True.
:returns: noisy_tensor_list, unnoisy_tensor_list, both normalized :param return_normalization: Boolean. If True (default: False) the
used normalizations will be returned.
:returns: noisy_tensor_list, unnoisy_tensor_list(, list_of_normalization)
""" """
noisy_t_list = [] noisy_t_list = []
unnoisy_t_list = [] unnoisy_t_list = []
# store tuples that were used for normalization in here
list_of_normalization = []
if normalization_list is not None: if normalization_list is not None:
assert len(normalization_list) == len(tensor_list) assert len(normalization_list) == len(tensor_list)
for i, (t,noise,seed) in enumerate(zip(tensor_list, noise_strength_list,\ for i, (t,noise,seed) in enumerate(zip(tensor_list, noise_strength_list,\
...@@ -51,9 +63,12 @@ def add_noise(tensor_list, noise_strength_list, seed_list, normalize=True, ...@@ -51,9 +63,12 @@ def add_noise(tensor_list, noise_strength_list, seed_list, normalize=True,
get_normalization(normalization_list[i]) get_normalization(normalization_list[i])
else: else:
noisy_t_normalization = get_normalization(noisy_t) noisy_t_normalization = get_normalization(noisy_t)
list_of_normalization.append(noisy_t_normalization)
noisy_t = normalize_tensor(noisy_t, noisy_t_normalization) noisy_t = normalize_tensor(noisy_t, noisy_t_normalization)
t = normalize_tensor(t, noisy_t_normalization) t = normalize_tensor(t, noisy_t_normalization)
noisy_t_list.append(noisy_t) noisy_t_list.append(noisy_t)
unnoisy_t_list.append(t) unnoisy_t_list.append(t)
return noisy_t_list, unnoisy_t_list if return_normalization:
return noisy_t_list, unnoisy_t_list, list_of_normalization
else:
return noisy_t_list, unnoisy_t_list
""" """
Plot predictions with uncertainties for (simulated) datasets with a ground Plot predictions with uncertainties for (simulated) datasets with a ground
truth. truth. At the moment it is assumed that the used datasets have an output
dimension equal to 1. Plots are only produced for datasets with input dimension
and output dimension 1.
""" """
import importlib import importlib
import os import os
...@@ -25,15 +27,20 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws, ...@@ -25,15 +27,20 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
through their JSON configuration and evaluate their prediction and through their JSON configuration and evaluate their prediction and
uncertainties for the (true) x in `x_range`. The results returned are as numpy uncertainties for the (true) x in `x_range`. The results returned are as numpy
arrays included in a `plotting_dictionary` that contains the predictions arrays included in a `plotting_dictionary` that contains the predictions
and uncertainties via the keys "prediction" and "uncertainty" but also the and uncertainties via the keys "prediction" and "uncertainty" but also
noisified version of `x_range` and the corresponding y values (key - A tuple of `x_range` and the corresponding values of y (key
"range_points") and `number_of_test_datapoints` points from the test "range_points")
dataset with seed `plotting_seed` (key "test_data_points"). - the noisified version of `x_range` and the corresponding y values (key
"noisy_range_points") and
- `number_of_test_datapoints` points from the test
dataset with seed `plotting_seed` (key "test_data_points") and
- the input dimension (key "input_dim").
**Note**: The output of the neural networks are assumed to be **Note**: The output of the neural networks are assumed to be
one-dimensional. one-dimensional.
:data: String, short dataname :data: String, short dataname. The corresponding module should contain
`x_noise_strength`, `y_noise_strength`.
:x_range: An iterator yielding the (true) x values to consider :x_range: An iterator yielding the (true) x values to consider
:eiv: Boolean. If True an EiV model is used, else a non-EiV model. :eiv: Boolean. If True an EiV model is used, else a non-EiV model.
:number_of_draws: Number of draws to use for prediction. Take an int for :number_of_draws: Number of draws to use for prediction. Take an int for
...@@ -41,7 +48,7 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws, ...@@ -41,7 +48,7 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
:x_noise_seed: An integer. Will be used as a seed to generate the noise put :x_noise_seed: An integer. Will be used as a seed to generate the noise put
on `x_range`. on `x_range`.
:y_noise_seed: An integer. Will be used as a seed to generate the noise put :y_noise_seed: An integer. Will be used as a seed to generate the noise put
on `func(x_range)` that will be returned with `range_values` in the on `normalized_func(x_range)` that will be returned with `range_values` in the
`plotting_dictionary`. `plotting_dictionary`.
:plotting_seed: An integer. Needed for choosing which of the test datasets :plotting_seed: An integer. Needed for choosing which of the test datasets
will be used to return the test datapoints. will be used to return the test datapoints.
...@@ -59,16 +66,19 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws, ...@@ -59,16 +66,19 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
conf_file: conf_file:
conf_dict = json.load(conf_file) conf_dict = json.load(conf_file)
# get datanames
long_dataname = conf_dict["long_dataname"] long_dataname = conf_dict["long_dataname"]
short_dataname = conf_dict["short_dataname"] short_dataname = conf_dict["short_dataname"]
# load hyperparameters
load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
x_noise_strength =\ x_noise_strength =\
importlib.import_module(f'EIVData.{long_dataname}').x_noise_strength importlib.import_module(f'EIVData.{long_dataname}').x_noise_strength
y_noise_strength =\ y_noise_strength =\
importlib.import_module(f'EIVData.{long_dataname}').y_noise_strength importlib.import_module(f'EIVData.{long_dataname}').y_noise_strength
func = importlib.import_module(f'EIVData.{long_dataname}').func seed_list = range(conf_dict["seed_range"][0],
conf_dict["seed_range"][1])
# switch to gpu, if possible # switch to gpu, if possible
try: try:
...@@ -87,19 +97,21 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws, ...@@ -87,19 +97,21 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
device = torch.device('cpu') device = torch.device('cpu')
# determine dimensions
_, test_data = load_data(seed=plotting_seed, return_ground_truth=False) _, test_data = load_data(seed=plotting_seed, return_ground_truth=False)
input_dim = test_data[0][0].numel() input_dim = test_data[0][0].numel()
output_dim = test_data[0][1].numel() output_dim = test_data[0][1].numel()
assert output_dim == 1 assert output_dim == 1
# store in plotting_dictionary
plotting_dictionary['input_dim'] = input_dim
# store test datapoints
test_x, test_y = test_data[:number_of_test_datapoints] test_x, test_y = test_data[:number_of_test_datapoints]
plotting_dictionary['test_data_points'] = (test_x.detach().cpu().numpy(), plotting_dictionary['test_data_points'] = (test_x.detach().cpu().numpy(),
test_y.detach().cpu().numpy()) test_y.detach().cpu().numpy())
## Create iterators for get_coverage_distribution
seed_list = range(conf_dict["seed_range"][0],
conf_dict["seed_range"][1])
# iterator for networks # iterator for looping through networks
def net_iterator(eiv=eiv, seed_list=seed_list): def net_iterator(eiv=eiv, seed_list=seed_list):
""" """
Yields EiV models (if `eiv`) or Yields EiV models (if `eiv`) or
...@@ -145,17 +157,28 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws, ...@@ -145,17 +157,28 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
net=net, device=device) net=net, device=device)
yield net yield net
# add feature dimension (if necessary)
x_range = x_range.view((-1, input_dim)) x_range = x_range.view((-1, input_dim))
y_range = normalized_func(x_range)
# noisify
noisy_x_range = x_range + x_noise_strength * torch.randn(x_range.shape, noisy_x_range = x_range + x_noise_strength * torch.randn(x_range.shape,
generator=torch.Generator().manual_seed(x_noise_seed)) generator=torch.Generator().manual_seed(x_noise_seed))
# add feature dimension (if necessary) and move to device # move to device for later processing
noisy_x_range = noisy_x_range.to(device) noisy_x_range = noisy_x_range.to(device)
noisy_y_range = func(x_range) + y_noise_strength *\ # y values for noisy_x_range (not on device)
noisy_y_range = y_range + y_noise_strength *\
torch.randn(x_range.shape, torch.randn(x_range.shape,
generator=torch.Generator().manual_seed(y_noise_seed)) generator=torch.Generator().manual_seed(y_noise_seed))
plotting_dictionary['range_points'] =\ # save in plotting_dictionary
plotting_dictionary['noisy_range_points'] =\
(noisy_x_range.detach().cpu().numpy(), (noisy_x_range.detach().cpu().numpy(),
noisy_y_range.detach().cpu().numpy()) noisy_y_range.detach().cpu().numpy())
plotting_dictionary['range_points'] =\
(x_range.detach().cpu().numpy(),
y_range.detach().cpu().numpy())
# loop through networks and predict
mean_collection, unc_collection = [], [] mean_collection, unc_collection = [], []
for net in net_iterator(eiv=eiv): for net in net_iterator(eiv=eiv):
mean, unc = net.predict_mean_and_unc(noisy_x_range, mean, unc = net.predict_mean_and_unc(noisy_x_range,
...@@ -176,7 +199,8 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws, ...@@ -176,7 +199,8 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
# stack collections along a new, first dimension # stack collections along a new, first dimension
mean_collection = torch.stack(mean_collection, dim=0) mean_collection = torch.stack(mean_collection, dim=0)
unc_collection = torch.stack(unc_collection, dim=0) unc_collection = torch.stack(unc_collection, dim=0)
# average
# save average in plotting_dictionary
plotting_dictionary['prediction'] =\ plotting_dictionary['prediction'] =\
torch.mean(mean_collection, dim=0).detach().cpu().numpy() torch.mean(mean_collection, dim=0).detach().cpu().numpy()
plotting_dictionary['uncertainty'] =\ plotting_dictionary['uncertainty'] =\
...@@ -184,3 +208,36 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws, ...@@ -184,3 +208,36 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
return plotting_dictionary return plotting_dictionary
data_list = ['sine'] # short datanames
list_x_range = [torch.linspace(0.0,1.0, 50)]
list_color = [('red','blue')]
list_number_of_draws = [((100,5), 100)]
for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list,
list_x_range, list_color, list_number_of_draws)):
eiv_plotting_dictionary = compute_predictions_and_uncertainties(
data=data,
x_range=x_range,
eiv=True,
number_of_draws=number_of_draws[0])
noneiv_plotting_dictionary = compute_predictions_and_uncertainties(
data=data,
x_range=x_range,
eiv=False,
number_of_draws=number_of_draws[1])
input_dim = eiv_plotting_dictionary['input_dim']
if input_dim == 1:
plt.figure(i)
plt.clf()
x_values, y_values = eiv_plotting_dictionary['range_points']
plt.plot(x_values, y_values,'-', color='k')
# plt.plot(x_values, eiv_plotting_dictionary['prediction'],'-',
# color=color[0])
# plt.plot(x_values, noneiv_plotting_dictionary['prediction'],'-',
# color=color[1])
else:
# multidimensional handling not included yet
pass
plt.show()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment