Commit 9efa18aa authored by Jörg Martin

Plotting predictions for simulated data

parents befe7634 5fd3aa28
Showing 801 additions and 66 deletions
import torch
import sys
from torch.utils.data import TensorDataset
from EIVGeneral.manipulate_tensors import add_noise, normalize_tensor,\
unnormalize_tensor
total_number_of_datapoints = 1000
input_range = [-1,1]
slope = 1.0
intercept = 0.0
x_noise_strength = 0.1
y_noise_strength = 0.1
func = lambda true_x: slope * true_x**3 + intercept
def load_data(seed=0, splitting_part=0.8, normalize=True,
return_ground_truth=False,
return_normalized_func=False,
fixed_seed = 0):
"""
Loads one-dimensional cubic data, as in Hernandez-Lobato and Adams, 2015.
:param seed: Seed for shuffling the data before splitting.
:param splitting_part: Which fraction of the data to use as training
data. Defaults to 0.8.
:param normalize: Whether to normalize the data, defaults to True.
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False.
:param return_normalized_func: Boolean (default False). If True, the
normalized version of the used function is returned as a last element.
:param fixed_seed: Used to generate the full dataset (test and train).
Defaults to 0.
:returns: cubic_trainset, cubic_testset, (, normalized_func) if
return_ground_truth is False,
else cubic_trainset, cubic_testset, true_cubic_trainset,
true_cubic_testset, (, normalized_func). The "true" datasets each return
**four tensors**: The true x,y and their noisy counterparts.
"""
# draw different seeds for noise and splitting
random_generator = torch.Generator().manual_seed(fixed_seed)
seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\
generator=random_generator)]
# create new generators from tensor seeds
true_x = input_range[0] + (input_range[1]-input_range[0])\
* torch.rand((total_number_of_datapoints,1),
generator=torch.Generator().manual_seed(seeds[0]))
true_y = func(true_x)
# add noise and normalize x and y
(noisy_x, noisy_y), (true_x, true_y), normalization_list = add_noise(
tensor_list=(true_x, true_y),
noise_strength_list=(x_noise_strength, y_noise_strength),
seed_list=seeds[1:3],
normalize=normalize,
return_normalization=True)
if normalize:
def normalized_func(x):
unnormalized_x = unnormalize_tensor(x, normalization_list[0])
y = func(unnormalized_x)
normalized_y = normalize_tensor(y, normalization_list[1])
return normalized_y
else:
def normalized_func(x):
return func(x)
dataset_len = noisy_x.shape[0]
# shuffle via seed
new_order = torch.randperm(dataset_len,
generator=torch.Generator().manual_seed(seed))
true_x = true_x[new_order, ...]
true_y = true_y[new_order, ...]
noisy_x = noisy_x[new_order, ...]
noisy_y = noisy_y[new_order, ...]
# create datasets
train_len = int(dataset_len*splitting_part)
test_len = dataset_len - train_len
true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
true_train_y, true_test_y = torch.split(true_y, [train_len, test_len])
noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len])
noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
cubic_trainset = TensorDataset(noisy_train_x, noisy_train_y)
cubic_testset = TensorDataset(noisy_test_x, noisy_test_y)
true_cubic_trainset = TensorDataset(true_train_x, true_train_y,
noisy_train_x, noisy_train_y)
true_cubic_testset = TensorDataset(true_test_x, true_test_y,
noisy_test_x, noisy_test_y)
# return different objects, depending on Booleans
if not return_ground_truth:
if not return_normalized_func:
return cubic_trainset, cubic_testset
else:
return cubic_trainset, cubic_testset, normalized_func
else:
if not return_normalized_func:
return cubic_trainset, cubic_testset, true_cubic_trainset,\
true_cubic_testset
else:
return cubic_trainset, cubic_testset, true_cubic_trainset,\
true_cubic_testset, normalized_func
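A usage sketch for the loader above, assuming it is importable as
EIVData.cubic (the importlib calls further down in this commit suggest that
path):

import torch
from EIVData.cubic import load_data  # assumed module path
trainset, testset, true_trainset, true_testset, normalized_func = load_data(
        seed=1, normalize=True,
        return_ground_truth=True,
        return_normalized_func=True)
# the "true" datasets hold four tensors: true x, true y, noisy x, noisy y
true_x, true_y, noisy_x, noisy_y = true_trainset.tensors
# normalized_func maps normalized x values to normalized ground-truth y values
y_grid = normalized_func(torch.linspace(-1.0, 1.0, 50).view(-1, 1))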
@@ -2,47 +2,77 @@ import torch
import sys
from torch.utils.data import TensorDataset
from EIVGeneral.manipulate_tensors import add_noise
from EIVGeneral.manipulate_tensors import add_noise, normalize_tensor,\
unnormalize_tensor
total_number_of_datapoints = 2000
total_number_of_datapoints = 500
input_range = [-1,1]
slope = 1.0
intercept = 0.0
x_noise_strength = 0.05
x_noise_strength = 0.1
y_noise_strength = 0.1
func = lambda true_x: slope * true_x + intercept
def load_data(seed=0, splitting_part=0.8, normalize=True,
return_ground_truth=False):
return_ground_truth=False,
return_normalized_func=False,
fixed_seed = 0):
"""
Loads one-dimensional data
:param seed: Seed for drawing and splitting the data.
:param seed: Seed for shuffling the data before splitting.
:param splitting_part: Which fraction of the data to use as training
data. Defaults to 0.8.
:param normalize: Whether to normalize the data, defaults to True.
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False.
:returns: linear_trainset, linear_testset if return_ground_truth is False,
:param return_normalized_func: Boolean (default False). If True, the
normalized version of the used function is returned as a last element.
:param fixed_seed: Used to generate the full dataset (test and train).
Defaults to 0.
:returns: linear_trainset, linear_testset, (, normalized_func) if
return_ground_truth is False,
else linear_trainset, linear_testset, true_linear_trainset,
true_linear_testset. The later two return **four tensors**: The true x,y and
their noisy counterparts.
true_linear_testset, (, normalized_func). The "true" datasets each return
**four tensors**: The true x,y and their noisy counterparts.
"""
random_generator = torch.Generator().manual_seed(seed)
# draw different seeds for noise and splitting
random_generator = torch.Generator().manual_seed(fixed_seed)
seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\
generator=random_generator)]
# create new generators from tensor seeds
true_x = input_range[0] + (input_range[1]-input_range[0])\
* torch.rand((total_number_of_datapoints,1),
generator=torch.Generator().manual_seed(seeds[0]))
true_y = slope * true_x + intercept
true_y = func(true_x)
# add noise and normalize x and y
(noisy_x, noisy_y), (true_x, true_y) = add_noise(
(noisy_x, noisy_y), (true_x, true_y), normalization_list = add_noise(
tensor_list=(true_x, true_y),
noise_strength_list=(x_noise_strength, y_noise_strength),
seed_list=seeds[1:3],
normalize=normalize)
# create datasets
normalize=normalize,
return_normalization=True)
if normalize:
def normalized_func(x):
unnormalized_x = unnormalize_tensor(x, normalization_list[0])
y = func(unnormalized_x)
normalized_y = normalize_tensor(y, normalization_list[1])
return normalized_y
else:
def normalized_func(x):
return func(x)
dataset_len = noisy_x.shape[0]
# shuffle via seed
new_order = torch.randperm(dataset_len,
generator=torch.Generator().manual_seed(seed))
true_x = true_x[new_order, ...]
true_y = true_y[new_order, ...]
noisy_x = noisy_x[new_order, ...]
noisy_y = noisy_y[new_order, ...]
# create datasets
train_len = int(dataset_len*splitting_part)
test_len = dataset_len - train_len
true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
@@ -55,8 +85,19 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
noisy_train_x, noisy_train_y)
true_linear_testset = TensorDataset(true_test_x, true_test_y,
noisy_test_x, noisy_test_y)
# return different objects, depending on Booleans
if not return_ground_truth:
return linear_trainset, linear_testset
if not return_normalized_func:
return linear_trainset, linear_testset
else:
return linear_trainset, linear_testset, normalized_func
else:
return linear_trainset, linear_testset, true_linear_trainset,\
true_linear_testset
if not return_normalized_func:
return linear_trainset, linear_testset, true_linear_trainset,\
true_linear_testset
else:
return linear_trainset, linear_testset, true_linear_trainset,\
true_linear_testset, normalized_func
@@ -2,47 +2,77 @@ import torch
import sys
from torch.utils.data import TensorDataset
from EIVGeneral.manipulate_tensors import add_noise
from EIVGeneral.manipulate_tensors import add_noise, normalize_tensor,\
unnormalize_tensor
total_number_of_datapoints = 2000
total_number_of_datapoints = 500
input_range = [-1,1]
slope = 1.0
intercept = 0.0
x_noise_strength = 0.05
x_noise_strength = 0.1
y_noise_strength = 0.1
func = lambda true_x: slope * true_x**2 + intercept
def load_data(seed=0, splitting_part=0.8, normalize=True,
return_ground_truth=False):
return_ground_truth=False,
return_normalized_func=False,
fixed_seed = 0):
"""
Loads one-dimensional data
:param seed: Seed for drawing and splitting the data.
:param seed: Seed for shuffling the data before splitting.
:param splitting_part: Which fraction of the data to use as training
data. Defaults to 0.8.
:param normalize: Whether to normalize the data, defaults to True.
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False.
:returns: quadratic_trainset, quadratic_testset if return_ground_truth is False,
:param return_normalized_func: Boolean (default False). If True, the
normalized version of the used function is returned as a last element.
:param fixed_seed: Used to generate the full dataset (test and train).
Defaults to 0.
:returns: quadratic_trainset, quadratic_testset, (, normalized_func) if
return_ground_truth is False,
else quadratic_trainset, quadratic_testset, true_quadratic_trainset,
true_quadratic_testset. The later two return **four tensors**: The true x,y and
their noisy counterparts.
true_quadratic_testset, (, normalized_func). The "true" datasets each return
**four tensors**: The true x,y and their noisy counterparts.
"""
random_generator = torch.Generator().manual_seed(seed)
# draw different seeds for noise and splitting
random_generator = torch.Generator().manual_seed(fixed_seed)
seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\
generator=random_generator)]
# create new generators from tensor seeds
true_x = input_range[0] + (input_range[1]-input_range[0])\
* torch.rand((total_number_of_datapoints,1),
generator=torch.Generator().manual_seed(seeds[0]))
true_y = slope * true_x**2 + intercept
true_y = func(true_x)
# add noise and normalize x and y
(noisy_x, noisy_y), (true_x, true_y) = add_noise(
(noisy_x, noisy_y), (true_x, true_y), normalization_list = add_noise(
tensor_list=(true_x, true_y),
noise_strength_list=(x_noise_strength, y_noise_strength),
seed_list=seeds[1:3],
normalize=normalize)
# create datasets
normalize=normalize,
return_normalization=True)
if normalize:
def normalized_func(x):
unnormalized_x = unnormalize_tensor(x, normalization_list[0])
y = func(unnormalized_x)
normalized_y = normalize_tensor(y, normalization_list[1])
return normalized_y
else:
def normalized_func(x):
return func(x)
dataset_len = noisy_x.shape[0]
# shuffle via seed
new_order = torch.randperm(dataset_len,
generator=torch.Generator().manual_seed(seed))
true_x = true_x[new_order, ...]
true_y = true_y[new_order, ...]
noisy_x = noisy_x[new_order, ...]
noisy_y = noisy_y[new_order, ...]
# create datasets
train_len = int(dataset_len*splitting_part)
test_len = dataset_len - train_len
true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
@@ -55,8 +85,19 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
noisy_train_x, noisy_train_y)
true_quadratic_testset = TensorDataset(true_test_x, true_test_y,
noisy_test_x, noisy_test_y)
# return different objects, depending on Booleans
if not return_ground_truth:
return quadratic_trainset, quadratic_testset
if not return_normalized_func:
return quadratic_trainset, quadratic_testset
else:
return quadratic_trainset, quadratic_testset, normalized_func
else:
return quadratic_trainset, quadratic_testset, true_quadratic_trainset,\
true_quadratic_testset
if not return_normalized_func:
return quadratic_trainset, quadratic_testset, true_quadratic_trainset,\
true_quadratic_testset
else:
return quadratic_trainset, quadratic_testset, true_quadratic_trainset,\
true_quadratic_testset, normalized_func
@@ -7,7 +7,8 @@ import sys
import torch
from torch.utils.data import TensorDataset
from EIVGeneral.manipulate_tensors import add_noise
from EIVGeneral.manipulate_tensors import add_noise, normalize_tensor,\
unnormalize_tensor
class repeated_sampling():
"""
@@ -25,17 +26,35 @@
"""
def __init__(self, dataclass, fixed_seed=0):
self.dataclass = dataclass
self.func = dataclass.func
self.fixed_seed = fixed_seed
self.x_noise_strength = dataclass.x_noise_strength
self.y_noise_strength = dataclass.y_noise_strength
def __call__(self,seed=0, splitting_part=0.8, normalize=True,
return_ground_truth=False):
return_ground_truth=False,
return_normalized_func=False):
_, _, true_trainset, true_testset\
= self.dataclass.load_data(
seed=self.fixed_seed, splitting_part=splitting_part,
normalize=False,
return_ground_truth=True)
"""
Loads repeated sampling data
:param seed: Seed for the used noise
:param splitting_part: Which fraction of the data to use as training
data. Defaults to 0.8.
:param normalize: Whether to normalize the data, defaults to True.
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False.
:param return_normalized_func: Boolean (default False). If True, the
normalized version of the used function is returned as a last element.
:returns: trainset, testset, (, normalized_func) if
return_ground_truth is False,
else trainset, testset, true_trainset,
true_testset, (, normalized_func). The "true" datasets each return
**four tensors**: The true x,y and their noisy counterparts.
"""
full_noisy_x = torch.concat((true_trainset.tensors[2],
true_testset.tensors[2]), dim=0)
full_noisy_y = torch.concat((true_trainset.tensors[3],
@@ -46,23 +65,45 @@
# draw different seeds for noise and splitting
seeds = [int(t) for t in torch.randint(0,sys.maxsize,(2,),\
generator=random_generator)]
(noisy_train_x, noisy_train_y), (true_train_x, true_train_y) =\
add_noise((true_train_x, true_train_y),
# use same normalization for train and test
(noisy_train_x, noisy_train_y), (true_train_x, true_train_y),\
normalization_list= add_noise((true_train_x, true_train_y),
(self.x_noise_strength, self.y_noise_strength), seeds,
normalize=normalize,
normalization_list=[full_noisy_x, full_noisy_y])
normalization_list=[full_noisy_x, full_noisy_y],
return_normalization=True)
(noisy_test_x, noisy_test_y), (true_test_x, true_test_y) =\
add_noise((true_test_x, true_test_y),
(self.x_noise_strength, self.y_noise_strength), seeds,
normalize=normalize,
normalization_list=[full_noisy_x, full_noisy_y])
normalization_list=[full_noisy_x, full_noisy_y],
return_normalization=False) # same normalization
if normalize:
def normalized_func(x):
unnormalized_x = unnormalize_tensor(x, normalization_list[0])
y = self.func(unnormalized_x)
normalized_y = normalize_tensor(y, normalization_list[1])
return normalized_y
else:
def normalized_func(x):
return self.func(x)
trainset = TensorDataset(noisy_train_x, noisy_train_y)
testset = TensorDataset(noisy_test_x, noisy_test_y)
true_trainset = TensorDataset(true_train_x, true_train_y,
noisy_train_x, noisy_train_y)
true_testset = TensorDataset(true_test_x, true_test_y,
noisy_test_x, noisy_test_y)
# return different objects, depending on Booleans
if not return_ground_truth:
return trainset, testset
if not return_normalized_func:
return trainset, testset
else:
return trainset, testset, normalized_func
else:
return trainset, testset, true_trainset, true_testset
if not return_normalized_func:
return trainset, testset, true_trainset,\
true_testset
else:
return trainset, testset, true_trainset,\
true_testset, normalized_func
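A usage sketch for repeated_sampling, assuming the per-dataset modules are
importable as EIVData.linear etc., as in the rest of this commit; each call
redraws the noise on the same fixed ground truth:

from EIVData import linear  # assumed module path
sampler = repeated_sampling(dataclass=linear, fixed_seed=0)
# identical true x/y in both calls; only the noise changes with `seed`
trainset_a, testset_a = sampler(seed=1)
trainset_b, testset_b = sampler(seed=2)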
import torch
import sys
from torch.utils.data import TensorDataset
from EIVGeneral.manipulate_tensors import add_noise, normalize_tensor,\
unnormalize_tensor
total_number_of_datapoints = 2000
input_range = [-0.2,0.8]
intercept = 0.0
x_noise_strength = 0.02
y_noise_strength = 0.02
func = lambda true_x: true_x +\
torch.sin(2 * torch.pi * true_x) +\
torch.sin(4 * torch.pi * true_x)
def load_data(seed=0, splitting_part=0.8, normalize=True,
return_ground_truth=False,
return_normalized_func=False,
fixed_seed = 0):
"""
Loads one-dimensional, sine-shaped data, as in Blundell et al., 2015.
:param seed: Seed for shuffling the data before splitting.
:param splitting_part: Which fraction of the data to use as training
data. Defaults to 0.8.
:param normalize: Whether to normalize the data, defaults to True.
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False.
:param return_normalized_func: Boolean (default False). If True, the
normalized version of the used function is returned as a last element.
:param fixed_seed: Used to generate the full dataset (test and train).
Defaults to 0.
:returns: sine_trainset, sine_testset, (, normalized_func) if
return_ground_truth is False,
else sine_trainset, sine_testset, true_sine_trainset,
true_sine_testset, (, normalized_func). The "true" datasets each return
**four tensors**: The true x,y and their noisy counterparts.
"""
# draw different seeds for noise and splitting
random_generator = torch.Generator().manual_seed(fixed_seed)
seeds = [int(t) for t in torch.randint(0,sys.maxsize,(3,),\
generator=random_generator)]
# create new generators from tensor seeds
true_x = input_range[0] + (input_range[1]-input_range[0])\
* torch.rand((total_number_of_datapoints,1),
generator=torch.Generator().manual_seed(seeds[0]))
true_y = func(true_x)
# add noise and normalize x and y
(noisy_x, noisy_y), (true_x, true_y), normalization_list = add_noise(
tensor_list=(true_x, true_y),
noise_strength_list=(x_noise_strength, y_noise_strength),
seed_list=seeds[1:3],
normalize=normalize,
return_normalization=True)
if normalize:
def normalized_func(x):
unnormalized_x = unnormalize_tensor(x, normalization_list[0])
y = func(unnormalized_x)
normalized_y = normalize_tensor(y, normalization_list[1])
return normalized_y
else:
def normalized_func(x):
return func(x)
dataset_len = noisy_x.shape[0]
# shuffle via seed
new_order = torch.randperm(dataset_len,
generator=torch.Generator().manual_seed(seed))
true_x = true_x[new_order, ...]
true_y = true_y[new_order, ...]
noisy_x = noisy_x[new_order, ...]
noisy_y = noisy_y[new_order, ...]
# create datasets
train_len = int(dataset_len*splitting_part)
test_len = dataset_len - train_len
true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
true_train_y, true_test_y = torch.split(true_y, [train_len, test_len])
noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len])
noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
sine_trainset = TensorDataset(noisy_train_x, noisy_train_y)
sine_testset = TensorDataset(noisy_test_x, noisy_test_y)
true_sine_trainset = TensorDataset(true_train_x, true_train_y,
noisy_train_x, noisy_train_y)
true_sine_testset = TensorDataset(true_test_x, true_test_y,
noisy_test_x, noisy_test_y)
# return different objects, depending on Booleans
if not return_ground_truth:
if not return_normalized_func:
return sine_trainset, sine_testset
else:
return sine_trainset, sine_testset, normalized_func
else:
if not return_normalized_func:
return sine_trainset, sine_testset, true_sine_trainset,\
true_sine_testset
else:
return sine_trainset, sine_testset, true_sine_trainset,\
true_sine_testset, normalized_func
@@ -19,3 +19,7 @@ class VerticalCut(Dataset):
def __len__(self):
return len(self.dataset)
@property
def tensors(self):
return itemgetter(*self.components_to_pick)(self.dataset.tensors)
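A behavioral note on the new `tensors` property, as a minimal sketch:
`itemgetter` returns a tuple only when several components are picked, so a
single-entry `components_to_pick` would yield the bare tensor instead of a
1-tuple.

from operator import itemgetter
t = ('a', 'b', 'c', 'd')
itemgetter(2, 1)(t)  # ('c', 'b') -- a tuple, as with components_to_pick=[2,1]
itemgetter(2)(t)     # 'c' -- a bare item, not a 1-tuple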
@@ -18,9 +18,17 @@ def normalize_tensor(t, mean_std):
"""
return (t-mean_std[0])/mean_std[1]
def unnormalize_tensor(t, mean_std):
"""
Unnormalize the tensor `t` by the mean `mean_std[0]` and the standard
deviation `mean_std[1]`. The inverse of `normalize_tensor`.
"""
return t * mean_std[1] + mean_std[0]
def add_noise(tensor_list, noise_strength_list, seed_list, normalize=True,
normalization_list = None):
normalization_list = None, return_normalization = False):
"""
Takes the tensors in `tensor_list`, adds random noise using the standard
deviations in `noise_strength_list` and the seeds in `seed_list`, then, if
@@ -35,10 +43,14 @@ def add_noise(tensor_list, noise_strength_list, seed_list, normalize=True,
:param normalization_list: Either None (default) or a list of tensors.
If the latter, these tensors will be used for normalization and `normalize`
is assumed to be True.
:returns: noisy_tensor_list, unnoisy_tensor_list, both normalized
:param return_normalization: Boolean. If True (default: False) the
used normalizations will be returned.
:returns: noisy_tensor_list, unnoisy_tensor_list(, list_of_normalization)
"""
noisy_t_list = []
unnoisy_t_list = []
# store tuples that were used for normalization in here
list_of_normalization = []
if normalization_list is not None:
assert len(normalization_list) == len(tensor_list)
for i, (t,noise,seed) in enumerate(zip(tensor_list, noise_strength_list,\
@@ -51,9 +63,12 @@ def add_noise(tensor_list, noise_strength_list, seed_list, normalize=True,
get_normalization(normalization_list[i])
else:
noisy_t_normalization = get_normalization(noisy_t)
list_of_normalization.append(noisy_t_normalization)
noisy_t = normalize_tensor(noisy_t, noisy_t_normalization)
t = normalize_tensor(t, noisy_t_normalization)
noisy_t_list.append(noisy_t)
unnoisy_t_list.append(t)
return noisy_t_list, unnoisy_t_list
if return_normalization:
return noisy_t_list, unnoisy_t_list, list_of_normalization
else:
return noisy_t_list, unnoisy_t_list
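A round-trip sketch for the helpers above, assuming nothing beyond what is
shown (each normalization is a (mean, std) pair taken from the noisy tensor):

import torch
x = torch.linspace(-1.0, 1.0, 100).view(-1, 1)
y = x**2
(noisy_x, noisy_y), (norm_x, norm_y), norms = add_noise(
        tensor_list=(x, y),
        noise_strength_list=(0.1, 0.1),
        seed_list=[0, 1],
        normalize=True,
        return_normalization=True)
# norms[0] was used to normalize both the true and the noisy x, so
# unnormalizing the normalized true x recovers the original values
assert torch.allclose(unnormalize_tensor(norm_x, norms[0]), x)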
{
"long_dataname": "cubic",
"short_dataname": "cubic",
"normalize": false,
"lr": 1e-3,
"batch_size": 16,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.1,
"lr_update": 20,
"std_y_update_points": [1,40],
"eiv_prediction_number_of_draws": [100,5],
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [128, 128, 128, 128],
"fixed_std_x": 0.10,
"seed_range": [0,10],
"gpu_number": 1
}
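These JSON files are read with json.load by the scripts further below; a
minimal reading sketch, following the configurations/eiv_{short_dataname}.json
convention visible there:

import json, os
with open(os.path.join('configurations', 'eiv_cubic.json'), 'r') as conf_file:
    conf_dict = json.load(conf_file)
hidden_layers = conf_dict["hidden_layers"]   # [128, 128, 128, 128]
seed_list = range(*conf_dict["seed_range"])  # range(0, 10)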
{
"long_dataname": "linear",
"short_dataname": "linear",
"normalize": false,
"lr": 1e-3,
"batch_size": 64,
"batch_size": 16,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
@@ -15,7 +16,7 @@
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [128, 128, 128, 128],
"fixed_std_x": 0.05,
"fixed_std_x": 0.10,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "quadratic",
"short_dataname": "quadratic",
"normalize": false,
"lr": 1e-3,
"batch_size": 64,
"batch_size": 16,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
@@ -15,7 +16,7 @@
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [128, 128, 128, 128],
"fixed_std_x": 0.05,
"fixed_std_x": 0.10,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "sine",
"short_dataname": "sine",
"normalize": false,
"lr": 1e-3,
"batch_size": 16,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.1,
"lr_update": 20,
"std_y_update_points": [1,40],
"eiv_number_of_forward_draws": 10,
"eiv_prediction_number_of_draws": [100,5],
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.1],
"gamma": 0.5,
"hidden_layers": [128, 128, 128, 128],
"fixed_std_x": 0.02,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "cubic",
"short_dataname": "cubic",
"normalize": false,
"lr": 1e-3,
"batch_size": 16,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.1,
"lr_update": 20,
"std_y_update_points": [1,40] ,
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [128, 128, 128, 128],
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "linear",
"short_dataname": "linear",
"normalize": false,
"lr": 1e-3,
"batch_size": 64,
"batch_size": 16,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
...
{
"long_dataname": "quadratic",
"short_dataname": "quadratic",
"normalize": false,
"lr": 1e-3,
"batch_size": 64,
"batch_size": 16,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
...
{
"long_dataname": "sine",
"short_dataname": "sine",
"normalize": false,
"lr": 1e-3,
"batch_size": 16,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.1,
"lr_update": 20,
"std_y_update_points": [1,40] ,
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.1],
"gamma": 0.5,
"hidden_layers": [128, 128, 128, 128],
"seed_range": [0,10],
"gpu_number": 1
}
@@ -2,7 +2,7 @@ import os
import glob
import json
metrics_to_display = ['rmse','coverage_theory','coverage_numerical','true_coverage_numerical','avg_bias']
metrics_to_display = ['rmse','logdens','coverage_numerical','true_coverage_numerical']
show_incomplete = True
list_of_result_files = glob.glob(os.path.join('results','*.json'))
...
@@ -31,6 +31,12 @@ with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as conf_file:
eiv_conf_dict = json.load(conf_file)
with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as conf_file:
noneiv_conf_dict = json.load(conf_file)
try:
normalize = eiv_conf_dict['normalize']
assert normalize == noneiv_conf_dict['normalize']
except KeyError:
# normalize by default
normalize = True
long_dataname = eiv_conf_dict["long_dataname"]
short_dataname = eiv_conf_dict["short_dataname"]
@@ -40,7 +46,7 @@ print(f"Evaluating {long_dataname}")
scale_outputs = False
load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
train_data, test_data = load_data()
train_data, test_data = load_data(normalize=normalize)
input_dim = train_data[0][0].numel()
output_dim = train_data[0][1].numel()
@@ -240,7 +246,8 @@ def collect_metrics(x_y_pairs, seed=0,
def collect_full_seed_range_metrics(load_data,
seed_range,test_batch_size = 100, test_samples = 10,
noneiv_number_of_draws=100, eiv_number_of_draws=[100,5], device=device,
scale_outputs=scale_outputs):
scale_outputs=scale_outputs,
normalize=normalize):
"""
Collect metrics that need all seeds for their computation.
:param load_data: load_data map should take seed as an argument and,
@@ -257,6 +264,7 @@ def collect_full_seed_range_metrics(load_data,
:param device: The torch.device to use
:param scale_outputs: Boolean, scale the outputs for some metrics. Defaults
to False.
:param normalize: Boolean, whether to normalize the data
:returns: Dictionaries noneiv_metrics, eiv_metrics
"""
noneiv_metrics = {}
@@ -267,9 +275,10 @@
# load data according to seed
try:
train_data, test_data, true_train_data, true_test_data \
= load_data(seed=seed, return_ground_truth=True)
= load_data(seed=seed, return_ground_truth=True,
normalize=normalize)
except TypeError:
train_data, test_data = load_data(seed=seed)
train_data, test_data = load_data(seed=seed, normalize=normalize)
true_train_data, true_test_data = None, None
## Compute x-dependant bias
@@ -460,9 +469,10 @@ number_of_test_samples = 2
for seed in tqdm(seed_list):
try:
train_data, test_data, true_train_data, true_test_data \
= load_data(seed=seed, return_ground_truth=True)
= load_data(seed=seed, return_ground_truth=True,
normalize=normalize)
except TypeError:
train_data, test_data = load_data(seed=seed)
train_data, test_data = load_data(seed=seed, normalize=normalize)
true_train_data, true_test_data = None, None
if true_test_data is None:
test_dataloader = DataLoader(test_data,
...
@@ -48,6 +48,11 @@ def compute_coverages(data, eiv, number_of_draws,
long_dataname = conf_dict["long_dataname"]
short_dataname = conf_dict["short_dataname"]
try:
normalize = conf_dict['normalize']
except KeyError:
# normalize by default
normalize = True
load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
@@ -73,12 +78,13 @@
# test whether there is a ground truth
try:
train_data, _, _,_ \
= load_data(seed=0, return_ground_truth=True)
= load_data(seed=0, return_ground_truth=True,
normalize=normalize)
except TypeError:
# if not, end function
return None,None
else:
train_data, _ = load_data()
train_data, _ = load_data(normalize=normalize)
print(f"Computing {'EiV' if eiv else 'non-EiV'} coverage for {long_dataname}")
@@ -145,14 +151,15 @@
"""
for seed in seed_list:
if not use_ground_truth:
_, test_data = load_data(seed=seed)
_, test_data = load_data(seed=seed, normalize=normalize)
test_dataloader = DataLoader(test_data,
batch_size=batch_size,
shuffle=True)
yield test_dataloader
else:
_, _, _, true_test =\
load_data(seed=seed, return_ground_truth=True)
load_data(seed=seed, return_ground_truth=True,
normalize=normalize)
# take noisy x but unnoisy y
cut_true_test = VerticalCut(true_test,
components_to_pick=[2,1])
@@ -362,8 +369,10 @@ plt.xlabel('q')
plt.ylabel('EiV cov - nonEiV-cov')
# datasets to plot and their coloring
datasets = ['linear', 'quadratic','yacht','wine','power',
'protein','concrete','california','energy','kin8nm','msd','naval']
datasets = ['linear', 'quadratic','cubic','sine',
'yacht','wine','power','protein','concrete',
'california','energy','kin8nm','msd','naval']
colors = cm.rainbow(np.linspace(0,1,len(datasets)))
# loop through data
...
"""
Plot predictions with uncertainties for (simulated) datasets with a ground
truth. At the moment the used datasets are assumed to have an output
dimension equal to 1; plots are only produced for datasets whose input and
output dimensions are both 1.
"""
import importlib
import os
import json
import torch
import numpy as np
import matplotlib.pyplot as plt
from EIVArchitectures import Networks
from EIVTrainingRoutines import train_and_store
# coverage factor
k = 1.96
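# 1.96 is the two-sided 95% Gaussian coverage factor, Phi^{-1}(0.975) ~= 1.9600;
# a quick sanity check (sketch, using only torch, which is imported above):
# torch.distributions.Normal(0., 1.).icdf(torch.tensor(0.975))  # -> ~1.9600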
def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
x_noise_seed = 0, y_noise_seed = 1,
plotting_seed = 0, number_of_test_datapoints = 100):
"""
Iterates through the networks associated with `data` (short dataname) via
their JSON configuration and evaluates their predictions and
uncertainties for the (true) x in `x_range`. The results are returned as numpy
arrays in a `plotting_dictionary` that contains the predictions
and uncertainties via the keys "prediction" and "uncertainty", but also
- a tuple of `x_range` and the corresponding values of y (key
"range_points"),
- the noisified version of `x_range` and the corresponding y values (key
"noisy_range_points"),
- `number_of_test_datapoints` points from the test
dataset with seed `plotting_seed` (key "test_data_points") and
- the input dimension (key "input_dim").
**Note**: The outputs of the neural networks are assumed to be
one-dimensional.
:data: String, short dataname. The corresponding module should contain
`x_noise_strength`, `y_noise_strength`.
:x_range: An iterator yielding the (true) x values to consider
:eiv: Boolean. If True an EiV model is used, else a non-EiV model.
:number_of_draws: Number of draws to use for prediction. Take an int for
non-EiV models and a two-element list for EiV models.
:x_noise_seed: An integer. Will be used as a seed to generate the noise put
on `x_range`.
:y_noise_seed: An integer. Will be used as a seed to generate the noise put
on `normalized_func(x_range)`; the result is returned under
"noisy_range_points" in the `plotting_dictionary`.
:plotting_seed: An integer. Determines which test dataset is used for
returning the test datapoints.
:number_of_test_datapoints: A positive integer. Number of test datapoints
to return.
:returns: plotting_dictionary
"""
plotting_dictionary = {}
# load configuration file
if eiv:
with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as\
conf_file:
conf_dict = json.load(conf_file)
else:
with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as\
conf_file:
conf_dict = json.load(conf_file)
# get datanames
long_dataname = conf_dict["long_dataname"]
short_dataname = conf_dict["short_dataname"]
try:
normalize = conf_dict['normalize']
except KeyError:
# normalize by default
normalize = True
# load hyperparameters
load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
x_noise_strength =\
importlib.import_module(f'EIVData.{long_dataname}').x_noise_strength
y_noise_strength =\
importlib.import_module(f'EIVData.{long_dataname}').y_noise_strength
seed_list = range(conf_dict["seed_range"][0],
conf_dict["seed_range"][1])
# switch to gpu, if possible
try:
gpu_number = conf_dict["gpu_number"]
device = torch.device(f'cuda:{gpu_number}')
try:
torch.tensor([0.0]).to(device)
except RuntimeError:
if torch.cuda.is_available():
print('Switched to GPU 0')
device = torch.device('cuda:0')
else:
print('No cuda available, using CPU')
device = torch.device('cpu')
except KeyError:
device = torch.device('cpu')
# determine dimensions
_, test_data, normalized_func = load_data(seed=plotting_seed,
return_ground_truth=False,
return_normalized_func=True,
normalize=normalize)
input_dim = test_data[0][0].numel()
output_dim = test_data[0][1].numel()
assert output_dim == 1
# store in plotting_dictionary
plotting_dictionary['input_dim'] = input_dim
# store test datapoints
test_x, test_y = test_data[:number_of_test_datapoints]
plotting_dictionary['test_data_points'] = (test_x.detach().cpu().numpy(),
test_y.detach().cpu().numpy())
# iterator for looping through networks
def net_iterator(eiv=eiv, seed_list=seed_list):
"""
Yields EiV models (if `eiv`) or
non-EiV models (if not `eiv`) for the seeds in
`seed_list` and `data`.
"""
if eiv:
# load parameters
init_std_y = conf_dict["init_std_y_list"][0]
unscaled_reg = conf_dict["unscaled_reg"]
p = conf_dict["p"]
hidden_layers = conf_dict["hidden_layers"]
fixed_std_x = conf_dict["fixed_std_x"]
net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
h=[input_dim, *hidden_layers, output_dim],
fixed_std_x=fixed_std_x).to(device)
for seed in seed_list:
# load network parameters
saved_file = os.path.join('saved_networks',
f'eiv_{short_dataname}'\
f'_init_std_y_{init_std_y:.3f}'\
f'_ureg_{unscaled_reg:.1f}'\
f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}'\
f'_seed_{seed}.pkl')
train_and_store.open_stored_training(saved_file=saved_file,
net=net, device=device)
yield net
else:
# load parameters
init_std_y = conf_dict["init_std_y_list"][0]
unscaled_reg = conf_dict["unscaled_reg"]
p = conf_dict["p"]
hidden_layers = conf_dict["hidden_layers"]
net = Networks.FNNBer(p=p, init_std_y=init_std_y,
h=[input_dim, *hidden_layers, output_dim]).to(device)
for seed in seed_list:
saved_file = os.path.join('saved_networks',
f'noneiv_{short_dataname}'\
f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
f'_p_{p:.2f}_seed_{seed}.pkl')
# load network parameters
train_and_store.open_stored_training(saved_file=saved_file,
net=net, device=device)
yield net
# add feature dimension (if necessary)
x_range = x_range.view((-1, input_dim))
y_range = normalized_func(x_range)
# noisify
noisy_x_range = x_range + x_noise_strength * torch.randn(x_range.shape,
generator=torch.Generator().manual_seed(x_noise_seed))
# move to device for later processing
noisy_x_range = noisy_x_range.to(device)
# y values for noisy_x_range (not on device)
noisy_y_range = y_range + y_noise_strength *\
torch.randn(x_range.shape,
generator=torch.Generator().manual_seed(y_noise_seed))
# save in plotting_dictionary
plotting_dictionary['noisy_range_points'] =\
(noisy_x_range.detach().cpu().numpy(),
noisy_y_range.detach().cpu().numpy())
plotting_dictionary['range_points'] =\
(x_range.detach().cpu().numpy(),
y_range.detach().cpu().numpy())
# loop through networks and predict
mean_collection, unc_collection = [], []
for net in net_iterator(eiv=eiv):
mean, unc = net.predict_mean_and_unc(noisy_x_range,
number_of_draws=number_of_draws)[:2]
if len(mean.shape) > 1:
# assume only batch and output dimension
assert len(mean.shape) == 2
assert len(unc.shape) == 2
# assume one-dimensional outputs
assert mean.shape[1] == 1
assert unc.shape[1] == 1
# make one-dimensional
mean = mean.view((-1,))
unc = unc.view((-1,))
mean_collection.append(mean)
unc_collection.append(unc)
# stack collections along a new, first dimension
mean_collection = torch.stack(mean_collection, dim=0)
unc_collection = torch.stack(unc_collection, dim=0)
# save average in plotting_dictionary
plotting_dictionary['prediction'] =\
torch.mean(mean_collection, dim=0).detach().cpu().numpy()
plotting_dictionary['uncertainty'] =\
torch.mean(unc_collection, dim=0).detach().cpu().numpy()
return plotting_dictionary
data_list = ['linear','quadratic','cubic','sine'] # short datanames
list_x_range = [torch.linspace(-1.0,1.0, 50),
torch.linspace(-1.0,1.0, 50),
torch.linspace(-1.0,1.0, 50),
torch.linspace(-0.2,0.8, 50)]
list_color = [('red','blue')] * len(data_list)
list_number_of_draws = [((100,5), 100)] * len(data_list)
for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list,
list_x_range, list_color, list_number_of_draws)):
eiv_plotting_dictionary = compute_predictions_and_uncertainties(
data=data,
x_range=x_range,
eiv=True,
number_of_draws=number_of_draws[0])
noneiv_plotting_dictionary = compute_predictions_and_uncertainties(
data=data,
x_range=x_range,
eiv=False,
number_of_draws=number_of_draws[1])
input_dim = eiv_plotting_dictionary['input_dim']
if input_dim == 1:
plt.figure(i+1)
plt.clf()
x_values, y_values = eiv_plotting_dictionary['range_points']
plt.plot(x_values.flatten(), y_values.flatten(),'-', color='k')
eiv_pred = eiv_plotting_dictionary['prediction']
eiv_unc = eiv_plotting_dictionary['uncertainty']
plt.plot(x_values.flatten(), eiv_pred,'-',
color=color[0])
plt.fill_between(x_values.flatten(), eiv_pred-k * eiv_unc,
eiv_pred + k * eiv_unc,
color=color[0], alpha=0.5)
noneiv_pred = noneiv_plotting_dictionary['prediction']
noneiv_unc = noneiv_plotting_dictionary['uncertainty']
plt.plot(x_values.flatten(), noneiv_pred,'-',
color=color[1])
plt.fill_between(x_values.flatten(), noneiv_pred-k * noneiv_unc,
noneiv_pred + k * noneiv_unc,
color=color[1], alpha=0.5)
else:
# multidimensional handling not included yet
pass
plt.show()
@@ -41,15 +41,26 @@ p = conf_dict["p"]
lr_update = conf_dict["lr_update"]
# offset before updating sigma_y after each epoch
std_y_update_points = conf_dict["std_y_update_points"]
# will be used to predict the RMSE and update sigma_y accordingly
# will be used for prediction, to compute the RMSE and update sigma_y accordingly
try:
eiv_number_of_forward_draws = conf_dict['eiv_number_of_forward_draws']
except KeyError:
eiv_number_of_forward_draws = 5
eiv_prediction_number_of_draws = conf_dict["eiv_prediction_number_of_draws"]
eiv_prediction_number_of_batches = \
conf_dict["eiv_prediction_number_of_batches"]
init_std_y_list = conf_dict["init_std_y_list"]
fixed_std_x = conf_dict['fixed_std_x']
gamma = conf_dict["gamma"]
hidden_layers = conf_dict["hidden_layers"]
seed_range = conf_dict['seed_range']
try:
normalize = conf_dict['normalize']
except KeyError:
# normalize by default
normalize = True
print(f"Training on {long_dataname} data")
@@ -199,7 +210,7 @@ def train_on_data(init_std_y, seed):
set_seeds(seed)
# load Datasets
train_data, test_data = load_data(seed=seed, splitting_part=0.8,
normalize=True)
normalize=normalize)
# make dataloaders
train_dataloader = DataLoader(train_data, batch_size=batch_size,
shuffle=True)
@@ -220,7 +231,9 @@ def train_on_data(init_std_y, seed):
# regularization
reg = unscaled_reg/len(train_data)
# create epoch_map
criterion = loss_functions.nll_eiv
def criterion(net, x, y, reg):
return loss_functions.nll_eiv(net, x, y, reg,
number_of_draws=eiv_number_of_forward_draws)
epoch_map = UpdatedTrainEpoch(train_dataloader=train_dataloader,
test_dataloader=test_dataloader,
criterion=criterion, std_y_map=std_y_map, std_x_map=std_x_map,
...