Skip to content
Snippets Groups Projects
Commit 20b8dad9 authored by Jörg Martin's avatar Jörg Martin
Browse files

cubic and sine dataset added

parent befe7634
Branches
Tags
No related merge requests found
import torch
import sys
from torch.utils.data import TensorDataset
from EIVGeneral.manipulate_tensors import add_noise
# Number of (x, y) pairs that load_data draws before splitting.
total_number_of_datapoints = 2000

# [lower, upper] bounds of the interval the noise-free x values are drawn from.
input_range = [-4, 4]

# Coefficients of the ground truth y = slope * x**3 + intercept.
slope = 1.0
intercept = 0.0

# Noise strengths handed to add_noise for x and y respectively.
# For x this is 5% of half the width of input_range.
x_noise_strength = 0.05 * (input_range[1] - input_range[0]) / 2
y_noise_strength = 3
def load_data(seed=0, splitting_part=0.8, normalize=True,
              return_ground_truth=False):
    """
    Creates one-dimensional, cubic data as in Hernandez-Lobato, Adams 2015.

    The noise-free inputs are drawn uniformly from `input_range`, the
    noise-free targets are `slope * x**3 + intercept`, and both are then
    passed through `add_noise`. The train/test split is positional: the
    first `int(len * splitting_part)` rows form the training set, the
    remaining rows the test set (no shuffling happens in this function).

    :param seed: Master seed. Three further seeds are derived from it: one
    for drawing x and two that are forwarded to `add_noise`.
    :param splitting_part: Which fraction of the data to use as training
    data. Defaults to 0.8.
    :param normalize: Forwarded to `add_noise` as its `normalize` argument,
    defaults to True.
    :param return_ground_truth: Boolean. If True, the ground truth datasets
    will also be returned. Defaults to False.
    :returns: cubic_trainset, cubic_testset if return_ground_truth is False,
    else cubic_trainset, cubic_testset, true_cubic_trainset,
    true_cubic_testset. The latter two yield **four tensors** per item: the
    true x, y (as returned by `add_noise`) and their noisy counterparts.
    """
    # derive three independent seeds from the master seed:
    # one for sampling x, two for the noise on x and y
    master_generator = torch.Generator().manual_seed(seed)
    derived_seeds = torch.randint(0, sys.maxsize, (3,),
                                  generator=master_generator)
    x_seed, *noise_seeds = derived_seeds.tolist()

    # sample the noise-free inputs uniformly on input_range
    lower, upper = input_range[0], input_range[1]
    unit_samples = torch.rand((total_number_of_datapoints, 1),
                              generator=torch.Generator().manual_seed(x_seed))
    true_x = lower + (upper - lower) * unit_samples
    true_y = slope * true_x**3 + intercept

    # add_noise returns the noisy pair first, then the (re-bound) true pair
    noisy_pair, true_pair = add_noise(
        tensor_list=(true_x, true_y),
        noise_strength_list=(x_noise_strength, y_noise_strength),
        seed_list=noise_seeds,
        normalize=normalize)
    noisy_x, noisy_y = noisy_pair
    true_x, true_y = true_pair

    # positional split into a leading train part and a trailing test part
    dataset_len = noisy_x.shape[0]
    train_len = int(dataset_len*splitting_part)
    split_sizes = [train_len, dataset_len - train_len]
    pieces = [torch.split(tensor, split_sizes)
              for tensor in (true_x, true_y, noisy_x, noisy_y)]
    # each tuple is ordered (true_x, true_y, noisy_x, noisy_y)
    train_tensors, test_tensors = zip(*pieces)

    # the plain datasets only carry the noisy x and y
    cubic_trainset = TensorDataset(*train_tensors[2:])
    cubic_testset = TensorDataset(*test_tensors[2:])
    true_cubic_trainset = TensorDataset(*train_tensors)
    true_cubic_testset = TensorDataset(*test_tensors)

    if return_ground_truth:
        return (cubic_trainset, cubic_testset,
                true_cubic_trainset, true_cubic_testset)
    return cubic_trainset, cubic_testset
import torch
import sys
from torch.utils.data import TensorDataset
from EIVGeneral.manipulate_tensors import add_noise
# Number of (x, y) pairs that load_data draws before splitting.
total_number_of_datapoints = 2000

# [lower, upper] bounds of the interval the noise-free x values are drawn from.
input_range = [-0.2, 0.8]

# Not referenced by load_data in this module; kept as a module attribute.
intercept = 0.0

# Noise strengths handed to add_noise for x and y respectively.
x_noise_strength = 0.02
y_noise_strength = 0.05
def load_data(seed=0, splitting_part=0.8, normalize=True,
              return_ground_truth=False):
    """
    Creates one-dimensional, sine shaped data as in Blundell et al. 2014.

    The noise-free inputs are drawn uniformly from `input_range`, the
    noise-free targets are `x + sin(2*pi*x) + sin(4*pi*x)`, and both are
    then passed through `add_noise`. The train/test split is positional: the
    first `int(len * splitting_part)` rows form the training set, the
    remaining rows the test set (no shuffling happens in this function).

    :param seed: Master seed. Three further seeds are derived from it: one
    for drawing x and two that are forwarded to `add_noise`.
    :param splitting_part: Which fraction of the data to use as training
    data. Defaults to 0.8.
    :param normalize: Forwarded to `add_noise` as its `normalize` argument,
    defaults to True.
    :param return_ground_truth: Boolean. If True, the ground truth datasets
    will also be returned. Defaults to False.
    :returns: sine_trainset, sine_testset if return_ground_truth is False,
    else sine_trainset, sine_testset, true_sine_trainset,
    true_sine_testset. The latter two yield **four tensors** per item: the
    true x, y (as returned by `add_noise`) and their noisy counterparts.
    """
    # derive three independent seeds from the master seed:
    # one for sampling x, two for the noise on x and y
    master_generator = torch.Generator().manual_seed(seed)
    derived_seeds = torch.randint(0, sys.maxsize, (3,),
                                  generator=master_generator)
    x_seed, *noise_seeds = derived_seeds.tolist()

    # sample the noise-free inputs uniformly on input_range
    lower, upper = input_range[0], input_range[1]
    unit_samples = torch.rand((total_number_of_datapoints, 1),
                              generator=torch.Generator().manual_seed(x_seed))
    true_x = lower + (upper - lower) * unit_samples

    # linear trend plus two sine waves with frequency factors 2*pi and 4*pi
    # NOTE(review): no amplitude factor is applied to the sine terms; if the
    # data is meant to match the cited paper exactly, confirm the coefficients.
    true_y = true_x
    for frequency_factor in (2, 4):
        true_y = true_y + torch.sin(frequency_factor * torch.pi * true_x)

    # add_noise returns the noisy pair first, then the (re-bound) true pair
    noisy_pair, true_pair = add_noise(
        tensor_list=(true_x, true_y),
        noise_strength_list=(x_noise_strength, y_noise_strength),
        seed_list=noise_seeds,
        normalize=normalize)
    noisy_x, noisy_y = noisy_pair
    true_x, true_y = true_pair

    # positional split into a leading train part and a trailing test part
    dataset_len = noisy_x.shape[0]
    train_len = int(dataset_len*splitting_part)
    split_sizes = [train_len, dataset_len - train_len]
    pieces = [torch.split(tensor, split_sizes)
              for tensor in (true_x, true_y, noisy_x, noisy_y)]
    # each tuple is ordered (true_x, true_y, noisy_x, noisy_y)
    train_tensors, test_tensors = zip(*pieces)

    # the plain datasets only carry the noisy x and y
    sine_trainset = TensorDataset(*train_tensors[2:])
    sine_testset = TensorDataset(*test_tensors[2:])
    true_sine_trainset = TensorDataset(*train_tensors)
    true_sine_testset = TensorDataset(*test_tensors)

    if return_ground_truth:
        return (sine_trainset, sine_testset,
                true_sine_trainset, true_sine_testset)
    return sine_trainset, sine_testset
{
"long_dataname": "cubic",
"short_dataname": "cubic",
"lr": 1e-3,
"batch_size": 64,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.1,
"lr_update": 20,
"std_y_update_points": [1,40],
"eiv_prediction_number_of_draws": [100,5],
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [128, 128, 128, 128],
"fixed_std_x": 0.05,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "sine",
"short_dataname": "sine",
"lr": 1e-3,
"batch_size": 64,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.1,
"lr_update": 20,
"std_y_update_points": [1,40],
"eiv_prediction_number_of_draws": [100,5],
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.1],
"gamma": 0.5,
"hidden_layers": [128, 128, 128, 128],
"fixed_std_x": 0.02,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "cubic",
"short_dataname": "cubic",
"lr": 1e-3,
"batch_size": 64,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.1,
"lr_update": 20,
"std_y_update_points": [1,40] ,
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [128, 128, 128, 128],
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "sine",
"short_dataname": "sine",
"lr": 1e-3,
"batch_size": 64,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.1,
"lr_update": 20,
"std_y_update_points": [1,40] ,
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.1],
"gamma": 0.5,
"hidden_layers": [128, 128, 128, 128],
"seed_range": [0,10],
"gpu_number": 1
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment