-
Jörg Martin authored
linear.py 2.70 KiB
import torch
import sys
from torch.utils.data import TensorDataset
from EIVGeneral.manipulate_tensors import add_noise
# Configuration of the synthetic linear dataset built by ``load_data``.

# Number of x values drawn before the train/test split.
total_number_of_datapoints = 2000

# Lower and upper bound of the interval the noise-free x values are drawn from.
input_range = [-1, 1]

# Ground truth relation: true_y = slope * true_x + intercept.
slope = 1.0
intercept = 0.0

# Noise strengths handed to ``add_noise`` for x and y respectively.
# NOTE(review): the exact meaning (e.g. standard deviation) is defined by
# ``add_noise`` and is not visible here.
x_noise_strength = 0.05
y_noise_strength = 0.1
def load_data(seed=0, splitting_part=0.8, normalize=True,
              return_ground_truth=False):
    """
    Build a noisy one-dimensional linear dataset and split it into a
    training and a test part.

    The noise-free inputs are drawn uniformly from ``input_range``, the
    noise-free targets follow ``slope * x + intercept``. Both are then passed
    through ``add_noise``. The split is a plain sequential cut (no
    shuffling): the first ``int(len * splitting_part)`` rows form the
    training set, the remaining rows the test set.

    :param seed: Seed of the master generator. Three sub-seeds are derived
        from it: one for drawing the inputs and two that are forwarded to
        ``add_noise``.
    :param splitting_part: Which fraction of the data to use as training
        data. Defaults to 0.8.
    :param normalize: Forwarded to ``add_noise`` as its ``normalize``
        argument, defaults to True.
    :param return_ground_truth: Boolean. If True, datasets that also contain
        the noise-free tensors are returned in addition. Defaults to False.
    :returns: linear_trainset, linear_testset if return_ground_truth is
        False, else linear_trainset, linear_testset, true_linear_trainset,
        true_linear_testset. The latter two yield **four tensors** per item:
        the true x, the true y and their noisy counterparts (in this order).
    """
    # One master generator produces three integer seeds, so that drawing the
    # inputs and the two noise draws use different random streams.
    master_generator = torch.Generator().manual_seed(seed)
    seed_tensor = torch.randint(0, sys.maxsize, (3,),
                                generator=master_generator)
    x_seed, *noise_seeds = (int(entry) for entry in seed_tensor)

    # Noise-free inputs: uniform on [lower, upper), one column.
    lower, upper = input_range[0], input_range[1]
    unit_sample = torch.rand(
        (total_number_of_datapoints, 1),
        generator=torch.Generator().manual_seed(x_seed))
    true_x = lower + (upper - lower) * unit_sample
    true_y = slope * true_x + intercept

    # add_noise returns the noisy pair and a (presumably normalized, if
    # requested) version of the noise-free pair; the latter replaces the
    # tensors computed above.
    (noisy_x, noisy_y), (true_x, true_y) = add_noise(
        tensor_list=(true_x, true_y),
        noise_strength_list=(x_noise_strength, y_noise_strength),
        seed_list=noise_seeds,
        normalize=normalize)

    # Sequential split sizes; the test part takes whatever is left over.
    n_total = noisy_x.shape[0]
    n_train = int(n_total * splitting_part)
    split_sizes = [n_train, n_total - n_train]

    # Split all four tensors with the same sizes and regroup the pieces into
    # (true_x, true_y, noisy_x, noisy_y) tuples for train and test.
    train_parts, test_parts = zip(*(
        torch.split(tensor, split_sizes)
        for tensor in (true_x, true_y, noisy_x, noisy_y)))

    # The plain datasets only carry the noisy tensors (last two entries).
    linear_trainset = TensorDataset(*train_parts[2:])
    linear_testset = TensorDataset(*test_parts[2:])
    # The ground truth datasets carry all four tensors.
    true_linear_trainset = TensorDataset(*train_parts)
    true_linear_testset = TensorDataset(*test_parts)

    if return_ground_truth:
        return (linear_trainset, linear_testset,
                true_linear_trainset, true_linear_testset)
    return linear_trainset, linear_testset