Skip to content
Snippets Groups Projects
linear.py 2.70 KiB
import torch
import sys
from torch.utils.data import TensorDataset

from EIVGeneral.manipulate_tensors import add_noise

# --- Configuration of the synthetic linear dataset (read by load_data) ---

# Number of x values sampled (rows of the generated tensors).
total_number_of_datapoints = 2000

# [lower, upper] bounds of the interval from which the true x is drawn.
input_range = [-1, 1]

# Ground truth relation: true_y = slope * true_x + intercept.
slope = 1.0
intercept = 0.0

# Noise strengths handed to add_noise for x and y respectively.
# NOTE(review): presumably standard deviations -- confirm against add_noise.
x_noise_strength = 0.05
y_noise_strength = 0.1

def load_data(seed=0, splitting_part=0.8, normalize=True,
        return_ground_truth=False):
    """
    Generates a noisy one-dimensional linear dataset and splits it into a
    training and a test part.

    The true x is sampled uniformly from `input_range`, the true y is
    `slope * x + intercept`. Both are passed through `add_noise`. The split
    is a contiguous cut: the first `int(len * splitting_part)` rows form the
    training set, the remaining rows the test set.

    :param seed: Master seed. Three sub-seeds are derived from it: one for
    sampling x and two that are handed to `add_noise`. Defaults to 0.
    :param splitting_part: Which fraction of the data to use as training
    data. Defaults to 0.8.
    :param normalize: Forwarded to `add_noise` as its `normalize` argument
    (whether to normalize the data). Defaults to True.
    :param return_ground_truth: Boolean. If True, the ground truth datasets
    are returned as well. Defaults to False.
    :returns: linear_trainset, linear_testset if return_ground_truth is False,
    else linear_trainset, linear_testset, true_linear_trainset,
    true_linear_testset. The first two yield (noisy x, noisy y); the latter
    two yield **four tensors**: the true x, y (as returned by `add_noise`)
    followed by their noisy counterparts.
    """
    # Derive three sub-seeds from the master seed, so that sampling x and
    # the two noise draws do not share a random stream.
    master_generator = torch.Generator().manual_seed(seed)
    sub_seeds = torch.randint(0, sys.maxsize, (3,),
            generator=master_generator)
    x_seed, *noise_seeds = [int(s) for s in sub_seeds]

    # Sample the true x uniformly in [lower, upper) and compute the true y.
    lower, upper = input_range[0], input_range[1]
    true_x = lower + (upper - lower)\
            * torch.rand((total_number_of_datapoints, 1),
                    generator=torch.Generator().manual_seed(x_seed))
    true_y = slope * true_x + intercept

    # add_noise returns the noisy tensors together with the (possibly
    # normalized) ground truth; from here on the latter replaces true_x/y.
    (noisy_x, noisy_y), (true_x, true_y) = add_noise(
            tensor_list=(true_x, true_y),
            noise_strength_list=(x_noise_strength, y_noise_strength),
            seed_list=noise_seeds,
            normalize=normalize)

    # Cut every tensor at the same position into a train and a test chunk.
    n_total = noisy_x.shape[0]
    n_train = int(n_total * splitting_part)
    chunk_sizes = [n_train, n_total - n_train]
    chunks = [torch.split(tensor, chunk_sizes)
            for tensor in (true_x, true_y, noisy_x, noisy_y)]
    # Regroup: each tuple is (true x, true y, noisy x, noisy y).
    train_tensors, test_tensors = zip(*chunks)

    # The plain datasets only contain the noisy pair (last two entries).
    linear_trainset = TensorDataset(*train_tensors[2:])
    linear_testset = TensorDataset(*test_tensors[2:])
    true_linear_trainset = TensorDataset(*train_tensors)
    true_linear_testset = TensorDataset(*test_tensors)

    if return_ground_truth:
        return (linear_trainset, linear_testset,
                true_linear_trainset, true_linear_testset)
    return linear_trainset, linear_testset