"""Synthetic one-dimensional quadratic dataset with noisy inputs and targets."""
import sys

import torch
from torch.utils.data import TensorDataset, random_split

# Number of (x, y) pairs drawn by each call of load_data.
total_number_of_datapoints = 2000
# Interval [low, high] from which the noise-free inputs are drawn uniformly.
input_range = [-1,1]
# Coefficient multiplying x**2 in the noise-free target.
slope = 1.0
# Constant offset added to the noise-free target.
intercept = 0.0
# Scale of the standard normal noise added to the inputs.
x_noise_strength = 0.05
# Scale of the standard normal noise added to the targets.
y_noise_strength = 0.1


def get_normalization(*args):
    """
    Returns the mean and standard deviation of each tensor in *args.

    :param args: Tensors to compute the statistics for. Each one is reduced
    along dimension 0 with `torch.mean` and `torch.std` (default settings),
    keeping the reduced dimension.
    :returns: A tuple containing one `(mean, std)` tuple per tensor, in the
    order in which the tensors were passed.
    """
    return tuple(
        (torch.mean(t, dim=0, keepdim=True), torch.std(t, dim=0, keepdim=True))
        for t in args
    )


def load_data(seed=0, splitting_part=0.8, normalize=True,
        return_ground_truth=False):
    """
    Draws one-dimensional data following `y = slope * x**2 + intercept`,
    adds Gaussian noise to both x and y and splits the noisy pairs randomly
    into a train and a test set.
    :param seed: Seed for drawing and splitting the data.
    :param splitting_part: Which fraction of the data to use as training
    data. Must lie in [0, 1]. Defaults to 0.8.
    :param normalize: Whether to normalize the data, defaults to True. The
    mean and standard deviation of the *noisy* x and y are used, and the
    ground truth is transformed with the same statistics.
    :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
    also be returned. Defaults to False.
    :returns: trainset, testset if return_ground_truth is False,
    else trainset, testset, (true_x, true_y)
    :raises ValueError: If splitting_part is not within [0, 1].
    """
    # Without this check an out-of-range fraction yields one negative split
    # length; the two lengths still sum to the dataset size, so random_split
    # would not complain and would silently return wrongly sized subsets.
    if not 0.0 <= splitting_part <= 1.0:
        raise ValueError(
            f"splitting_part must be within [0, 1], got {splitting_part!r}")

    def create_generator(tensor_seed):
        # Build an independent generator from a 0-dim integer tensor seed.
        return torch.Generator().manual_seed(tensor_seed.item())

    random_generator = torch.Generator().manual_seed(seed)
    # draw different seeds for the inputs, both noises and the splitting
    seeds = torch.randint(0, sys.maxsize, (4,), generator=random_generator)
    sample_shape = (total_number_of_datapoints, 1)

    # noise-free inputs, uniform on input_range, and their quadratic targets
    lower, upper = input_range[0], input_range[1]
    true_x = lower + (upper - lower) * torch.rand(
        sample_shape, generator=create_generator(seeds[0]))
    true_y = slope * true_x**2 + intercept

    # observed data: ground truth perturbed by independent Gaussian noise
    noisy_x = true_x + x_noise_strength * torch.randn(
        sample_shape, generator=create_generator(seeds[1]))
    noisy_y = true_y + y_noise_strength * torch.randn(
        sample_shape, generator=create_generator(seeds[2]))

    if normalize:
        (x_mean, x_std), (y_mean, y_std) = get_normalization(noisy_x, noisy_y)
        # ground truth is mapped with the statistics of the noisy data so
        # that it stays comparable with the returned datasets
        noisy_x = (noisy_x - x_mean) / x_std
        true_x = (true_x - x_mean) / x_std
        noisy_y = (noisy_y - y_mean) / y_std
        true_y = (true_y - y_mean) / y_std

    dataset = TensorDataset(noisy_x, noisy_y)
    dataset_len = len(dataset)
    train_len = int(dataset_len * splitting_part)
    test_len = dataset_len - train_len
    trainset, testset = random_split(
        dataset,
        lengths=[train_len, test_len],
        generator=create_generator(seeds[3]))
    if return_ground_truth:
        return trainset, testset, (true_x, true_y)
    return trainset, testset
"quadratic", + "short_dataname": "quadratic", + "lr": 1e-3, + "batch_size": 64, + "test_batch_size": 800, + "number_of_epochs": 100, + "unscaled_reg": 10, + "report_point": 5, + "p": 0.1, + "lr_update": 20, + "std_y_update_points": [1,40], + "eiv_prediction_number_of_draws": [100,5], + "eiv_prediction_number_of_batches": 10, + "init_std_y_list": [0.5], + "gamma": 0.5, + "hidden_layers": [128, 128, 128, 128], + "fixed_std_x": 0.05, + "seed_range": [0,10], + "gpu_number": 1 +} diff --git a/Experiments/configurations/noneiv_quadratic.json b/Experiments/configurations/noneiv_quadratic.json new file mode 100644 index 0000000000000000000000000000000000000000..573d7877bc9def4ce8b1ffba8a847495d09b7f47 --- /dev/null +++ b/Experiments/configurations/noneiv_quadratic.json @@ -0,0 +1,20 @@ +{ + "long_dataname": "quadratic", + "short_dataname": "quadratic", + "lr": 1e-3, + "batch_size": 64, + "test_batch_size": 800, + "number_of_epochs": 100, + "unscaled_reg": 10, + "report_point": 5, + "p": 0.1, + "lr_update": 20, + "std_y_update_points": [1,40] , + "noneiv_prediction_number_of_draws": 100, + "noneiv_prediction_number_of_batches": 10, + "init_std_y_list": [0.5], + "gamma": 0.5, + "hidden_layers": [128, 128, 128, 128], + "seed_range": [0,10], + "gpu_number": 1 +}