From a4f801444f33d9eae043497e6f54198dd1af668f Mon Sep 17 00:00:00 2001 From: Joerg Martin <joerg.martin@ptb.de> Date: Fri, 17 Dec 2021 14:46:16 +0100 Subject: [PATCH] First attempt to deal with ground truth --- EIVPackage/EIVData/linear.py | 21 +++++++++++++-------- EIVPackage/EIVData/quadratic.py | 21 +++++++++++++-------- Experiments/evaluate_metrics.py | 12 ++++++++++-- 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/EIVPackage/EIVData/linear.py b/EIVPackage/EIVData/linear.py index b182590..89dd7a1 100644 --- a/EIVPackage/EIVData/linear.py +++ b/EIVPackage/EIVData/linear.py @@ -31,11 +31,12 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, :param return_ground_truth: Boolean. If True, the unnoisy ground truth will also be returned. Defaults to False. :returns: linear_trainset, linear_testset if return_ground_truth is False, - else linear_trainset, linear_testset, (true_x, true_y) + else linear_trainset, linear_testset, (true_train_x, true_train_y), + (true_test_x, true_test_y) """ random_generator = torch.Generator().manual_seed(seed) # draw different seeds for noise and splitting - seeds = torch.randint(0,sys.maxsize,(4,), generator=random_generator) + seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator) # create new generators from tensor seeds create_generator = lambda tensor_seed:\ torch.Generator().manual_seed(tensor_seed.item()) @@ -55,14 +56,18 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, true_x = (true_x-normalization_x[0])/normalization_x[1] noisy_y = (noisy_y-normalization_y[0])/normalization_y[1] true_y = (true_y-normalization_y[0])/normalization_y[1] - linear_dataset = TensorDataset(noisy_x, noisy_y) - dataset_len = len(linear_dataset) + dataset_len = noisy_x.shape[0] train_len = int(dataset_len*splitting_part) test_len = dataset_len - train_len - linear_trainset, linear_testset = random_split(linear_dataset, - lengths=[train_len, test_len], - generator=create_generator(seeds[3])) + true_train_x, true_test_x = torch.split(true_x, [train_len, test_len]) + true_train_y, true_test_y = torch.split(true_y, [train_len, test_len]) + noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len]) + noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len]) + linear_trainset = TensorDataset(noisy_train_x, noisy_train_y) + linear_testset = TensorDataset(noisy_test_x, noisy_test_y) if not return_ground_truth: return linear_trainset, linear_testset else: - return linear_trainset, linear_testset, (true_x, true_y) + return linear_trainset, linear_testset, (true_train_x, true_train_y),\ + (true_test_x, true_test_y) + diff --git a/EIVPackage/EIVData/quadratic.py b/EIVPackage/EIVData/quadratic.py index 9b817a5..eab8b6c 100644 --- a/EIVPackage/EIVData/quadratic.py +++ b/EIVPackage/EIVData/quadratic.py @@ -31,11 +31,12 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, :param return_ground_truth: Boolean. If True, the unnoisy ground truth will also be returned. Defaults to False. :returns: linear_trainset, linear_testset if return_ground_truth is False, - else linear_trainset, linear_testset, (true_x, true_y) + else linear_trainset, linear_testset, (true_train_x, true_train_y), + (true_test_x, true_test_y) """ random_generator = torch.Generator().manual_seed(seed) # draw different seeds for noise and splitting - seeds = torch.randint(0,sys.maxsize,(4,), generator=random_generator) + seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator) # create new generators from tensor seeds create_generator = lambda tensor_seed:\ torch.Generator().manual_seed(tensor_seed.item()) @@ -55,14 +56,18 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, true_x = (true_x-normalization_x[0])/normalization_x[1] noisy_y = (noisy_y-normalization_y[0])/normalization_y[1] true_y = (true_y-normalization_y[0])/normalization_y[1] - linear_dataset = TensorDataset(noisy_x, noisy_y) - dataset_len = len(linear_dataset) + dataset_len = noisy_x.shape[0] train_len = int(dataset_len*splitting_part) test_len = dataset_len - train_len - linear_trainset, linear_testset = random_split(linear_dataset, - lengths=[train_len, test_len], - generator=create_generator(seeds[3])) + true_train_x, true_test_x = torch.split(true_x, [train_len, test_len]) + true_train_y, true_test_y = torch.split(true_y, [train_len, test_len]) + noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len]) + noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len]) + linear_trainset = TensorDataset(noisy_train_x, noisy_train_y) + linear_testset = TensorDataset(noisy_test_x, noisy_test_y) if not return_ground_truth: return linear_trainset, linear_testset else: - return linear_trainset, linear_testset, (true_x, true_y) + return linear_trainset, linear_testset, (true_train_x, true_train_y),\ + (true_test_x, true_test_y) + diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py index d694558..31af058 100644 --- a/Experiments/evaluate_metrics.py +++ b/Experiments/evaluate_metrics.py @@ -124,7 +124,8 @@ def collect_metrics(x,y, seed=0, noneiv_metrics['coverage_normalized'],_ =\ epistemic_coverage(not_averaged_predictions, y, normalize_errors=True) noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y) - + + # metrics that need a ground truth # NLL @@ -221,7 +222,14 @@ seed_list = range(noneiv_conf_dict["seed_range"][0], noneiv_conf_dict["seed_range"][1]) max_batch_number = 2 for seed in tqdm(seed_list): - train_data, test_data = load_data(seed=seed) + try: + train_data, test_data, (true_train_x, true_train_y),\ + (true_test_x, true_test_y) \ + = load_data(seed=seed, return_ground_truth=True) + except TypeError: + train_data, test_data = load_data(seed=seed) + (true_train_x, true_train_y), (true_test_x, true_test_y)\ + = (None,None), (None,None) test_dataloader = DataLoader(test_data, batch_size=int(np.min((len(test_data), 800))), shuffle=True) -- GitLab