diff --git a/EIVPackage/EIVData/linear.py b/EIVPackage/EIVData/linear.py
index b1825905acafc69332d1c01418eb7db00e0f28eb..89dd7a18a26f72293de31b39b1eb5f01ae88d0fe 100644
--- a/EIVPackage/EIVData/linear.py
+++ b/EIVPackage/EIVData/linear.py
@@ -31,11 +31,12 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
     :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
     also be returned. Defaults to False.
     :returns: linear_trainset, linear_testset if return_ground_truth is False,
-    else linear_trainset, linear_testset, (true_x, true_y)
+    else linear_trainset, linear_testset, (true_train_x, true_train_y),
+    (true_test_x, true_test_y)
     """
     random_generator = torch.Generator().manual_seed(seed)
     # draw different seeds for noise and splitting
-    seeds = torch.randint(0,sys.maxsize,(4,), generator=random_generator)
+    seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator)
     # create new generators from tensor seeds
     create_generator = lambda tensor_seed:\
             torch.Generator().manual_seed(tensor_seed.item())
@@ -55,14 +56,18 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
         true_x = (true_x-normalization_x[0])/normalization_x[1]
         noisy_y = (noisy_y-normalization_y[0])/normalization_y[1]
         true_y = (true_y-normalization_y[0])/normalization_y[1]
-    linear_dataset = TensorDataset(noisy_x, noisy_y)
-    dataset_len = len(linear_dataset)
+    dataset_len = noisy_x.shape[0]
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
-    linear_trainset, linear_testset = random_split(linear_dataset,
-            lengths=[train_len, test_len],
-            generator=create_generator(seeds[3]))
+    true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
+    true_train_y, true_test_y = torch.split(true_y, [train_len, test_len])
+    noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len])
+    noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
+    linear_trainset = TensorDataset(noisy_train_x, noisy_train_y)
+    linear_testset = TensorDataset(noisy_test_x, noisy_test_y)
     if not return_ground_truth:
         return linear_trainset, linear_testset
     else:
-        return linear_trainset, linear_testset, (true_x, true_y)
+        return linear_trainset, linear_testset, (true_train_x, true_train_y),\
+                (true_test_x, true_test_y)
+
diff --git a/EIVPackage/EIVData/quadratic.py b/EIVPackage/EIVData/quadratic.py
index 9b817a5445b4790417e4dd07a2cc15d44950eb94..eab8b6c6810371573d7885f13561d5c74e84fddb 100644
--- a/EIVPackage/EIVData/quadratic.py
+++ b/EIVPackage/EIVData/quadratic.py
@@ -31,11 +31,12 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
     :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
     also be returned. Defaults to False.
     :returns: linear_trainset, linear_testset if return_ground_truth is False,
-    else linear_trainset, linear_testset, (true_x, true_y)
+    else linear_trainset, linear_testset, (true_train_x, true_train_y),
+    (true_test_x, true_test_y)
     """
     random_generator = torch.Generator().manual_seed(seed)
     # draw different seeds for noise and splitting
-    seeds = torch.randint(0,sys.maxsize,(4,), generator=random_generator)
+    seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator)
     # create new generators from tensor seeds
     create_generator = lambda tensor_seed:\
             torch.Generator().manual_seed(tensor_seed.item())
@@ -55,14 +56,18 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
         true_x = (true_x-normalization_x[0])/normalization_x[1]
         noisy_y = (noisy_y-normalization_y[0])/normalization_y[1]
         true_y = (true_y-normalization_y[0])/normalization_y[1]
-    linear_dataset = TensorDataset(noisy_x, noisy_y)
-    dataset_len = len(linear_dataset)
+    dataset_len = noisy_x.shape[0]
     train_len = int(dataset_len*splitting_part)
     test_len = dataset_len - train_len
-    linear_trainset, linear_testset = random_split(linear_dataset,
-            lengths=[train_len, test_len],
-            generator=create_generator(seeds[3]))
+    true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
+    true_train_y, true_test_y = torch.split(true_y, [train_len, test_len])
+    noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len])
+    noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
+    linear_trainset = TensorDataset(noisy_train_x, noisy_train_y)
+    linear_testset = TensorDataset(noisy_test_x, noisy_test_y)
     if not return_ground_truth:
         return linear_trainset, linear_testset
     else:
-        return linear_trainset, linear_testset, (true_x, true_y)
+        return linear_trainset, linear_testset, (true_train_x, true_train_y),\
+                (true_test_x, true_test_y)
+
diff --git a/Experiments/evaluate_metrics.py b/Experiments/evaluate_metrics.py
index d694558ba35333831660c26edab47bc0d3d014f1..31af058a19075c616231689acacdba39c053f14c 100644
--- a/Experiments/evaluate_metrics.py
+++ b/Experiments/evaluate_metrics.py
@@ -124,7 +124,8 @@ def collect_metrics(x,y, seed=0,
     noneiv_metrics['coverage_normalized'],_ =\
             epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
     noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y)
-    
+
+
 
     # metrics that need a ground truth
     # NLL
@@ -221,7 +222,14 @@ seed_list = range(noneiv_conf_dict["seed_range"][0],
         noneiv_conf_dict["seed_range"][1])
 max_batch_number = 2
 for seed in tqdm(seed_list):
-    train_data, test_data = load_data(seed=seed)
+    try:
+        train_data, test_data, (true_train_x, true_train_y),\
+             (true_test_x, true_test_y) \
+             = load_data(seed=seed, return_ground_truth=True)
+    except TypeError:
+        train_data, test_data = load_data(seed=seed)
+        (true_train_x, true_train_y), (true_test_x, true_test_y)\
+                 = (None,None), (None,None)
     test_dataloader = DataLoader(test_data,
             batch_size=int(np.min((len(test_data), 800))), shuffle=True)
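
For reference, a minimal caller-side sketch (not part of the diff) of how the changed load_data return value would be consumed. The import path EIVData.linear is assumed from the diff's file paths, the TypeError fallback mirrors the pattern in evaluate_metrics.py, and seed/batch size are placeholder values.

# Usage sketch (hypothetical): unpack the new four-value return of load_data
# and fall back for loaders that do not support return_ground_truth.
from torch.utils.data import DataLoader

from EIVData.linear import load_data  # import path assumed from the diff

try:
    train_data, test_data, (true_train_x, true_train_y),\
            (true_test_x, true_test_y)\
            = load_data(seed=0, return_ground_truth=True)
except TypeError:
    # loader without ground-truth support
    train_data, test_data = load_data(seed=0)
    (true_train_x, true_train_y), (true_test_x, true_test_y)\
            = (None, None), (None, None)

# Because the split is now a deterministic torch.split instead of random_split,
# element i of test_data lines up with (true_test_x[i], true_test_y[i]).
test_dataloader = DataLoader(test_data, batch_size=16, shuffle=False)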