Skip to content
Snippets Groups Projects
Commit a4f80144 authored by Jörg Martin
Browse files

First attempt to deal with ground truth

parent 594fc013
No related branches found
No related tags found
No related merge requests found
...@@ -31,11 +31,12 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, ...@@ -31,11 +31,12 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False. also be returned. Defaults to False.
:returns: linear_trainset, linear_testset if return_ground_truth is False, :returns: linear_trainset, linear_testset if return_ground_truth is False,
else linear_trainset, linear_testset, (true_x, true_y) else linear_trainset, linear_testset, (true_train_x, true_train_y),
(true_test_x, true_test_y)
""" """
random_generator = torch.Generator().manual_seed(seed) random_generator = torch.Generator().manual_seed(seed)
# draw different seeds for noise and splitting # draw different seeds for noise and splitting
seeds = torch.randint(0,sys.maxsize,(4,), generator=random_generator) seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator)
# create new generators from tensor seeds # create new generators from tensor seeds
create_generator = lambda tensor_seed:\ create_generator = lambda tensor_seed:\
torch.Generator().manual_seed(tensor_seed.item()) torch.Generator().manual_seed(tensor_seed.item())
...@@ -55,14 +56,18 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, ...@@ -55,14 +56,18 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
true_x = (true_x-normalization_x[0])/normalization_x[1] true_x = (true_x-normalization_x[0])/normalization_x[1]
noisy_y = (noisy_y-normalization_y[0])/normalization_y[1] noisy_y = (noisy_y-normalization_y[0])/normalization_y[1]
true_y = (true_y-normalization_y[0])/normalization_y[1] true_y = (true_y-normalization_y[0])/normalization_y[1]
linear_dataset = TensorDataset(noisy_x, noisy_y) dataset_len = noisy_x.shape[0]
dataset_len = len(linear_dataset)
train_len = int(dataset_len*splitting_part) train_len = int(dataset_len*splitting_part)
test_len = dataset_len - train_len test_len = dataset_len - train_len
linear_trainset, linear_testset = random_split(linear_dataset, true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
lengths=[train_len, test_len], true_train_y, true_test_y = torch.split(true_y, [train_len, test_len])
generator=create_generator(seeds[3])) noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len])
noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
linear_trainset = TensorDataset(noisy_train_x, noisy_train_y)
linear_testset = TensorDataset(noisy_test_x, noisy_test_y)
if not return_ground_truth: if not return_ground_truth:
return linear_trainset, linear_testset return linear_trainset, linear_testset
else: else:
return linear_trainset, linear_testset, (true_x, true_y) return linear_trainset, linear_testset, (true_train_x, true_train_y),\
(true_test_x, true_test_y)
...@@ -31,11 +31,12 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, ...@@ -31,11 +31,12 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will :param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False. also be returned. Defaults to False.
:returns: linear_trainset, linear_testset if return_ground_truth is False, :returns: linear_trainset, linear_testset if return_ground_truth is False,
else linear_trainset, linear_testset, (true_x, true_y) else linear_trainset, linear_testset, (true_train_x, true_train_y),
(true_test_x, true_test_y)
""" """
random_generator = torch.Generator().manual_seed(seed) random_generator = torch.Generator().manual_seed(seed)
# draw different seeds for noise and splitting # draw different seeds for noise and splitting
seeds = torch.randint(0,sys.maxsize,(4,), generator=random_generator) seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator)
# create new generators from tensor seeds # create new generators from tensor seeds
create_generator = lambda tensor_seed:\ create_generator = lambda tensor_seed:\
torch.Generator().manual_seed(tensor_seed.item()) torch.Generator().manual_seed(tensor_seed.item())
...@@ -55,14 +56,18 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, ...@@ -55,14 +56,18 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
true_x = (true_x-normalization_x[0])/normalization_x[1] true_x = (true_x-normalization_x[0])/normalization_x[1]
noisy_y = (noisy_y-normalization_y[0])/normalization_y[1] noisy_y = (noisy_y-normalization_y[0])/normalization_y[1]
true_y = (true_y-normalization_y[0])/normalization_y[1] true_y = (true_y-normalization_y[0])/normalization_y[1]
linear_dataset = TensorDataset(noisy_x, noisy_y) dataset_len = noisy_x.shape[0]
dataset_len = len(linear_dataset)
train_len = int(dataset_len*splitting_part) train_len = int(dataset_len*splitting_part)
test_len = dataset_len - train_len test_len = dataset_len - train_len
linear_trainset, linear_testset = random_split(linear_dataset, true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
lengths=[train_len, test_len], true_train_y, true_test_y = torch.split(true_y, [train_len, test_len])
generator=create_generator(seeds[3])) noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len])
noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
linear_trainset = TensorDataset(noisy_train_x, noisy_train_y)
linear_testset = TensorDataset(noisy_test_x, noisy_test_y)
if not return_ground_truth: if not return_ground_truth:
return linear_trainset, linear_testset return linear_trainset, linear_testset
else: else:
return linear_trainset, linear_testset, (true_x, true_y) return linear_trainset, linear_testset, (true_train_x, true_train_y),\
(true_test_x, true_test_y)
...@@ -124,7 +124,8 @@ def collect_metrics(x,y, seed=0, ...@@ -124,7 +124,8 @@ def collect_metrics(x,y, seed=0,
noneiv_metrics['coverage_normalized'],_ =\ noneiv_metrics['coverage_normalized'],_ =\
epistemic_coverage(not_averaged_predictions, y, normalize_errors=True) epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y) noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y)
# metrics that need a ground truth
# NLL # NLL
...@@ -221,7 +222,14 @@ seed_list = range(noneiv_conf_dict["seed_range"][0], ...@@ -221,7 +222,14 @@ seed_list = range(noneiv_conf_dict["seed_range"][0],
noneiv_conf_dict["seed_range"][1]) noneiv_conf_dict["seed_range"][1])
max_batch_number = 2 max_batch_number = 2
for seed in tqdm(seed_list): for seed in tqdm(seed_list):
train_data, test_data = load_data(seed=seed) try:
train_data, test_data, (true_train_x, true_train_y),\
(true_test_x, true_test_y) \
= load_data(seed=seed, return_ground_truth=True)
except TypeError:
train_data, test_data = load_data(seed=seed)
(true_train_x, true_train_y), (true_test_x, true_test_y)\
= (None,None), (None,None)
test_dataloader = DataLoader(test_data, test_dataloader = DataLoader(test_data,
batch_size=int(np.min((len(test_data), batch_size=int(np.min((len(test_data),
800))), shuffle=True) 800))), shuffle=True)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment