Skip to content
Snippets Groups Projects
Commit a4f80144 authored by Jörg Martin
Browse files

First attempt to deal with ground truth

parent 594fc013
Branches
Tags
No related merge requests found
......@@ -31,11 +31,12 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False.
:returns: linear_trainset, linear_testset if return_ground_truth is False,
else linear_trainset, linear_testset, (true_x, true_y)
else linear_trainset, linear_testset, (true_train_x, true_train_y),
(true_test_x, true_test_y)
"""
random_generator = torch.Generator().manual_seed(seed)
# draw different seeds for noise and splitting
seeds = torch.randint(0,sys.maxsize,(4,), generator=random_generator)
seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator)
# create new generators from tensor seeds
create_generator = lambda tensor_seed:\
torch.Generator().manual_seed(tensor_seed.item())
......@@ -55,14 +56,18 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
true_x = (true_x-normalization_x[0])/normalization_x[1]
noisy_y = (noisy_y-normalization_y[0])/normalization_y[1]
true_y = (true_y-normalization_y[0])/normalization_y[1]
linear_dataset = TensorDataset(noisy_x, noisy_y)
dataset_len = len(linear_dataset)
dataset_len = noisy_x.shape[0]
train_len = int(dataset_len*splitting_part)
test_len = dataset_len - train_len
linear_trainset, linear_testset = random_split(linear_dataset,
lengths=[train_len, test_len],
generator=create_generator(seeds[3]))
true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
true_train_y, true_test_y = torch.split(true_y, [train_len, test_len])
noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len])
noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
linear_trainset = TensorDataset(noisy_train_x, noisy_train_y)
linear_testset = TensorDataset(noisy_test_x, noisy_test_y)
if not return_ground_truth:
return linear_trainset, linear_testset
else:
return linear_trainset, linear_testset, (true_x, true_y)
return linear_trainset, linear_testset, (true_train_x, true_train_y),\
(true_test_x, true_test_y)
......@@ -31,11 +31,12 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
:param return_ground_truth: Boolean. If True, the unnoisy ground truth will
also be returned. Defaults to False.
:returns: linear_trainset, linear_testset if return_ground_truth is False,
else linear_trainset, linear_testset, (true_x, true_y)
else linear_trainset, linear_testset, (true_train_x, true_train_y),
(true_test_x, true_test_y)
"""
random_generator = torch.Generator().manual_seed(seed)
# draw different seeds for noise and splitting
seeds = torch.randint(0,sys.maxsize,(4,), generator=random_generator)
seeds = torch.randint(0,sys.maxsize,(3,), generator=random_generator)
# create new generators from tensor seeds
create_generator = lambda tensor_seed:\
torch.Generator().manual_seed(tensor_seed.item())
......@@ -55,14 +56,18 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
true_x = (true_x-normalization_x[0])/normalization_x[1]
noisy_y = (noisy_y-normalization_y[0])/normalization_y[1]
true_y = (true_y-normalization_y[0])/normalization_y[1]
linear_dataset = TensorDataset(noisy_x, noisy_y)
dataset_len = len(linear_dataset)
dataset_len = noisy_x.shape[0]
train_len = int(dataset_len*splitting_part)
test_len = dataset_len - train_len
linear_trainset, linear_testset = random_split(linear_dataset,
lengths=[train_len, test_len],
generator=create_generator(seeds[3]))
true_train_x, true_test_x = torch.split(true_x, [train_len, test_len])
true_train_y, true_test_y = torch.split(true_y, [train_len, test_len])
noisy_train_x, noisy_test_x = torch.split(noisy_x, [train_len, test_len])
noisy_train_y, noisy_test_y = torch.split(noisy_y, [train_len, test_len])
linear_trainset = TensorDataset(noisy_train_x, noisy_train_y)
linear_testset = TensorDataset(noisy_test_x, noisy_test_y)
if not return_ground_truth:
return linear_trainset, linear_testset
else:
return linear_trainset, linear_testset, (true_x, true_y)
return linear_trainset, linear_testset, (true_train_x, true_train_y),\
(true_test_x, true_test_y)
......@@ -124,7 +124,8 @@ def collect_metrics(x,y, seed=0,
noneiv_metrics['coverage_normalized'],_ =\
epistemic_coverage(not_averaged_predictions, y, normalize_errors=True)
noneiv_metrics['res_std'] = normalized_std(not_averaged_predictions, y)
# metrics that need a ground truth
# NLL
......@@ -221,7 +222,14 @@ seed_list = range(noneiv_conf_dict["seed_range"][0],
noneiv_conf_dict["seed_range"][1])
max_batch_number = 2
for seed in tqdm(seed_list):
train_data, test_data = load_data(seed=seed)
try:
train_data, test_data, (true_train_x, true_train_y),\
(true_test_x, true_test_y) \
= load_data(seed=seed, return_ground_truth=True)
except TypeError:
train_data, test_data = load_data(seed=seed)
(true_train_x, true_train_y), (true_test_x, true_test_y)\
= (None,None), (None,None)
test_dataloader = DataLoader(test_data,
batch_size=int(np.min((len(test_data),
800))), shuffle=True)
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment