Skip to content
Snippets Groups Projects
Commit e2c57f2d authored by Jörg Martin's avatar Jörg Martin
Browse files

Introduced normalize for normalized data

parent f9f0119d
No related branches found
No related tags found
No related merge requests found
Showing
with 105 additions and 43 deletions
...@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, ...@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
seed_list=seeds[1:3], seed_list=seeds[1:3],
normalize=normalize, normalize=normalize,
return_normalization=True) return_normalization=True)
def normalized_func(x): if normalize:
unnormalized_x = unnormalize_tensor(x, normalization_list[0]) def normalized_func(x):
y = func(unnormalized_x) unnormalized_x = unnormalize_tensor(x, normalization_list[0])
normalized_y = normalize_tensor(y, normalization_list[1]) y = func(unnormalized_x)
return normalized_y normalized_y = normalize_tensor(y, normalization_list[1])
return normalized_y
else:
def normalized_func(x):
return func(x)
dataset_len = noisy_x.shape[0] dataset_len = noisy_x.shape[0]
# shuffle via seed # shuffle via seed
......
...@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, ...@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
seed_list=seeds[1:3], seed_list=seeds[1:3],
normalize=normalize, normalize=normalize,
return_normalization=True) return_normalization=True)
def normalized_func(x): if normalize:
unnormalized_x = unnormalize_tensor(x, normalization_list[0]) def normalized_func(x):
y = func(unnormalized_x) unnormalized_x = unnormalize_tensor(x, normalization_list[0])
normalized_y = normalize_tensor(y, normalization_list[1]) y = func(unnormalized_x)
return normalized_y normalized_y = normalize_tensor(y, normalization_list[1])
return normalized_y
else:
def normalized_func(x):
return func(x)
dataset_len = noisy_x.shape[0] dataset_len = noisy_x.shape[0]
# shuffle via seed # shuffle via seed
......
...@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, ...@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
seed_list=seeds[1:3], seed_list=seeds[1:3],
normalize=normalize, normalize=normalize,
return_normalization=True) return_normalization=True)
def normalized_func(x): if normalize:
unnormalized_x = unnormalize_tensor(x, normalization_list[0]) def normalized_func(x):
y = func(unnormalized_x) unnormalized_x = unnormalize_tensor(x, normalization_list[0])
normalized_y = normalize_tensor(y, normalization_list[1]) y = func(unnormalized_x)
return normalized_y normalized_y = normalize_tensor(y, normalization_list[1])
return normalized_y
else:
def normalized_func(x):
return func(x)
dataset_len = noisy_x.shape[0] dataset_len = noisy_x.shape[0]
# shuffle via seed # shuffle via seed
......
...@@ -78,11 +78,15 @@ class repeated_sampling(): ...@@ -78,11 +78,15 @@ class repeated_sampling():
normalize=normalize, normalize=normalize,
normalization_list=[full_noisy_x, full_noisy_y], normalization_list=[full_noisy_x, full_noisy_y],
return_normalization=False) # same normalization return_normalization=False) # same normalization
def normalized_func(x): if normalize:
unnormalized_x = unnormalize_tensor(x, normalization_list[0]) def normalized_func(x):
y = self.func(unnormalized_x) unnormalized_x = unnormalize_tensor(x, normalization_list[0])
normalized_y = normalize_tensor(y, normalization_list[1]) y = self.func(unnormalized_x)
return normalized_y normalized_y = normalize_tensor(y, normalization_list[1])
return normalized_y
else:
def normalized_func(x):
return self.func(x)
trainset = TensorDataset(noisy_train_x, noisy_train_y) trainset = TensorDataset(noisy_train_x, noisy_train_y)
testset = TensorDataset(noisy_test_x, noisy_test_y) testset = TensorDataset(noisy_test_x, noisy_test_y)
true_trainset = TensorDataset(true_train_x, true_train_y, true_trainset = TensorDataset(true_train_x, true_train_y,
......
...@@ -54,11 +54,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True, ...@@ -54,11 +54,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
seed_list=seeds[1:3], seed_list=seeds[1:3],
normalize=normalize, normalize=normalize,
return_normalization=True) return_normalization=True)
def normalized_func(x): if normalize:
unnormalized_x = unnormalize_tensor(x, normalization_list[0]) def normalized_func(x):
y = func(unnormalized_x) unnormalized_x = unnormalize_tensor(x, normalization_list[0])
normalized_y = normalize_tensor(y, normalization_list[1]) y = func(unnormalized_x)
return normalized_y normalized_y = normalize_tensor(y, normalization_list[1])
return normalized_y
else:
def normalized_func(x):
return func(x)
dataset_len = noisy_x.shape[0] dataset_len = noisy_x.shape[0]
# shuffle via seed # shuffle via seed
......
{ {
"long_dataname": "cubic", "long_dataname": "cubic",
"short_dataname": "cubic", "short_dataname": "cubic",
"normalize": false,
"lr": 1e-3, "lr": 1e-3,
"batch_size": 64, "batch_size": 64,
"test_batch_size": 800, "test_batch_size": 800,
......
{ {
"long_dataname": "linear", "long_dataname": "linear",
"short_dataname": "linear", "short_dataname": "linear",
"normalize": false,
"lr": 1e-3, "lr": 1e-3,
"batch_size": 64, "batch_size": 64,
"test_batch_size": 800, "test_batch_size": 800,
......
{ {
"long_dataname": "quadratic", "long_dataname": "quadratic",
"short_dataname": "quadratic", "short_dataname": "quadratic",
"normalize": false,
"lr": 1e-3, "lr": 1e-3,
"batch_size": 64, "batch_size": 64,
"test_batch_size": 800, "test_batch_size": 800,
......
{ {
"long_dataname": "sine", "long_dataname": "sine",
"short_dataname": "sine", "short_dataname": "sine",
"normalize": false,
"lr": 1e-3, "lr": 1e-3,
"batch_size": 64, "batch_size": 64,
"test_batch_size": 800, "test_batch_size": 800,
......
{ {
"long_dataname": "cubic", "long_dataname": "cubic",
"short_dataname": "cubic", "short_dataname": "cubic",
"normalize": false,
"lr": 1e-3, "lr": 1e-3,
"batch_size": 64, "batch_size": 64,
"test_batch_size": 800, "test_batch_size": 800,
......
{ {
"long_dataname": "linear", "long_dataname": "linear",
"short_dataname": "linear", "short_dataname": "linear",
"normalize": false,
"lr": 1e-3, "lr": 1e-3,
"batch_size": 64, "batch_size": 64,
"test_batch_size": 800, "test_batch_size": 800,
......
{ {
"long_dataname": "quadratic", "long_dataname": "quadratic",
"short_dataname": "quadratic", "short_dataname": "quadratic",
"normalize": false,
"lr": 1e-3, "lr": 1e-3,
"batch_size": 64, "batch_size": 64,
"test_batch_size": 800, "test_batch_size": 800,
......
{ {
"long_dataname": "sine", "long_dataname": "sine",
"short_dataname": "sine", "short_dataname": "sine",
"normalize": false,
"lr": 1e-3, "lr": 1e-3,
"batch_size": 64, "batch_size": 64,
"test_batch_size": 800, "test_batch_size": 800,
......
...@@ -31,6 +31,12 @@ with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as conf_file: ...@@ -31,6 +31,12 @@ with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as conf_file:
eiv_conf_dict = json.load(conf_file) eiv_conf_dict = json.load(conf_file)
with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as conf_file: with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as conf_file:
noneiv_conf_dict = json.load(conf_file) noneiv_conf_dict = json.load(conf_file)
try:
normalize = eiv_conf_dict['normalize']
assert normalize == noneiv_conf_dict['normalize']
except KeyError:
# normalize by default
normalize = True
long_dataname = eiv_conf_dict["long_dataname"] long_dataname = eiv_conf_dict["long_dataname"]
short_dataname = eiv_conf_dict["short_dataname"] short_dataname = eiv_conf_dict["short_dataname"]
...@@ -40,7 +46,7 @@ print(f"Evaluating {long_dataname}") ...@@ -40,7 +46,7 @@ print(f"Evaluating {long_dataname}")
scale_outputs = False scale_outputs = False
load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
train_data, test_data = load_data() train_data, test_data = load_data(normalize=normalize)
input_dim = train_data[0][0].numel() input_dim = train_data[0][0].numel()
output_dim = train_data[0][1].numel() output_dim = train_data[0][1].numel()
...@@ -240,7 +246,8 @@ def collect_metrics(x_y_pairs, seed=0, ...@@ -240,7 +246,8 @@ def collect_metrics(x_y_pairs, seed=0,
def collect_full_seed_range_metrics(load_data, def collect_full_seed_range_metrics(load_data,
seed_range,test_batch_size = 100, test_samples = 10, seed_range,test_batch_size = 100, test_samples = 10,
noneiv_number_of_draws=100, eiv_number_of_draws=[100,5], device=device, noneiv_number_of_draws=100, eiv_number_of_draws=[100,5], device=device,
scale_outputs=scale_outputs): scale_outputs=scale_outputs,
normalize=normalize):
""" """
Collect metrics that need all seeds for their computation. Collect metrics that need all seeds for their computation.
:param load_data: load_data map should take seed as an argument and, :param load_data: load_data map should take seed as an argument and,
...@@ -257,6 +264,7 @@ def collect_full_seed_range_metrics(load_data, ...@@ -257,6 +264,7 @@ def collect_full_seed_range_metrics(load_data,
:param device: The torch.device to use :param device: The torch.device to use
:param scale_outputs: Boolean, scale the outputs for some metrics. Defaults :param scale_outputs: Boolean, scale the outputs for some metrics. Defaults
to False. to False.
:param normalize: Boolean, whether to normalize the data
:returns: Dictionaries noneiv_metrics, eiv_metrics :returns: Dictionaries noneiv_metrics, eiv_metrics
""" """
noneiv_metrics = {} noneiv_metrics = {}
...@@ -267,9 +275,10 @@ def collect_full_seed_range_metrics(load_data, ...@@ -267,9 +275,10 @@ def collect_full_seed_range_metrics(load_data,
# load data according to seed # load data according to seed
try: try:
train_data, test_data, true_train_data, true_test_data \ train_data, test_data, true_train_data, true_test_data \
= load_data(seed=seed, return_ground_truth=True) = load_data(seed=seed, return_ground_truth=True,
normalize=normalize)
except TypeError: except TypeError:
train_data, test_data = load_data(seed=seed) train_data, test_data = load_data(seed=seed, normalize=normalize)
true_train_data, true_test_data = None, None true_train_data, true_test_data = None, None
## Compute x-dependent bias ## Compute x-dependent bias
...@@ -460,9 +469,10 @@ number_of_test_samples = 2 ...@@ -460,9 +469,10 @@ number_of_test_samples = 2
for seed in tqdm(seed_list): for seed in tqdm(seed_list):
try: try:
train_data, test_data, true_train_data, true_test_data \ train_data, test_data, true_train_data, true_test_data \
= load_data(seed=seed, return_ground_truth=True) = load_data(seed=seed, return_ground_truth=True,
normalize=normalize)
except TypeError: except TypeError:
train_data, test_data = load_data(seed=seed) train_data, test_data = load_data(seed=seed, normalize=normalize)
true_train_data, true_test_data = None, None true_train_data, true_test_data = None, None
if true_test_data is None: if true_test_data is None:
test_dataloader = DataLoader(test_data, test_dataloader = DataLoader(test_data,
......
...@@ -48,6 +48,11 @@ def compute_coverages(data, eiv, number_of_draws, ...@@ -48,6 +48,11 @@ def compute_coverages(data, eiv, number_of_draws,
long_dataname = conf_dict["long_dataname"] long_dataname = conf_dict["long_dataname"]
short_dataname = conf_dict["short_dataname"] short_dataname = conf_dict["short_dataname"]
try:
normalize = conf_dict['normalize']
except KeyError:
# normalize by default
normalize = True
load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
...@@ -73,12 +78,13 @@ def compute_coverages(data, eiv, number_of_draws, ...@@ -73,12 +78,13 @@ def compute_coverages(data, eiv, number_of_draws,
# test whether there is a ground truth # test whether there is a ground truth
try: try:
train_data, _, _,_ \ train_data, _, _,_ \
= load_data(seed=0, return_ground_truth=True) = load_data(seed=0, return_ground_truth=True,
normalize=normalize)
except TypeError: except TypeError:
# if not, end function # if not, end function
return None,None return None,None
else: else:
train_data, _ = load_data() train_data, _ = load_data(normalize=normalize)
print(f"Computing {'EiV' if eiv else 'non-EiV'} coverage for {long_dataname}") print(f"Computing {'EiV' if eiv else 'non-EiV'} coverage for {long_dataname}")
...@@ -145,14 +151,15 @@ def compute_coverages(data, eiv, number_of_draws, ...@@ -145,14 +151,15 @@ def compute_coverages(data, eiv, number_of_draws,
""" """
for seed in seed_list: for seed in seed_list:
if not use_ground_truth: if not use_ground_truth:
_, test_data = load_data(seed=seed) _, test_data = load_data(seed=seed, normalize=normalize)
test_dataloader = DataLoader(test_data, test_dataloader = DataLoader(test_data,
batch_size=batch_size, batch_size=batch_size,
shuffle=True) shuffle=True)
yield test_dataloader yield test_dataloader
else: else:
_, _, _, true_test =\ _, _, _, true_test =\
load_data(seed=seed, return_ground_truth=True) load_data(seed=seed, return_ground_truth=True,
normalize=normalize)
# take noisy x but unnoisy y # take noisy x but unnoisy y
cut_true_test = VerticalCut(true_test, cut_true_test = VerticalCut(true_test,
components_to_pick=[2,1]) components_to_pick=[2,1])
......
...@@ -98,7 +98,8 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws, ...@@ -98,7 +98,8 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
# determine dimensions # determine dimensions
_, test_data = load_data(seed=plotting_seed, return_ground_truth=False) _, test_data, normalized_func = load_data(seed=plotting_seed, return_ground_truth=False,
return_normalized_func=True)
input_dim = test_data[0][0].numel() input_dim = test_data[0][0].numel()
output_dim = test_data[0][1].numel() output_dim = test_data[0][1].numel()
assert output_dim == 1 assert output_dim == 1
...@@ -209,7 +210,8 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws, ...@@ -209,7 +210,8 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
data_list = ['sine'] # short datanames data_list = ['sine'] # short datanames
list_x_range = [torch.linspace(0.0,1.0, 50)] #TODO: Check which ranges are "correct"
list_x_range = [torch.linspace(-2.5,2.5, 50)]
list_color = [('red','blue')] list_color = [('red','blue')]
list_number_of_draws = [((100,5), 100)] list_number_of_draws = [((100,5), 100)]
for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list, for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list,
...@@ -229,11 +231,21 @@ for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list, ...@@ -229,11 +231,21 @@ for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list,
plt.figure(i) plt.figure(i)
plt.clf() plt.clf()
x_values, y_values = eiv_plotting_dictionary['range_points'] x_values, y_values = eiv_plotting_dictionary['range_points']
plt.plot(x_values, y_values,'-', color='k') plt.plot(x_values.flatten(), y_values.flatten(),'-', color='k')
# plt.plot(x_values, eiv_plotting_dictionary['prediction'],'-', eiv_pred = eiv_plotting_dictionary['prediction']
# color=color[0]) eiv_unc = eiv_plotting_dictionary['uncertainty']
# plt.plot(x_values, noneiv_plotting_dictionary['prediction'],'-', plt.plot(x_values, eiv_pred,'-',
# color=color[1]) color=color[0])
plt.fill_between(x_values.flatten(), eiv_pred-k * eiv_unc,
eiv_pred + k * eiv_unc,
color=color[0], alpha=0.5)
noneiv_pred = noneiv_plotting_dictionary['prediction']
noneiv_unc = noneiv_plotting_dictionary['uncertainty']
plt.plot(x_values.flatten(), noneiv_pred,'-',
color=color[1])
plt.fill_between(x_values.flatten(), noneiv_pred-k * noneiv_unc,
noneiv_pred + k * noneiv_unc,
color=color[1], alpha=0.5)
else: else:
# multidimensional handling not included yet # multidimensional handling not included yet
pass pass
......
...@@ -50,6 +50,11 @@ fixed_std_x = conf_dict['fixed_std_x'] ...@@ -50,6 +50,11 @@ fixed_std_x = conf_dict['fixed_std_x']
gamma = conf_dict["gamma"] gamma = conf_dict["gamma"]
hidden_layers = conf_dict["hidden_layers"] hidden_layers = conf_dict["hidden_layers"]
seed_range = conf_dict['seed_range'] seed_range = conf_dict['seed_range']
try:
normalize = conf_dict['normalize']
except KeyError:
# normalize by default
normalize = True
print(f"Training on {long_dataname} data") print(f"Training on {long_dataname} data")
...@@ -199,7 +204,7 @@ def train_on_data(init_std_y, seed): ...@@ -199,7 +204,7 @@ def train_on_data(init_std_y, seed):
set_seeds(seed) set_seeds(seed)
# load Datasets # load Datasets
train_data, test_data = load_data(seed=seed, splitting_part=0.8, train_data, test_data = load_data(seed=seed, splitting_part=0.8,
normalize=True) normalize=normalize)
# make dataloaders # make dataloaders
train_dataloader = DataLoader(train_data, batch_size=batch_size, train_dataloader = DataLoader(train_data, batch_size=batch_size,
shuffle=True) shuffle=True)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment