Commit e2c57f2d authored by Jörg Martin

Introduced normalize for normalized data

parent f9f0119d
Showing changed files with 105 additions and 43 deletions
@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
             seed_list=seeds[1:3],
             normalize=normalize,
             return_normalization=True)
-    def normalized_func(x):
-        unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-        y = func(unnormalized_x)
-        normalized_y = normalize_tensor(y, normalization_list[1])
-        return normalized_y
+    if normalize:
+        def normalized_func(x):
+            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+            y = func(unnormalized_x)
+            normalized_y = normalize_tensor(y, normalization_list[1])
+            return normalized_y
+    else:
+        def normalized_func(x):
+            return func(x)
     dataset_len = noisy_x.shape[0]
     # shuffle via seed
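For orientation, a minimal sketch of what the normalize_tensor / unnormalize_tensor helpers used here might do. This is an assumption for illustration only; the actual EIVData helpers may represent the normalization differently (e.g. as a reference tensor rather than an explicit (mean, std) pair):

import torch

def normalize_tensor_sketch(t, normalization):
    # hypothetical: normalization taken to be a (mean, std) pair
    mean, std = normalization
    return (t - mean) / std

def unnormalize_tensor_sketch(t, normalization):
    # hypothetical inverse of normalize_tensor_sketch
    mean, std = normalization
    return t * std + mean

# round trip should recover the input up to floating point error
x = torch.randn(10, 1)
stats = (x.mean(dim=0), x.std(dim=0))
assert torch.allclose(
        unnormalize_tensor_sketch(normalize_tensor_sketch(x, stats), stats),
        x, atol=1e-6)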
@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
             seed_list=seeds[1:3],
             normalize=normalize,
             return_normalization=True)
-    def normalized_func(x):
-        unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-        y = func(unnormalized_x)
-        normalized_y = normalize_tensor(y, normalization_list[1])
-        return normalized_y
+    if normalize:
+        def normalized_func(x):
+            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+            y = func(unnormalized_x)
+            normalized_y = normalize_tensor(y, normalization_list[1])
+            return normalized_y
+    else:
+        def normalized_func(x):
+            return func(x)
     dataset_len = noisy_x.shape[0]
     # shuffle via seed
@@ -53,11 +53,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
             seed_list=seeds[1:3],
             normalize=normalize,
             return_normalization=True)
-    def normalized_func(x):
-        unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-        y = func(unnormalized_x)
-        normalized_y = normalize_tensor(y, normalization_list[1])
-        return normalized_y
+    if normalize:
+        def normalized_func(x):
+            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+            y = func(unnormalized_x)
+            normalized_y = normalize_tensor(y, normalization_list[1])
+            return normalized_y
+    else:
+        def normalized_func(x):
+            return func(x)
     dataset_len = noisy_x.shape[0]
     # shuffle via seed
@@ -78,11 +78,15 @@ class repeated_sampling():
                 normalize=normalize,
                 normalization_list=[full_noisy_x, full_noisy_y],
                 return_normalization=False) # same normalization
-        def normalized_func(x):
-            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-            y = self.func(unnormalized_x)
-            normalized_y = normalize_tensor(y, normalization_list[1])
-            return normalized_y
+        if normalize:
+            def normalized_func(x):
+                unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+                y = self.func(unnormalized_x)
+                normalized_y = normalize_tensor(y, normalization_list[1])
+                return normalized_y
+        else:
+            def normalized_func(x):
+                return self.func(x)
         trainset = TensorDataset(noisy_train_x, noisy_train_y)
         testset = TensorDataset(noisy_test_x, noisy_test_y)
         true_trainset = TensorDataset(true_train_x, true_train_y,
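Note the design choice here: rather than returning func unchanged, the else branch defines an identity-wrapping normalized_func, so load_data and repeated_sampling keep a uniform return signature and downstream callers (such as the plotting script further below, which requests normalized_func explicitly) work regardless of the normalize flag.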
@@ -54,11 +54,15 @@ def load_data(seed=0, splitting_part=0.8, normalize=True,
             seed_list=seeds[1:3],
             normalize=normalize,
             return_normalization=True)
-    def normalized_func(x):
-        unnormalized_x = unnormalize_tensor(x, normalization_list[0])
-        y = func(unnormalized_x)
-        normalized_y = normalize_tensor(y, normalization_list[1])
-        return normalized_y
+    if normalize:
+        def normalized_func(x):
+            unnormalized_x = unnormalize_tensor(x, normalization_list[0])
+            y = func(unnormalized_x)
+            normalized_y = normalize_tensor(y, normalization_list[1])
+            return normalized_y
+    else:
+        def normalized_func(x):
+            return func(x)
     dataset_len = noisy_x.shape[0]
     # shuffle via seed
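Illustrative call sites for the updated loaders: normalize=False is used analogously by the training script further below, and the return_normalized_func keyword appears in the plotting script. That both keywords combine as in the second call is an assumption:

# hypothetical combined usage of the updated load_data
train_data, test_data = load_data(seed=0, normalize=False)  # raw, unnormalized data
_, test_data, normalized_func = load_data(seed=0, normalize=True,
        return_normalized_func=True)
y_normalized = normalized_func(test_data[0][0])  # ground truth in normalized coordinates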
 {
     "long_dataname": "cubic",
     "short_dataname": "cubic",
+    "normalize": false,
     "lr": 1e-3,
     "batch_size": 64,
     "test_batch_size": 800,
 ...

 {
     "long_dataname": "linear",
     "short_dataname": "linear",
+    "normalize": false,
     "lr": 1e-3,
     "batch_size": 64,
     "test_batch_size": 800,
 ...

 {
     "long_dataname": "quadratic",
     "short_dataname": "quadratic",
+    "normalize": false,
     "lr": 1e-3,
     "batch_size": 64,
     "test_batch_size": 800,
 ...

 {
     "long_dataname": "sine",
     "short_dataname": "sine",
+    "normalize": false,
     "lr": 1e-3,
     "batch_size": 64,
     "test_batch_size": 800,
 ...

 {
     "long_dataname": "cubic",
     "short_dataname": "cubic",
+    "normalize": false,
     "lr": 1e-3,
     "batch_size": 64,
     "test_batch_size": 800,
 ...

 {
     "long_dataname": "linear",
     "short_dataname": "linear",
+    "normalize": false,
     "lr": 1e-3,
     "batch_size": 64,
     "test_batch_size": 800,
 ...

 {
     "long_dataname": "quadratic",
     "short_dataname": "quadratic",
+    "normalize": false,
     "lr": 1e-3,
     "batch_size": 64,
     "test_batch_size": 800,
 ...

 {
     "long_dataname": "sine",
     "short_dataname": "sine",
+    "normalize": false,
     "lr": 1e-3,
     "batch_size": 64,
     "test_batch_size": 800,
 ...
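These eight files are presumably the eiv_*/noneiv_* configuration pairs for the four simulated datasets. They are consumed by the scripts below roughly as follows (path and keys as in the evaluation script; dict.get is an equivalent, more compact form of the try/except KeyError fallback introduced in this commit):

import json
import os

data = 'sine'  # short dataname: cubic, linear, quadratic or sine
with open(os.path.join('configurations', f'eiv_{data}.json'), 'r') as conf_file:
    conf_dict = json.load(conf_file)
normalize = conf_dict.get('normalize', True)  # normalize by default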
@@ -31,6 +31,12 @@ with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as conf_file:
     eiv_conf_dict = json.load(conf_file)
 with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as conf_file:
     noneiv_conf_dict = json.load(conf_file)
+try:
+    normalize = eiv_conf_dict['normalize']
+    assert normalize == noneiv_conf_dict['normalize']
+except KeyError:
+    # normalize by default
+    normalize = True
 long_dataname = eiv_conf_dict["long_dataname"]
 short_dataname = eiv_conf_dict["short_dataname"]
@@ -40,7 +46,7 @@ print(f"Evaluating {long_dataname}")
 scale_outputs = False
 load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
-train_data, test_data = load_data()
+train_data, test_data = load_data(normalize=normalize)
 input_dim = train_data[0][0].numel()
 output_dim = train_data[0][1].numel()
@@ -240,7 +246,8 @@ def collect_metrics(x_y_pairs, seed=0,
 def collect_full_seed_range_metrics(load_data,
         seed_range, test_batch_size=100, test_samples=10,
         noneiv_number_of_draws=100, eiv_number_of_draws=[100,5], device=device,
-        scale_outputs=scale_outputs):
+        scale_outputs=scale_outputs,
+        normalize=normalize):
     """
     Collect metrics that need all seeds for their computation.
     :param load_data: load_data map should take seed as an argument and,
@@ -257,6 +264,7 @@ def collect_full_seed_range_metrics(load_data,
     :param device: The torch.device to use
     :param scale_outputs: Boolean, scale the outputs for some metrics. Defaults
         to False.
+    :param normalize: Boolean, whether to normalize the data
     :returns: Dictionaries noneiv_metrics, eiv_metrics
     """
     noneiv_metrics = {}
@@ -267,9 +275,10 @@ def collect_full_seed_range_metrics(load_data,
        # load data according to seed
        try:
            train_data, test_data, true_train_data, true_test_data \
-                    = load_data(seed=seed, return_ground_truth=True)
+                    = load_data(seed=seed, return_ground_truth=True,
+                            normalize=normalize)
        except TypeError:
-            train_data, test_data = load_data(seed=seed)
+            train_data, test_data = load_data(seed=seed, normalize=normalize)
            true_train_data, true_test_data = None, None
        ## Compute x-dependent bias
@@ -460,9 +469,10 @@ number_of_test_samples = 2
 for seed in tqdm(seed_list):
     try:
         train_data, test_data, true_train_data, true_test_data \
-                = load_data(seed=seed, return_ground_truth=True)
+                = load_data(seed=seed, return_ground_truth=True,
+                        normalize=normalize)
     except TypeError:
-        train_data, test_data = load_data(seed=seed)
+        train_data, test_data = load_data(seed=seed, normalize=normalize)
         true_train_data, true_test_data = None, None
     if true_test_data is None:
         test_dataloader = DataLoader(test_data,
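The try/except TypeError pattern seen twice above works because load_data implementations without simulated ground truth do not accept the return_ground_truth keyword, so the call itself raises TypeError and the except branch retries without it. A self-contained sketch with illustrative names:

def load_data_sketch(seed=0, normalize=True):
    # illustrative loader without ground-truth support
    return 'train_data', 'test_data'

try:
    train_data, test_data, true_train_data, true_test_data = \
            load_data_sketch(seed=0, return_ground_truth=True, normalize=True)
except TypeError:
    # unexpected keyword argument raised TypeError; retry without it
    train_data, test_data = load_data_sketch(seed=0, normalize=True)
    true_train_data, true_test_data = None, None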
@@ -48,6 +48,11 @@ def compute_coverages(data, eiv, number_of_draws,
     long_dataname = conf_dict["long_dataname"]
     short_dataname = conf_dict["short_dataname"]
+    try:
+        normalize = conf_dict['normalize']
+    except KeyError:
+        # normalize by default
+        normalize = True
     load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
@@ -73,12 +78,13 @@
     # test whether there is a ground truth
     try:
         train_data, _, _, _ \
-                = load_data(seed=0, return_ground_truth=True)
+                = load_data(seed=0, return_ground_truth=True,
+                        normalize=normalize)
     except TypeError:
         # if not, end function
         return None, None
     else:
-        train_data, _ = load_data()
+        train_data, _ = load_data(normalize=normalize)
     print(f"Computing {'EiV' if eiv else 'non-EiV'} coverage for {long_dataname}")
@@ -145,14 +151,15 @@
        """
        for seed in seed_list:
            if not use_ground_truth:
-                _, test_data = load_data(seed=seed)
+                _, test_data = load_data(seed=seed, normalize=normalize)
                test_dataloader = DataLoader(test_data,
                        batch_size=batch_size,
                        shuffle=True)
                yield test_dataloader
            else:
                _, _, _, true_test = \
-                        load_data(seed=seed, return_ground_truth=True)
+                        load_data(seed=seed, return_ground_truth=True,
+                                normalize=normalize)
                # take noisy x but noise-free y
                cut_true_test = VerticalCut(true_test,
                        components_to_pick=[2,1])
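The generator above pairs the noisy x (component 2) with the noise-free y (component 1) of the ground-truth dataset via VerticalCut. A hypothetical re-implementation of such a wrapper; the actual class in the repository may differ:

from torch.utils.data import Dataset

class VerticalCutSketch(Dataset):
    # keep only selected components of each dataset item, in the given order
    def __init__(self, dataset, components_to_pick):
        self.dataset = dataset
        self.components_to_pick = components_to_pick

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        return tuple(item[i] for i in self.components_to_pick)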
@@ -98,7 +98,8 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
     # determine dimensions
-    _, test_data = load_data(seed=plotting_seed, return_ground_truth=False)
+    _, test_data, normalized_func = load_data(seed=plotting_seed,
+            return_ground_truth=False, return_normalized_func=True)
     input_dim = test_data[0][0].numel()
     output_dim = test_data[0][1].numel()
     assert output_dim == 1
@@ -209,7 +210,8 @@ def compute_predictions_and_uncertainties(data, x_range, eiv, number_of_draws,
 data_list = ['sine'] # short datanames
-list_x_range = [torch.linspace(0.0,1.0, 50)]
+# TODO: Check which ranges are "correct"
+list_x_range = [torch.linspace(-2.5,2.5, 50)]
 list_color = [('red','blue')]
 list_number_of_draws = [((100,5), 100)]
 for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list,
@@ -229,11 +231,21 @@ for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list,
        plt.figure(i)
        plt.clf()
        x_values, y_values = eiv_plotting_dictionary['range_points']
-        plt.plot(x_values, y_values,'-', color='k')
-        # plt.plot(x_values, eiv_plotting_dictionary['prediction'],'-',
-        #         color=color[0])
-        # plt.plot(x_values, noneiv_plotting_dictionary['prediction'],'-',
-        #         color=color[1])
+        plt.plot(x_values.flatten(), y_values.flatten(), '-', color='k')
+        eiv_pred = eiv_plotting_dictionary['prediction']
+        eiv_unc = eiv_plotting_dictionary['uncertainty']
+        plt.plot(x_values.flatten(), eiv_pred, '-',
+                color=color[0])
+        plt.fill_between(x_values.flatten(), eiv_pred - k * eiv_unc,
+                eiv_pred + k * eiv_unc,
+                color=color[0], alpha=0.5)
+        noneiv_pred = noneiv_plotting_dictionary['prediction']
+        noneiv_unc = noneiv_plotting_dictionary['uncertainty']
+        plt.plot(x_values.flatten(), noneiv_pred, '-',
+                color=color[1])
+        plt.fill_between(x_values.flatten(), noneiv_pred - k * noneiv_unc,
+                noneiv_pred + k * noneiv_unc,
+                color=color[1], alpha=0.5)
    else:
        # multidimensional handling not included yet
        pass
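The fill_between calls added above draw mean ± k·sigma uncertainty bands, where k is presumably a coverage factor defined elsewhere in the plotting script. A self-contained illustration of the same pattern:

import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(-2.5, 2.5, 50)
pred = np.sin(x)                # stand-in for a model prediction
unc = 0.1 + 0.05 * np.abs(x)    # stand-in for a predictive standard deviation
k = 1.96                        # ~95% coverage under a Gaussian assumption
plt.plot(x, pred, '-', color='red')
plt.fill_between(x, pred - k * unc, pred + k * unc, color='red', alpha=0.5)
plt.show()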
@@ -50,6 +50,11 @@ fixed_std_x = conf_dict['fixed_std_x']
 gamma = conf_dict["gamma"]
 hidden_layers = conf_dict["hidden_layers"]
 seed_range = conf_dict['seed_range']
+try:
+    normalize = conf_dict['normalize']
+except KeyError:
+    # normalize by default
+    normalize = True
 print(f"Training on {long_dataname} data")
@@ -199,7 +204,7 @@ def train_on_data(init_std_y, seed):
     set_seeds(seed)
     # load Datasets
     train_data, test_data = load_data(seed=seed, splitting_part=0.8,
-            normalize=True)
+            normalize=normalize)
     # make dataloaders
     train_dataloader = DataLoader(train_data, batch_size=batch_size,
             shuffle=True)