Commit c8b6f1d4 authored by Jörg Martin

EiV predict fixed

parent 981f8329
@@ -24,6 +24,9 @@ class FNNEIV(nn.Module):
:param h: A list specifying the number of neurons in each layer.
:param fixed_std_x: If given, this value will be the output of the method
`get_std_x()`, regardless of the deming factor.
:param repetition: Positive integer, the default number of times the input
is repeated; defaults to 1. For a single call this can also be specified in
the forward method (see the usage sketch below).
**Note**:
- To change the deming factor afterwards, use the method `change_deming`
- To change fixed_std_x afterwards, use the method `change_fixed_std_x`
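A minimal usage sketch of the constructor with the new `repetition` argument (the concrete values are illustrative assumptions, not defaults from this commit):

# Sketch only: layer sizes and noise values are assumed for illustration.
net = FNNEIV(p=0.2, init_std_y=1.0, deming=1.0,
        h=[10, 1024, 1024, 1024, 1024, 1],
        fixed_std_x=0.05, repetition=5)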
@@ -31,14 +34,14 @@ class FNNEIV(nn.Module):
LeakyReLUSlope = 1e-2
def __init__(self, p = 0.2, init_std_y=1.0, precision_prior_zeta=0.0,
deming=1.0, h=[10, 1024,1024,1024,1024, 1],
fixed_std_x = None):
fixed_std_x = None, repetition = 1):
super().__init__()
# part before Bernoulli dropout
self.init_std_y = init_std_y
InverseSoftplus = lambda sigma: torch.log(torch.exp(sigma) - 1 )
self.std_y_par = nn.parameter.Parameter(
InverseSoftplus(torch.tensor([init_std_y])))
self._repetition = 1
self._repetition = repetition
self.main = nn.Sequential(
EIVInput(precision_prior_zeta=precision_prior_zeta,
external_std_x=self.get_std_x),
@@ -110,10 +113,11 @@ class FNNEIV(nn.Module):
return self._repetition
def forward(self, x, repetition=1):
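# remember the module-level repetition, apply the per-call value, and restore it after the pass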
old_repetition = self._repetition
self._repetition = repetition
mu = self.main(x)
sigma = self.sigma(mu)
self._repetition = 1
self._repetition = old_repetition
return mu, sigma
def regularizer(self, x, lamb):
@@ -154,8 +158,10 @@ class FNNEIV(nn.Module):
regularization += self.main[0].neg_x_evidence(x)
return regularization
def predict(self, x, number_of_draws=100, remove_graph=True
# TODO: repetition
# + Test via breakpoints
def predict(self, x, number_of_draws=[100,5], number_of_parameter_chunks = None,
remove_graph=True
, take_average_of_prediction=True):
"""
Average over `number_of_draws` forward passes. If
@@ -164,19 +170,46 @@ class FNNEIV(nn.Module):
**Note**: This method touches neither the input noise nor the Dropout.
The corresponding setting is left to the user!
:param x: A torch.tensor, the input
:param number_of_draws: Number of draws to obtain from x
:param number_of_draws: An integer or a list. If an integer is given, it
will be converted internally to `[number_of_draws, 1]`. Numbers of draws
to obtain from x via parameter sampling (first element) and input noise
sampling (second element).
:param number_of_parameter_chunks: An integer or None (default). If
None, the second element of `number_of_draws` will be taken (and will
thus be identical to 1 if `number_of_draws` is an integer, see above).
Samples in the parameter space will be divided into
`number_of_parameter_chunks` chunks when collected. Can be used to
reduce the memory usage.
:param remove_graph: If True (default) the output will
be detached to save memory
:param take_average_of_prediction: If False, no averaging will be
applied to the prediction and the second dimension of the first output
will count the number_of_draws.
"""
x, = repeat_tensors(x, number_of_draws=number_of_draws)
pred, sigma = self.forward(x)
if remove_graph:
pred, sigma = pred.detach(), sigma.detach()
pred, sigma = reshape_to_chunks(pred, sigma,
number_of_draws=number_of_draws)
# x, = repeat_tensors(x, number_of_draws=number_of_draws)
if type(number_of_draws) is int:
number_of_draws = [number_of_draws, 1]
if number_of_parameter_chunks is None:
number_of_parameter_chunks = number_of_draws[1]
chunk_size = number_of_draws[0] // number_of_parameter_chunks
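# illustration with assumed values: number_of_draws=[100, 5] and
# number_of_parameter_chunks=None gives 5 chunks of chunk_size = 100 // 5 = 20
# parameter draws each; an extra, smaller chunk is appended when the division
# does not come out even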
pred_collection, sigma_collection = [], []
remaining_draws = number_of_draws[0]
while remaining_draws > 0:
if remaining_draws < chunk_size:
parameter_sample_size = remaining_draws
else:
parameter_sample_size = chunk_size
repeated_x, = repeat_tensors(x, number_of_draws=parameter_sample_size * number_of_draws[1])
pred, sigma = self.forward(repeated_x, repetition=number_of_draws[1])
if remove_graph:
pred, sigma = pred.detach(), sigma.detach()
pred, sigma = reshape_to_chunks(pred, sigma,
number_of_draws=parameter_sample_size * number_of_draws[1])
pred_collection.append(pred)
sigma_collection.append(sigma)
remaining_draws -= parameter_sample_size
pred = torch.cat(pred_collection, dim=1)
sigma = torch.cat(sigma_collection, dim=1)
# reduce along the draws (actually redundant for sigma)
if take_average_of_prediction:
pred, sigma = torch.mean(pred, dim=1), torch.mean(sigma, dim=1)
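A hedged call sketch for the reworked `predict`; `net` and `x` are placeholders and the argument values are assumptions for illustration:

# Sketch only: 100 parameter draws (collected in 10 chunks) and 5 input-noise repetitions.
pred, sigma = net.predict(x,
        number_of_draws=[100, 5],
        number_of_parameter_chunks=10,
        remove_graph=True,
        take_average_of_prediction=True)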
@@ -184,7 +217,10 @@ class FNNEIV(nn.Module):
sigma = torch.mean(sigma, dim=1)
return pred, sigma
def predictive_logdensity(self, x, y, number_of_draws=100, remove_graph=True,
# TODO: repetition
# + Implement
# + Test via breakpoints
def predictive_logdensity(self, x, y, number_of_draws=[100, 5], number_of_parameter_chunks = None, remove_graph=True,
average_batch_dimension=True, scale_labels=None,
decouple_dimensions=False):
"""
@@ -193,7 +229,16 @@ class FNNEIV(nn.Module):
the batch dimension.
:param x: A torch.tensor, the input
:param y: A torch.tensor, labels on which to evaluate the density
:param number_of_draws: Number of draws to obtain from x
:param number_of_draws: An integer or a list. If an integer is given, it
will be converted internally to `[number_of_draws, 1]`. Numbers of draws
to obtain from x via parameter sampling (first element) and input noise
sampling (second element).
:param number_of_parameter_chunks: An integer or None (default). If
None, the second element of `number_of_draws` will be taken (and will
thus be identical to 1 if `number_of_draws` is an integer, see above).
Samples in the parameter space will be divided into
`number_of_parameter_chunks` chunks when collected. Can be used to
reduce the memory usage.
:param remove_graph: If True (default) the output will
be detached to save memory
:param average_batch_dimension: Boolean. If True (default) the values
@@ -201,7 +246,9 @@ class FNNEIV(nn.Module):
dimension will be left untouched and all values will be returned.
"""
out, sigmas = self.predict(x, number_of_draws=number_of_draws,
take_average_of_prediction=False, remove_graph=remove_graph)
number_of_parameter_chunks=number_of_parameter_chunks,
remove_graph=remove_graph,
take_average_of_prediction=False)
# Add "repetition" dimension to y and out
y = y[:,None,...]
sigmas = sigmas[:,None,...]
......
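Analogously, a hedged call sketch for `predictive_logdensity` under the new signature; `net`, `x`, `y` and `label_std` are placeholders, not values from the commit:

# Sketch only: evaluates the predictive log density with chunked parameter sampling.
logdens = net.predictive_logdensity(x, y,
        number_of_draws=[100, 5],
        number_of_parameter_chunks=None,   # defaults to number_of_draws[1]
        remove_graph=True,
        average_batch_dimension=True,
        scale_labels=label_std,            # e.g. train_data.dataset.std_labels.view((-1,))
        decouple_dimensions=False)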
@@ -20,13 +20,13 @@ train_eiv = importlib.import_module(f'train_eiv_{short_dataname}')
train_data, test_data = load_data()
test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data),
800))))
64))), shuffle=True)
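# note: np.max picks the larger of len(test_data) and 64, so a batch typically spans the whole test set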
input_dim = train_data[0][0].numel()
output_dim = train_data[0][1].numel()
def collect_metrics(x,y, seed=0,
noneiv_number_of_draws=500, eiv_number_of_draws=500,
decouple_dimensions=False):
noneiv_number_of_draws=100, eiv_number_of_draws=100,
decouple_dimensions=False, device=torch.device('cuda:1')):
"""
:param x: A torch.tensor, taken as input
:param y: A torch.tensor, taken as output
@@ -34,13 +34,14 @@ def collect_metrics(x,y, seed=0,
:param noneiv_number_of_draws: Number of draws for non-EiV model
for sampling from the posterior predictive. Defaults to 100.
:param eiv_number_of_draws: Number of draws for EiV model
for sampling from the posterior predictive. Defaults to 500.
for sampling from the posterior predictive. Defaults to 100.
:param decouple_dimensions: Boolean. If True, the unusual convention
of Gal et al. is followed where, in the evaluation of the
log-posterior-predictive, each dimension is treated independently and then
averaged. If False (default), a multivariate distribution is used.
:returns: noneiv_rmse, noneiv_logdens, eiv_rmse, eiv_logdens
"""
x,y = x.to(device), y.to(device)
init_std_y = train_noneiv.init_std_y_list[0]
unscaled_reg = train_noneiv.unscaled_reg
p = train_noneiv.p
@@ -50,9 +51,9 @@ def collect_metrics(x,y, seed=0,
f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
f'_p_{p:.2f}_seed_{seed}.pkl')
net = Networks.FNNBer(p=p, init_std_y=init_std_y,
h=[input_dim, *hidden_layers, output_dim])
h=[input_dim, *hidden_layers, output_dim]).to(device)
train_and_store.open_stored_training(saved_file=saved_file,
net=net)
net=net, device=device)
# RMSE
@@ -64,10 +65,10 @@ def collect_metrics(x,y, seed=0,
y = y.view((-1,1))
assert y.shape == out.shape
res = y-out
scale = train_data.dataset.std_labels
scale = train_data.dataset.std_labels.to(device)
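# rescale the residuals by the label standard deviation so the RMSE is reported in original units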
scaled_res = res * scale.view((1,-1))
scaled_res = scaled_res.detach().cpu().numpy().flatten()
noneiv_rmse = np.sqrt(np.mean(scaled_res**2))
# NLL
@@ -75,7 +76,9 @@ def collect_metrics(x,y, seed=0,
net.train()
noneiv_logdens = net.predictive_logdensity(x, y, number_of_draws=100,
decouple_dimensions=decouple_dimensions,
scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
scale_labels=\
train_data.dataset.std_labels.view((-1,)).to(device)\
).mean().detach().cpu().numpy()
if training_state:
net.train()
else:
@@ -88,11 +91,13 @@ def collect_metrics(x,y, seed=0,
hidden_layers = train_eiv.hidden_layers
fixed_std_x = train_eiv.fixed_std_x
saved_file = os.path.join('saved_networks',
f'eiv_{short_dataname}'\
f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
f'_p_{p:.2f}_seed_{seed}.pkl')
f'eiv_{short_dataname}'\
f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}'\
f'_seed_{seed}.pkl')
net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
h=[input_dim, *hidden_layers, output_dim], fixed_std_x=fixed_std_x)
h=[input_dim, *hidden_layers, output_dim],
fixed_std_x=fixed_std_x).to(device)
train_and_store.open_stored_training(saved_file=saved_file,
net=net)
# RMSE
@@ -106,10 +111,10 @@ def collect_metrics(x,y, seed=0,
y = y.view((-1,1))
assert y.shape == out.shape
res = y-out
scale = train_data.dataset.std_labels
scale = train_data.dataset.std_labels.to(device)
scaled_res = res * scale.view((1,-1))
scaled_res = scaled_res.detach().cpu().numpy().flatten()
eiv_rmse = np.sqrt(np.mean(scaled_res**2))
if training_state:
net.train()
else:
@@ -125,7 +130,9 @@ def collect_metrics(x,y, seed=0,
net.train()
eiv_logdens = net.predictive_logdensity(x, y, number_of_draws=100,
decouple_dimensions=decouple_dimensions,
scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
scale_labels=\
train_data.dataset.std_labels.view((-1,)).to(device)\
).mean().detach().cpu().numpy()
if training_state:
net.train()
else:
@@ -136,19 +143,32 @@ noneiv_rmse_collection = []
noneiv_logdens_collection = []
eiv_rmse_collection = []
eiv_logdens_collection = []
number_of_samples = 20
for _ in tqdm(range(number_of_samples)):
x,y = next(iter(test_dataloader))
noneiv_rmse, noneiv_logdens, eiv_rmse, eiv_logdens = collect_metrics(x,y)
noneiv_rmse_collection.append(noneiv_rmse)
noneiv_logdens_collection.append(noneiv_logdens)
eiv_rmse_collection.append(eiv_rmse)
eiv_logdens_collection.append(eiv_logdens)
num_test_epochs = 20
assert train_noneiv.seed_list == train_eiv.seed_list
seed_list = train_noneiv.seed_list
for seed in tqdm(seed_list):
train_data, test_data = load_data(seed=seed)
test_dataloader = DataLoader(test_data,
batch_size=int(np.max((len(test_data),
800))), shuffle=True)
for i in tqdm(range(num_test_epochs)):
for x,y in test_dataloader:
noneiv_rmse, noneiv_logdens, eiv_rmse, eiv_logdens =\
collect_metrics(x,y, seed=seed)
noneiv_rmse_collection.append(noneiv_rmse)
noneiv_logdens_collection.append(noneiv_logdens)
eiv_rmse_collection.append(eiv_rmse)
eiv_logdens_collection.append(eiv_logdens)
# TODO: Despite statistics, the fluctuations seem to be large
print('Non-EiV')
print(f'RMSE {np.mean(noneiv_rmse_collection):.3f} ({np.std(noneiv_rmse_collection)/np.sqrt(number_of_samples):.3f})')
print(f'LogDens {np.mean(noneiv_logdens_collection):.3f} ({np.std(noneiv_logdens_collection)/np.sqrt(number_of_samples):.3f})')
print(f'RMSE {np.mean(noneiv_rmse_collection):.3f} '\
f'({np.std(noneiv_rmse_collection)/np.sqrt(num_test_epochs):.3f})')
print(f'LogDens {np.mean(noneiv_logdens_collection):.3f} '\
f'({np.std(noneiv_logdens_collection)/np.sqrt(num_test_epochs):.3f})')
print('EiV')
print(f'RMSE {np.mean(eiv_rmse_collection):.3f} ({np.std(eiv_rmse_collection)/np.sqrt(number_of_samples):.3f})')
print(f'LogDens {np.mean(eiv_logdens_collection):.3f} ({np.std(eiv_logdens_collection)/np.sqrt(number_of_samples):.3f})')
print(f'RMSE {np.mean(eiv_rmse_collection):.3f} '\
f'({np.std(eiv_rmse_collection)/np.sqrt(num_test_epochs):.3f})')
print(f'LogDens {np.mean(eiv_logdens_collection):.3f} '\
f'({np.std(eiv_logdens_collection)/np.sqrt(num_test_epochs):.3f})')
@@ -118,7 +118,7 @@ def train_on_data(init_std_y, seed):
net.apply(initialize_weights.glorot_init)
net = net.to(device)
net.std_y_par.requires_grad = False
std_x_map = lambda: 0.0
std_x_map = lambda: net.get_std_x().detach().cpu().item()
std_y_map = lambda: net.get_std_y().detach().cpu().item()
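# both maps expose the current noise standard deviations as detached Python floats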
# regularization
reg = unscaled_reg/len(train_data)
......
@@ -118,7 +118,7 @@ def train_on_data(init_std_y, seed):
net.apply(initialize_weights.glorot_init)
net = net.to(device)
net.std_y_par.requires_grad = False
std_x_map = lambda: 0.0
std_x_map = lambda: net.get_std_x().detach().cpu().item()
std_y_map = lambda: net.get_std_y().detach().cpu().item()
# regularization
reg = unscaled_reg/len(train_data)
......