Commit 981f8329 authored by Jörg Martin

EiV for energy and california

parent c07920e2
@@ -14,7 +14,7 @@ class FNNEIV(nn.Module):
"""
A fully connected net with Error-in-Variables input and Bernoulli dropout
layers.
-    :param p: dropout rate, defaults to 0.5
+    :param p: dropout rate, defaults to 0.2
:param init_std_y: Initial estimated standard deviation for y.
:param precision_prior_zeta: precision of the prior for zeta.
Defaults to 0.0 (=improper prior)
@@ -23,7 +23,7 @@ class FNNEIV(nn.Module):
`fixed_std_x` is different from `None`.
:param h: A list specifying the number of neurons in each layer.
:param fixed_std_x: If given, this value will be the output of the method
-    `get_std_x()`.
+    `get_std_x()`, regardless of the deming factor.
**Note**:
- To change the deming factor afterwards, use the method `change_deming`
- To change fixed_std_x afterwards, use the method `change_fixed_std_x`
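A minimal usage sketch of this interface (the hidden-layer sizes and the
argument to `change_fixed_std_x` are illustrative, not taken from this diff):

    from EIVArchitectures import Networks

    net = Networks.FNNEIV(p=0.2, init_std_y=0.5,
            h=[8, 1024, 1024, 1024, 1024, 1], fixed_std_x=0.05)
    net.change_fixed_std_x(0.07)  # get_std_x() now returns tensor(0.07)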
@@ -36,7 +36,7 @@ class FNNEIV(nn.Module):
# part before Bernoulli dropout
self.init_std_y = init_std_y
InverseSoftplus = lambda sigma: torch.log(torch.exp(sigma) - 1 )
-        self.std_y_par = nn.Parameter(
+        self.std_y_par = nn.parameter.Parameter(
InverseSoftplus(torch.tensor([init_std_y])))
self._repetition = 1
self.main = nn.Sequential(
@@ -57,6 +57,9 @@ class FNNEIV(nn.Module):
nn.Linear(h[4], h[5]))
self.p = p
self._deming = deming
+if fixed_std_x is not None:
+    if not isinstance(fixed_std_x, torch.Tensor):
+        fixed_std_x = torch.tensor(fixed_std_x)
self._fixed_std_x = fixed_std_x
# needed for switch_noise_off()
self.noise_is_on = True
@@ -76,6 +79,9 @@ class FNNEIV(nn.Module):
:param fixed_std_x: A positive float
"""
print('Updating fixed_std_x from %.3f to %.3f' % (self._fixed_std_x, fixed_std_x))
+if fixed_std_x is not None:
+    if not isinstance(fixed_std_x, torch.Tensor):
+        fixed_std_x = torch.tensor(fixed_std_x)
self._fixed_std_x = fixed_std_x
def noise_off(self):
@@ -95,7 +101,7 @@ class FNNEIV(nn.Module):
else:
return self._fixed_std_x
else:
-            return 0.0
+            return torch.tensor(0.0, dtype=torch.float32)
def get_std_y(self):
return nn.Softplus()(self.std_y_par)
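For reference, the softplus parameterization keeps the learned parameter
unconstrained while `get_std_y()` stays positive; a round-trip sketch:

    import torch
    import torch.nn as nn

    init_std_y = 0.5
    inverse_softplus = lambda sigma: torch.log(torch.exp(sigma) - 1)
    std_y_par = inverse_softplus(torch.tensor([init_std_y]))  # unconstrained
    assert torch.allclose(nn.Softplus()(std_y_par), torch.tensor([init_std_y]))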
@@ -178,6 +184,61 @@ class FNNEIV(nn.Module):
sigma = torch.mean(sigma, dim=1)
return pred, sigma
def predictive_logdensity(self, x, y, number_of_draws=100, remove_graph=True,
average_batch_dimension=True, scale_labels=None,
decouple_dimensions=False):
"""
Computes the logarithm of the predictive density evaluated at `y`. If
`average_batch_dimension` is `True` these values will be averaged over
the batch dimension.
:param x: A torch.tensor, the input
:param y: A torch.tensor, labels on which to evaluate the density
:param number_of_draws: Number of draws to obtain from x
:param remove_graph: If True (default) the output will
be detached to save memory
:param average_batch_dimension: Boolean. If True (default) the values
will be averaged over the batch dimension. If False, the batch
dimension will be left untouched and all values will be returned.
"""
out, sigmas = self.predict(x, number_of_draws=number_of_draws,
take_average_of_prediction=False, remove_graph=remove_graph)
# Add "repetition" dimension to y and out
y = y[:,None,...]
sigmas = sigmas[:,None,...]
if len(y.shape) <= 2:
# add an output axis if necessary
y = y[...,None]
sigmas = sigmas[...,None]
# squeeze last dimensions into one
y = y.view((*y.shape[:2], -1))
sigmas = sigmas.view((*sigmas.shape[:2], -1))
out = out.view((*out.shape[:2], -1))
# check if dimensions consistent
assert y.shape == sigmas.shape
assert y.shape[0] == out.shape[0]
assert y.shape[2] == out.shape[2]
if scale_labels is not None:
extended_scale_labels = scale_labels.flatten()[None,None,:]
out = out * extended_scale_labels
y = y * extended_scale_labels
sigmas = sigmas * extended_scale_labels
# exponential argument for density
if not decouple_dimensions:
exp_arg = torch.sum(-1/(2*sigmas**2) * (y-out)**2-\
1/2 * torch.log(2 * torch.pi * sigmas**2), dim=2)
else:
exp_arg = -1/(2*sigmas**2) * (y-out)**2-\
1/2 * torch.log(2 * torch.pi * sigmas**2)
# average over parameter values
predictive_log_density_values = \
torch.logsumexp(input=exp_arg, dim=1)\
- torch.log(torch.tensor(number_of_draws))
if average_batch_dimension:
return torch.mean(predictive_log_density_values, dim=0)
else:
return predictive_log_density_values
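A standalone sketch of the estimator `predictive_logdensity` implements,
log p(y|x) ≈ logsumexp_i log N(y; mu_i, sigma_i) - log N, with illustrative
shapes (batch 32, one output dimension):

    import math
    import torch

    N = 100                               # number_of_draws
    mu = torch.randn(32, N, 1)            # one prediction per draw
    sigma = torch.full((32, N, 1), 0.5)   # per-draw standard deviations
    y = torch.randn(32, 1, 1)             # labels, broadcast over the draw axis

    log_dens = (-0.5 * ((y - mu) / sigma) ** 2
                - 0.5 * torch.log(2 * math.pi * sigma ** 2)).sum(dim=2)
    log_pred = torch.logsumexp(log_dens, dim=1) - math.log(N)
    print(log_pred.mean())                # as with average_batch_dimension=True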
class FNNBer(nn.Module):
"""
A fully connected net with Bernoulli dropout layers.
@@ -191,7 +252,7 @@ class FNNBer(nn.Module):
# part before Bernoulli dropout
self.init_std_y = init_std_y
InverseSoftplus = lambda sigma: torch.log(torch.exp(sigma) - 1 )
-        self.std_y_par = nn.Parameter(
+        self.std_y_par = nn.parameter.Parameter(
InverseSoftplus(torch.tensor([init_std_y])))
self.main = nn.Sequential(
nn.Linear(h[0], h[1]),
@@ -263,8 +324,9 @@ class FNNBer(nn.Module):
:param remove_graph: If True (default) the output will
be detached to save memory
:param take_average_of_prediction: If False, no averaging will be
applied to the prediction and the second dimension of the first output
will count the number_of_draws.
:returns: predictions, sigmas
"""
x, = repeat_tensors(x, number_of_draws=number_of_draws)
pred, sigma = self.forward(x)
...
@@ -17,6 +17,7 @@ def nll_reg_loss(net, x, y, reg):
:param reg: A non-negative float, the regularization.
"""
out, std_y = net(x)
+# Add label dimension to y if missing
if len(y.shape) <= 1:
y = y.view((-1,1))
assert out.shape == y.shape
@@ -26,13 +27,11 @@ def nll_reg_loss(net, x, y, reg):
return neg_log_likelihood + regularization
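The full body of `nll_reg_loss` is truncated in this diff; a hedged sketch of
the Gaussian negative log-likelihood it is built around (the regularizer term
is added separately via `net.regularizer`):

    import math
    import torch

    def gaussian_nll(out, std_y, y):
        # batch mean of -log N(y; out, std_y^2)
        return torch.mean(0.5 * ((y - out) / std_y) ** 2
                          + 0.5 * torch.log(2 * math.pi * std_y ** 2))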
-def nll_eiv_no_jensen(net, x, y, reg, number_of_draws=5):
+def nll_eiv(net, x, y, reg, number_of_draws=5):
"""
negative log likelihood criterion for an Error in variables model (EIV)
where `torch.logsumexp` is applied to partitions of size `number_of_draws`
of `mu` and `sigma` in the batch dimension (that is the first one).
-**Note**: This function is supposed to be used in combination
-of `repeat_tensors` with the same argument `number_of_draws`.
*Note that `reg` will not be divided by the data size (and by 2),
this should be done beforehand.*
:param mu: predicted mu
@@ -40,13 +39,16 @@ def nll_eiv(net, x, y, reg, number_of_draws=5):
:param y: ground truth
:param number_of_draws: Integer, supposed to be larger than 2
"""
+# Add label dimension to y if missing
+if len(y.shape) <= 1:
+    y = y.view((-1,1))
regularization = net.regularizer(x, lamb=reg)
# repeat_tensors
x, y = repeat_tensors(x, y, number_of_draws=number_of_draws)
pred, sigma = net(x, repetition=number_of_draws)
# split into chunks of size number_of_draws along batch dimension
-pred, sigma, y = reshape_to_chunks(pred, sigma,
-        y, number_of_draws=number_of_draws)
+pred, sigma, y = reshape_to_chunks(pred, sigma, y, number_of_draws=number_of_draws)
assert pred.shape == y.shape
# apply logsumexp to chunks and average the results
nll = -1 * (torch.logsumexp(-1 * sigma.log()
-((y-pred)**2)/(2*sigma**2), dim=1)
...
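A shape-level sketch of the chunked logsumexp inside `nll_eiv`: inputs are
repeated `number_of_draws` times, folded into a draw axis by
`reshape_to_chunks`, and the Gaussian likelihood is averaged over draws in log
space (shapes illustrative; constants omitted, as in the fragment above):

    import torch

    number_of_draws, batch = 5, 4
    pred = torch.randn(batch, number_of_draws, 1)  # per-draw predictions
    sigma = torch.full_like(pred, 0.5)
    y = torch.randn(batch, 1, 1).expand_as(pred)   # labels repeated over draws

    nll = -torch.mean(torch.logsumexp(-sigma.log()
            - (y - pred) ** 2 / (2 * sigma ** 2), dim=1))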
@@ -9,9 +9,9 @@ from EIVArchitectures import Networks, initialize_weights
from EIVData.energy_efficiency import load_data
from EIVTrainingRoutines import train_and_store, loss_functions
print('Non-EiV')
from train_noneiv_energy import p, init_std_y_list, seed_list, unscaled_reg, hidden_layers
train_data, test_data = load_data()
test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data), 800))))
@@ -32,7 +32,9 @@ train_and_store.open_stored_training(saved_file=saved_file,
# RMSE
x,y = next(iter(test_dataloader))
-out = net(x)[0]
+training_state = net.training
+net.train()
+out, sigmas = net.predict(x, number_of_draws=100, take_average_of_prediction=True)
if len(y.shape) <=1:
y = y.view((-1,1))
assert y.shape == out.shape
@@ -56,3 +58,64 @@ if training_state:
else:
net.eval()
print(f'Dropout predictive {logdens:.3f}')
print('EiV')
from train_eiv_energy import p, init_std_y_list, seed_list, unscaled_reg, hidden_layers, fixed_std_x
train_data, test_data = load_data()
test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data), 800))))
seed = seed_list[0]
init_std_y = init_std_y_list[0]
saved_file = os.path.join('saved_networks',
f'eiv_energy'\
f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
f'_p_{p:.2f}_seed_{seed}.pkl')
input_dim = train_data[0][0].numel()
output_dim = train_data[0][1].numel()
net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
h=[input_dim, *hidden_layers, output_dim], fixed_std_x=fixed_std_x)
train_and_store.open_stored_training(saved_file=saved_file,
net=net)
# RMSE
x,y = next(iter(test_dataloader))
training_state = net.training
noise_state = net.noise_is_on
net.train()
net.noise_on()
out = net.predict(x, number_of_draws=500, take_average_of_prediction=True)[0]
if len(y.shape) <=1:
y = y.view((-1,1))
assert y.shape == out.shape
res = y-out
scale = train_data.dataset.std_labels
scaled_res = res * scale.view((1,-1))
scaled_res = scaled_res.detach().cpu().numpy().flatten()
rmse = np.sqrt(np.mean(scaled_res**2))
if training_state:
net.train()
else:
net.eval()
if noise_state:
net.noise_on()
else:
net.noise_off()
print(f'RMSE {rmse:.3f}')
# NLL
x,y = next(iter(test_dataloader))
training_state = net.training
net.train()
logdens = net.predictive_logdensity(x, y, number_of_draws=100,
decouple_dimensions=True,
scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
if training_state:
net.train()
else:
net.eval()
print(f'Dropout predictive {logdens:.3f}')
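Both evaluation blocks above snapshot `net.training` (and `net.noise_is_on`
for EiV) before forcing train mode for Monte Carlo sampling, then restore the
state afterwards; a hypothetical context manager capturing that recurring
pattern (not part of this repository):

    from contextlib import contextmanager

    @contextmanager
    def sampling_mode(net):
        training_state = net.training
        noise_state = getattr(net, 'noise_is_on', None)
        net.train()                      # keep dropout active for MC draws
        if noise_state is not None:
            net.noise_on()
        try:
            yield net
        finally:
            net.train() if training_state else net.eval()
            if noise_state is not None:
                net.noise_on() if noise_state else net.noise_off()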
import importlib
import os
import numpy as np
import torch
import torch.backends.cudnn
from torch.utils.data import DataLoader
from tqdm import tqdm
from EIVArchitectures import Networks
from EIVTrainingRoutines import train_and_store
long_dataname = 'energy_efficiency'
short_dataname = 'energy'
load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
train_noneiv = importlib.import_module(f'train_noneiv_{short_dataname}')
train_eiv = importlib.import_module(f'train_eiv_{short_dataname}')
train_data, test_data = load_data()
test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data),
800))))
input_dim = train_data[0][0].numel()
output_dim = train_data[0][1].numel()
def collect_metrics(x,y, seed=0,
noneiv_number_of_draws=500, eiv_number_of_draws=500,
decouple_dimensions=False):
"""
:param x: A torch.tensor, taken as input
:param y: A torch.tensor, taken as output
:param seed: Integer. The seed used for loading, defaults to 0.
:param noneiv_number_of_draws: Number of draws for the non-EiV model
for sampling from the posterior predictive. Defaults to 500.
:param eiv_number_of_draws: Number of draws for the EiV model
for sampling from the posterior predictive. Defaults to 500.
:param decouple_dimensions: Boolean. If True, the unusual convention
of Gal et al. is followed where, in the evaluation of the
log-posterior-predictive, each dimension is treated independently and then
averaged. If False (default), a multivariate distribution is used.
:returns: noneiv_rmse, noneiv_logdens, eiv_rmse, eiv_logdens
"""
init_std_y = train_noneiv.init_std_y_list[0]
unscaled_reg = train_noneiv.unscaled_reg
p = train_noneiv.p
hidden_layers = train_noneiv.hidden_layers
saved_file = os.path.join('saved_networks',
f'noneiv_{short_dataname}'\
f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
f'_p_{p:.2f}_seed_{seed}.pkl')
net = Networks.FNNBer(p=p, init_std_y=init_std_y,
h=[input_dim, *hidden_layers, output_dim])
train_and_store.open_stored_training(saved_file=saved_file,
net=net)
# RMSE
training_state = net.training
net.train()
out = net.predict(x, number_of_draws=noneiv_number_of_draws,
take_average_of_prediction=True)[0]
if len(y.shape) <= 1:
y = y.view((-1,1))
assert y.shape == out.shape
res = y-out
scale = train_data.dataset.std_labels
scaled_res = res * scale.view((1,-1))
scaled_res = scaled_res.detach().cpu().numpy().flatten()
noneiv_rmse = np.sqrt(np.mean(scaled_res**2))
# NLL
training_state = net.training
net.train()
noneiv_logdens = net.predictive_logdensity(x, y, number_of_draws=100,
decouple_dimensions=decouple_dimensions,
scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
if training_state:
net.train()
else:
net.eval()
# EiV
init_std_y = train_eiv.init_std_y_list[0]
unscaled_reg = train_eiv.unscaled_reg
p = train_eiv.p
hidden_layers = train_eiv.hidden_layers
fixed_std_x = train_eiv.fixed_std_x
saved_file = os.path.join('saved_networks',
f'eiv_{short_dataname}'\
f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
f'_p_{p:.2f}_seed_{seed}.pkl')
net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
h=[input_dim, *hidden_layers, output_dim], fixed_std_x=fixed_std_x)
train_and_store.open_stored_training(saved_file=saved_file,
net=net)
# RMSE
training_state = net.training
noise_state = net.noise_is_on
net.train()
net.noise_on()
out = net.predict(x, number_of_draws=eiv_number_of_draws,
take_average_of_prediction=True)[0]
if len(y.shape) <=1:
y = y.view((-1,1))
assert y.shape == out.shape
res = y-out
scale = train_data.dataset.std_labels
scaled_res = res * scale.view((1,-1))
scaled_res = scaled_res.detach().cpu().numpy().flatten()
eiv_rmse = np.sqrt(np.mean(scaled_res**2))
if training_state:
net.train()
else:
net.eval()
if noise_state:
net.noise_on()
else:
net.noise_off()
# NLL
training_state = net.training
net.train()
eiv_logdens = net.predictive_logdensity(x, y, number_of_draws=100,
decouple_dimensions=decouple_dimensions,
scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
if training_state:
net.train()
else:
net.eval()
return noneiv_rmse, noneiv_logdens, eiv_rmse, eiv_logdens
noneiv_rmse_collection = []
noneiv_logdens_collection = []
eiv_rmse_collection = []
eiv_logdens_collection = []
number_of_samples = 20
for _ in tqdm(range(number_of_samples)):
x,y = next(iter(test_dataloader))
noneiv_rmse, noneiv_logdens, eiv_rmse, eiv_logdens = collect_metrics(x,y)
noneiv_rmse_collection.append(noneiv_rmse)
noneiv_logdens_collection.append(noneiv_logdens)
eiv_rmse_collection.append(eiv_rmse)
eiv_logdens_collection.append(eiv_logdens)
print('Non-EiV')
print(f'RMSE {np.mean(noneiv_rmse_collection):.3f} ({np.std(noneiv_rmse_collection)/np.sqrt(number_of_samples):.3f})')
print(f'LogDens {np.mean(noneiv_logdens_collection):.3f} ({np.std(noneiv_logdens_collection)/np.sqrt(number_of_samples):.3f})')
print('EiV')
print(f'RMSE {np.mean(eiv_rmse_collection):.3f} ({np.std(eiv_rmse_collection)/np.sqrt(number_of_samples):.3f})')
print(f'LogDens {np.mean(eiv_logdens_collection):.3f} ({np.std(eiv_logdens_collection)/np.sqrt(number_of_samples):.3f})')
"""
Train EiV model on california housing dataset using different seeds
"""
import random
import os
import numpy as np
import torch
import torch.backends.cudnn
from torch.utils.data import DataLoader
from torch.utils.tensorboard.writer import SummaryWriter
from EIVArchitectures import Networks, initialize_weights
from EIVData.california_housing import load_data
from EIVTrainingRoutines import train_and_store, loss_functions
# hyperparameters
lr = 1e-3
batch_size = 200
test_batch_size = 800
number_of_epochs = 100
unscaled_reg = 10
report_point = 5
p = 0.1
lr_update = 20
# pretraining = 300
epoch_offset = 10
init_std_y_list = [0.5]
gamma = 0.5
hidden_layers = [1024, 1024, 1024, 1024]
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
fixed_std_x = 0.05
# reproducibility
def set_seeds(seed):
torch.backends.cudnn.benchmark = False
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
seed_list = range(10)
# to store the RMSE
rmse_chain = []
class UpdatedTrainEpoch(train_and_store.TrainEpoch):
def pre_epoch_update(self, net, epoch):
"""
Overwrites the corresponding method
"""
if epoch == 0:
self.lr = self.initial_lr
self.optimizer = torch.optim.Adam(net.parameters(), lr=self.lr)
self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
self.optimizer, lr_update, gamma)
def post_epoch_update(self, net, epoch):
"""
Overwrites the corresponding method
"""
if epoch >= epoch_offset:
net.std_y_par.requires_grad = True
self.lr_scheduler.step()
def extra_report(self, net, i):
"""
Overwrites the corresponding method; relies on the global `writer`,
which is created after this class is initialized.
"""
rmse = self.rmse(net).item()
rmse_chain.append(rmse)
writer.add_scalar('RMSE', rmse, self.total_count)
writer.add_scalar('train loss', self.last_train_loss, self.total_count)
writer.add_scalar('test loss', self.last_test_loss, self.total_count)
print(f'RMSE {rmse:.3f}')
def rmse(self, net):
"""
Compute the root mean squared error for `net`
"""
net_train_state = net.training
net_noise_state = net.noise_is_on
net.eval()
net.noise_off()
x, y = next(iter(self.test_dataloader))
if len(y.shape) <= 1:
y = y.view((-1,1))
out = net(x.to(device))[0].detach().cpu()
assert out.shape == y.shape
if net_train_state:
net.train()
if net_noise_state:
net.noise_on()
return torch.sqrt(torch.mean((out-y)**2))
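A sketch of the warm-up schedule `UpdatedTrainEpoch` implements: `std_y_par`
stays frozen until `epoch_offset` while `StepLR` multiplies the learning rate
by `gamma` every `lr_update` epochs (hyperparameter values from this script):

    import torch

    param = torch.nn.Parameter(torch.zeros(1))
    param.requires_grad = False          # frozen, as in train_on_data below
    optimizer = torch.optim.Adam([param], lr=1e-3)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
    for epoch in range(100):
        # ... forward/backward and optimizer.step() per batch would go here ...
        if epoch >= 10:                  # epoch_offset
            param.requires_grad = True
        scheduler.step()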
def train_on_data(init_std_y, seed):
"""
Sets `seed`, loads data and trains an EiV Bernoulli-dropout model,
starting with `init_std_y`.
"""
# set seed
set_seeds(seed)
# load Datasets
train_data, test_data = load_data(seed=seed, splitting_part=0.8,
normalize=True)
# make dataloaders
train_dataloader = DataLoader(train_data, batch_size=batch_size,
shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=test_batch_size,
shuffle=True)
# create a net
input_dim = train_data[0][0].numel()
output_dim = train_data[0][1].numel()
net = Networks.FNNEIV(p=p,
init_std_y=init_std_y,
h=[input_dim, *hidden_layers, output_dim],
fixed_std_x=fixed_std_x)
net.apply(initialize_weights.glorot_init)
net = net.to(device)
net.std_y_par.requires_grad = False
std_x_map = lambda: 0.0
std_y_map = lambda: net.get_std_y().detach().cpu().item()
# regularization
reg = unscaled_reg/len(train_data)
# create epoch_map
criterion = loss_functions.nll_eiv
epoch_map = UpdatedTrainEpoch(train_dataloader=train_dataloader,
test_dataloader=test_dataloader,
criterion=criterion, std_y_map=std_y_map, std_x_map=std_x_map,
lr=lr, reg=reg, report_point=report_point, device=device)
# run and save
save_file = os.path.join('saved_networks',
f'eiv_california'\
f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}'\
f'_seed_{seed}.pkl')
train_and_store.train_and_store(net=net,
epoch_map=epoch_map,
number_of_epochs=number_of_epochs,
save_file=save_file)
if __name__ == '__main__':
for seed in seed_list:
# Tensorboard monitoring
writer = SummaryWriter(log_dir=f'/home/martin09/tmp/tensorboard/'\
f'run_eiv_california_lr_{lr:.4f}_seed'\
f'_{seed}_uregu_{unscaled_reg:.1f}_p_{p:.2f}'\
f'_fixed_std_x_{fixed_std_x:.3f}')
print(f'>>>>SEED: {seed}')
for init_std_y in init_std_y_list:
print(f'Using init_std_y={init_std_y:.3f}')
train_on_data(init_std_y, seed)
"""
Train EiV model on the energy efficiency dataset using different seeds
"""
import random
import os
import numpy as np
import torch
import torch.backends.cudnn
from torch.utils.data import DataLoader
from torch.utils.tensorboard.writer import SummaryWriter
from EIVArchitectures import Networks, initialize_weights
from EIVData.energy_efficiency import load_data
from EIVTrainingRoutines import train_and_store, loss_functions
# hyperparameters
lr = 1e-3
batch_size = 32
test_batch_size = 600
number_of_epochs = 600
unscaled_reg = 10
report_point = 5
p = 0.2
lr_update = 100
# pretraining = 300
epoch_offset = 100
init_std_y_list = [0.5]
gamma = 0.5
hidden_layers = [1024, 1024, 1024, 1024]
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
fixed_std_x = 0.05
# reproducibility
def set_seeds(seed):
torch.backends.cudnn.benchmark = False
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
seed_list = range(10)
# to store the RMSE
rmse_chain = []
class UpdatedTrainEpoch(train_and_store.TrainEpoch):
def pre_epoch_update(self, net, epoch):
"""
Overwrites the corresponding method
"""
if epoch == 0:
self.lr = self.initial_lr
self.optimizer = torch.optim.Adam(net.parameters(), lr=self.lr)
self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
self.optimizer, lr_update, gamma)
def post_epoch_update(self, net, epoch):
"""
Overwrites the corresponding method
"""
if epoch >= epoch_offset:
net.std_y_par.requires_grad = True
self.lr_scheduler.step()
def extra_report(self, net, i):
"""
Overwrites the corresponding method; relies on the global `writer`,
which is created after this class is initialized.
"""
rmse = self.rmse(net).item()
rmse_chain.append(rmse)
writer.add_scalar('RMSE', rmse, self.total_count)
writer.add_scalar('train loss', self.last_train_loss, self.total_count)
writer.add_scalar('test loss', self.last_test_loss, self.total_count)
print(f'RMSE {rmse:.3f}')
def rmse(self, net):
"""
Compute the root mean squared error for `net`
"""
net_train_state = net.training
net_noise_state = net.noise_is_on
net.eval()
net.noise_off()
x, y = next(iter(self.test_dataloader))
if len(y.shape) <= 1:
y = y.view((-1,1))
out = net(x.to(device))[0].detach().cpu()
assert out.shape == y.shape
if net_train_state:
net.train()
if net_noise_state:
net.noise_on()
return torch.sqrt(torch.mean((out-y)**2))
def train_on_data(init_std_y, seed):
"""
Sets `seed`, loads data and trains an EiV Bernoulli-dropout model,
starting with `init_std_y`.
"""
# set seed
set_seeds(seed)
# load Datasets
train_data, test_data = load_data(seed=seed, splitting_part=0.8,
normalize=True)
# make dataloaders
train_dataloader = DataLoader(train_data, batch_size=batch_size,
shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=test_batch_size,
shuffle=True)
# create a net
input_dim = train_data[0][0].numel()
output_dim = train_data[0][1].numel()
net = Networks.FNNEIV(p=p,
init_std_y=init_std_y,
h=[input_dim, *hidden_layers, output_dim],
fixed_std_x=fixed_std_x)
net.apply(initialize_weights.glorot_init)
net = net.to(device)
net.std_y_par.requires_grad = False
std_x_map = lambda: 0.0
std_y_map = lambda: net.get_std_y().detach().cpu().item()
# regularization
reg = unscaled_reg/len(train_data)
# create epoch_map
criterion = loss_functions.nll_eiv
epoch_map = UpdatedTrainEpoch(train_dataloader=train_dataloader,
test_dataloader=test_dataloader,
criterion=criterion, std_y_map=std_y_map, std_x_map=std_x_map,
lr=lr, reg=reg, report_point=report_point, device=device)
# run and save
save_file = os.path.join('saved_networks',
f'eiv_energy'\
f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}'\
f'_seed_{seed}.pkl')
train_and_store.train_and_store(net=net,
epoch_map=epoch_map,
number_of_epochs=number_of_epochs,
save_file=save_file)
if __name__ == '__main__':
for seed in seed_list:
# Tensorboard monitoring
writer = SummaryWriter(log_dir=f'/home/martin09/tmp/tensorboard/'\
f'run_eiv_energy_lr_{lr:.4f}_seed'\
f'_{seed}_uregu_{unscaled_reg:.1f}_p_{p:.2f}'\
f'_fixed_std_x_{fixed_std_x:.3f}')
print(f'>>>>SEED: {seed}')
for init_std_y in init_std_y_list:
print(f'Using init_std_y={init_std_y:.3f}')
train_on_data(init_std_y, seed)
@@ -18,13 +18,13 @@ from EIVTrainingRoutines import train_and_store, loss_functions
lr = 1e-3
batch_size = 32
test_batch_size = 600
-number_of_epochs = 300
+number_of_epochs = 600
unscaled_reg = 10
report_point = 5
p = 0.2
-lr_update = 50
+lr_update = 100
# pretraining = 300
-epoch_offset = 50
+epoch_offset = 100
init_std_y_list = [0.5]
gamma = 0.5
hidden_layers = [1024, 1024, 1024, 1024]
...