Commit d48d711b authored by Jörg Martin

non-EiV for California working, but performance only mediocre

parent c8735bcc
@@ -278,3 +278,42 @@ class FNNBer(nn.Module):
        else:
            sigma = torch.mean(sigma, dim=1)
        return pred, sigma
class SmallFNNBer(FNNBer):
    """
    A fully connected net with Bernoulli dropout layers.
    :param p: dropout rate, defaults to 0.2
    :param init_std_y: Initial standard deviation for the noise on the output y.
    :param h: A list specifying the number of neurons in each layer.
    """
    def __init__(self, p=0.2, init_std_y=1.0, h=[10, 1024, 1024, 1024, 1]):
        super().__init__(p=p, init_std_y=init_std_y)
        self.main = nn.Sequential(
                nn.Linear(h[0], h[1]),
                nn.LeakyReLU(self.LeakyReLUSlope),
                nn.Dropout(p=p),
                nn.Linear(h[1], h[2]),
                nn.LeakyReLU(self.LeakyReLUSlope),
                nn.Dropout(p=p),
                nn.Linear(h[2], h[3]),
                nn.LeakyReLU(self.LeakyReLUSlope),
                nn.Dropout(p=p),
                nn.Linear(h[3], h[4]))
class ShallowFNNBer(FNNBer):
    """
    A fully connected net with Bernoulli dropout layers.
    :param p: dropout rate, defaults to 0.2
    :param init_std_y: Initial standard deviation for the noise on the output y.
    :param h: A list specifying the number of neurons in each layer.
    """
    def __init__(self, p=0.2, init_std_y=1.0, h=[10, 1024, 1024, 1]):
        super().__init__(p=p, init_std_y=init_std_y)
        self.main = nn.Sequential(
                nn.Linear(h[0], h[1]),
                nn.LeakyReLU(self.LeakyReLUSlope),
                nn.Dropout(p=p),
                nn.Linear(h[1], h[2]),
                nn.LeakyReLU(self.LeakyReLUSlope),
                nn.Dropout(p=p),
                nn.Linear(h[2], h[3]))
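A quick usage sketch for the new classes (hypothetical code, assuming the forward pass takes a single tensor and returns the `(pred, sigma)` pair visible in the `FNNBer` hunk above):

import torch
from EIVArchitectures import Networks

# hypothetical smoke test: 10 input features, 1 regression target
net = Networks.ShallowFNNBer(p=0.2, h=[10, 1024, 1024, 1])
x = torch.randn(32, 10)   # batch of 32 samples
pred, sigma = net(x)      # prediction and predicted std of y
print(pred.shape, sigma.shape)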
import torch.nn as nn

def normal_init(m, scale=0.01):
    """
    Initialize the weights of layer `m` with a zero-mean normal
    distribution of standard deviation `scale`.
    Biases will be set to 0.
    :param m: A torch.nn object
    *Example*:
        net = nn.Sequential(nn.Linear(1,2), nn.Linear(2,1))
        net.apply(normal_init)
    """
    classname = m.__class__.__name__
    # only initialize Linear or Conv layers
    if classname.find('Linear') != -1 or classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, scale)
        nn.init.constant_(m.bias.data, 0.0)

def glorot_init(m, gain=1):
    """
    Initialize the weights of layer `m` via
    `nn.init.xavier_uniform_(m.weight, gain)` and set biases to 0.
    :param m: A torch.nn object
    *Example*:
        net = nn.Sequential(nn.Linear(1,2), nn.Linear(2,1))
        net.apply(glorot_init)
    """
    classname = m.__class__.__name__
    # only initialize Linear or Conv layers
    if classname.find('Linear') != -1 or classname.find('Conv') != -1:
        nn.init.xavier_uniform_(m.weight.data, gain=gain)
        nn.init.constant_(m.bias.data, 0.0)
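Both initializers are meant to be passed to `torch.nn.Module.apply`, which visits every submodule; the training script below uses this pattern on a full `FNNBer` net. A minimal sketch:

import torch.nn as nn
from EIVArchitectures import initialize_weights

net = nn.Sequential(nn.Linear(10, 1024), nn.LeakyReLU(), nn.Linear(1024, 1))
net.apply(initialize_weights.glorot_init)  # re-initializes both Linear layers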
@@ -54,10 +54,10 @@ class CSVData(Dataset):
        """
        features_array = np.array(self.data_df)
        labels_array = np.array(self.labels_df)
        self.mean_features = torch.tensor(np.mean(features_array, axis=0), dtype=torch.float32)
        self.std_features = torch.tensor(np.std(features_array, axis=0), dtype=torch.float32)
        self.mean_labels = torch.tensor(np.mean(labels_array, axis=0), dtype=torch.float32)
        self.std_labels = torch.tensor(np.std(labels_array, axis=0), dtype=torch.float32)

    def normalize_sample(self, sample):
        """
@@ -103,8 +103,8 @@ class CSVData(Dataset):
    def __getitem__(self, i):
        # returns a tuple of a tensor and the corresponding label
        assert 0 <= i and i < self.__len__()
        sample = (torch.tensor(np.array(self.data_df.iloc[i]), dtype=torch.float32),
                torch.tensor(np.array(self.labels_df.iloc[i]), dtype=torch.float32))
        if self.normalize:
            return self.normalize_sample(sample)
        else:
...
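The added `dtype=torch.float32` guards against NumPy's default float64: tensors built from pandas/NumPy data would otherwise collide with PyTorch's float32 default parameters. A minimal illustration (a sketch, not part of the commit):

import numpy as np
import torch

layer = torch.nn.Linear(2, 1)                 # weights are float32
arr = np.array([1.0, 2.0])                    # NumPy defaults to float64
try:
    layer(torch.tensor(arr))                  # raises a dtype mismatch
except RuntimeError as err:
    print(err)
print(layer(torch.tensor(arr, dtype=torch.float32)))  # works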
@@ -9,14 +9,17 @@ from EIVGeneral.repetition import repeat_tensors, reshape_to_chunks

def nll_reg_loss(net, x, y, reg):
    """
    Returns the neg log likelihood with an additional regularization term.
    **Note**: `reg` will not be divided by the data size (and by 2);
    this should be done beforehand.
    :param net: A torch.nn.Module.
    :param x: A torch.tensor, the input.
    :param y: A torch.tensor, the output.
    :param reg: A non-negative float, the regularization.
    """
    out, std_y = net(x)
    if len(y.shape) <= 1:
        y = y.view((-1,1))
    assert out.shape == y.shape
    neg_log_likelihood = torch.mean(0.5* torch.log(2*pi*std_y**2) \
            + ((out-y)**2)/(2*std_y**2))
    regularization = net.regularizer(x, lamb=reg)
...
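The first term is the sample mean of the Gaussian negative log likelihood -log N(y; out, std_y^2). A quick sanity check of the closed form against `torch.distributions` (a sketch, not part of the commit):

import math
import torch
from torch.distributions import Normal

out, y, std_y = torch.tensor([0.3]), torch.tensor([0.5]), torch.tensor([0.2])
nll = 0.5*torch.log(2*math.pi*std_y**2) + ((out - y)**2)/(2*std_y**2)
assert torch.allclose(nll, -Normal(out, std_y).log_prob(y))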
@@ -40,6 +40,7 @@ class TrainEpoch():
        #
        self.lr_generator = iter(self.next_lr())
        self.lr = None
        self.total_count = 0

    def next_lr(self):
        while True:
@@ -91,6 +92,7 @@ class TrainEpoch():
        stored_train_loss_to_average = []
        stored_test_loss_to_average = []
        for i, (x,y) in enumerate(self.train_dataloader):
            self.total_count += 1
            # optimize on train data
            x, y = x.to(self.device), y.to(self.device)
            loss = self.criterion(net, x, y, self.reg)
@@ -125,6 +127,12 @@ class TrainEpoch():
                    std_x,
                    std_y
                    ))
                # to be used for extra reporting
                self.last_train_loss = stored_train_loss[-1]
                self.last_test_loss = stored_test_loss[-1]
                self.last_std_x = std_x
                self.last_std_y = std_y
                # extra reporting
                self.extra_report(net, i)
                stored_train_loss_to_average = []
                stored_test_loss_to_average = []
...
"""
Train non-EiV model on california housing dataset using different seeds
"""
import random import random
import os import os
import numpy as np import numpy as np
import torch import torch
import torch.nn as nn import torch.backends.cudnn
from torch.utils.data import DataLoader, TensorDataset from torch.utils.data import DataLoader
from torch.utils.tensorboard.writer import SummaryWriter
from EIVArchitectures import Networks from EIVArchitectures import Networks, initialize_weights
from EIVData.california_housing import load_data from EIVData.california_housing import load_data
from EIVTrainingRoutines import train_and_store, loss_functions from EIVTrainingRoutines import train_and_store, loss_functions
# hyperparameters
lr = 1e-3
batch_size = 200
test_batch_size = 800
number_of_epochs = 100
unscaled_reg = 10
report_point = 5
p = 0.1
lr_update = 20
# pretraining = 300
epoch_offset = 10
init_std_y_list = [0.5]
gamma = 0.5
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')

# reproducibility
def set_seeds(seed):
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)

seed_list = [0,]

# to store the RMSE
rmse_chain = []
@@ -43,8 +49,7 @@ class UpdatedTrainEpoch(train_and_store.TrainEpoch):
        self.lr = self.initial_lr
        self.optimizer = torch.optim.Adam(net.parameters(), lr=self.lr)
        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
                self.optimizer, lr_update, gamma)

    def post_epoch_update(self, net, epoch):
@@ -53,67 +58,74 @@ class UpdatedTrainEpoch(train_and_store.TrainEpoch):
        """
        if epoch >= epoch_offset:
            net.std_y_par.requires_grad = True
        self.lr_scheduler.step()
    def extra_report(self, net, i):
        """
        Overwrites the corresponding method.
        Reports the RMSE and the last train/test losses to the
        TensorBoard `writer`.
        """
        rmse = self.rmse(net).item()
        rmse_chain.append(rmse)
        writer.add_scalar('RMSE', rmse, self.total_count)
        writer.add_scalar('train loss', self.last_train_loss, self.total_count)
        writer.add_scalar('test loss', self.last_test_loss, self.total_count)
        print(f'RMSE {rmse:.3f}')
    def rmse(self, net):
        """
        Compute the root mean squared error for `net`
        """
        net_train_state = net.training
        net.eval()
        x, y = next(iter(self.test_dataloader))
        if len(y.shape) <= 1:
            y = y.view((-1,1))
        out = net(x.to(device))[0].detach().cpu()
        assert out.shape == y.shape
        if net_train_state:
            net.train()
        return torch.sqrt(torch.mean((out-y)**2))
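Note that `rmse` scores a single shuffled batch of `test_batch_size` samples; a full-test-set variant might look like this (hypothetical sketch, not part of the commit):

def rmse_full(net, test_dataloader, device):
    # accumulate squared errors over the whole test set
    net_train_state = net.training
    net.eval()
    se_sum, n = 0.0, 0
    for x, y in test_dataloader:
        if len(y.shape) <= 1:
            y = y.view((-1,1))
        out = net(x.to(device))[0].detach().cpu()
        se_sum += torch.sum((out - y)**2).item()
        n += y.numel()
    if net_train_state:
        net.train()
    return (se_sum/n)**0.5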
def train_on_data(init_std_y, seed):
    """
    Sets `seed`, loads data and trains a Bernoulli model, starting with
    `init_std_y`.
    """
    # set seed
    set_seeds(seed)
    # load Datasets
    train_data, test_data = load_data(seed=seed, splitting_part=0.8,
            normalize=True)
    # make dataloaders
    train_dataloader = DataLoader(train_data, batch_size=batch_size,
            shuffle=True)
    test_dataloader = DataLoader(test_data, batch_size=test_batch_size,
            shuffle=True)
    # create a net
    input_dim = train_data[0][0].numel()
    output_dim = train_data[0][1].numel()
    net = Networks.FNNBer(p=p,
            init_std_y=init_std_y,
            h=[input_dim, 1024, 1024, 1024, 1024, output_dim])
    net.apply(initialize_weights.glorot_init)
    net = net.to(device)
    net.std_y_par.requires_grad = False
    std_x_map = lambda: 0.0
    std_y_map = lambda: net.get_std_y().detach().cpu().item()
    # regularization
    reg = unscaled_reg/len(train_data)
    # create epoch_map
    criterion = loss_functions.nll_reg_loss
    epoch_map = UpdatedTrainEpoch(train_dataloader=train_dataloader,
            test_dataloader=test_dataloader,
            criterion=criterion, std_y_map=std_y_map, std_x_map=std_x_map,
            lr=lr, reg=reg, report_point=report_point, device=device)
    # run and save
    save_file = os.path.join('saved_networks',
            f'noneiv_california'\
            f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
            f'_p_{p:.2f}_seed_{seed}.pkl')
    train_and_store.train_and_store(net=net,
            epoch_map=epoch_map,
            number_of_epochs=number_of_epochs,
@@ -122,11 +134,13 @@ def train_on_data(init_std_y, seed):

if __name__ == '__main__':
    for seed in seed_list:
        # Tensorboard monitoring
        writer = SummaryWriter(log_dir=f'/home/martin09/tmp/tensorboard/'\
                f'run_noneiv_california_lr_{lr:.4f}_seed'\
                f'_{seed}_uregu_{unscaled_reg:.1f}_p_{p:.2f}')
        print(f'>>>>SEED: {seed}')
        for init_std_y in init_std_y_list:
            print(f'Using init_std_y={init_std_y:.3f}')
            train_on_data(init_std_y, seed)