From d48d711b1427ce7ca11818f741428984c26f2cbb Mon Sep 17 00:00:00 2001
From: Joerg Martin <joerg.martin@ptb.de>
Date: Thu, 25 Nov 2021 11:54:58 +0100
Subject: [PATCH] non-EiV for California working, but performance only
 mediocre

---
 EIVPackage/EIVArchitectures/Networks.py            |  39 +++++++
 .../EIVArchitectures/initialize_weights.py         |  36 ++++++
 EIVPackage/EIVData/csv_dataset.py                  |  12 +-
 .../EIVTrainingRoutines/loss_functions.py          |   7 +-
 .../EIVTrainingRoutines/train_and_store.py         |   8 ++
 Experiments/train_eiv_carlifornia.py               |   0
 Experiments/train_noneiv_carlifornia.py            | 108 ++++++++++--------
 7 files changed, 155 insertions(+), 55 deletions(-)
 create mode 100644 EIVPackage/EIVArchitectures/initialize_weights.py
 delete mode 100644 Experiments/train_eiv_carlifornia.py

diff --git a/EIVPackage/EIVArchitectures/Networks.py b/EIVPackage/EIVArchitectures/Networks.py
index 3e1d61f..3a1c35c 100644
--- a/EIVPackage/EIVArchitectures/Networks.py
+++ b/EIVPackage/EIVArchitectures/Networks.py
@@ -278,3 +278,42 @@ class FNNBer(nn.Module):
         else:
             sigma = torch.mean(sigma, dim=1)
         return pred, sigma
+
+class SmallFNNBer(FNNBer):
+    """
+    A fully connected net with Bernoulli dropout layers.
+    :param p: dropout rate, defaults to 0.2
+    :param init_std_y: Initial standard deviation for the output y.
+    :param h: A list specifying the number of neurons in each layer.
+    """
+    def __init__(self, p=0.2, init_std_y=1.0, h=[10, 1024, 1024, 1024, 1]):
+        super().__init__(p=p, init_std_y=init_std_y)
+        self.main = nn.Sequential(
+                nn.Linear(h[0], h[1]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[1], h[2]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[2], h[3]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[3], h[4]))
+
+class ShallowFNNBer(FNNBer):
+    """
+    A fully connected net with Bernoulli dropout layers.
+    :param p: dropout rate, defaults to 0.2
+    :param init_std_y: Initial standard deviation for the output y.
+    :param h: A list specifying the number of neurons in each layer.
+    """
+    def __init__(self, p=0.2, init_std_y=1.0, h=[10, 1024, 1024, 1]):
+        super().__init__(p=p, init_std_y=init_std_y)
+        self.main = nn.Sequential(
+                nn.Linear(h[0], h[1]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[1], h[2]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[2], h[3]))
diff --git a/EIVPackage/EIVArchitectures/initialize_weights.py b/EIVPackage/EIVArchitectures/initialize_weights.py
new file mode 100644
index 0000000..a87501d
--- /dev/null
+++ b/EIVPackage/EIVArchitectures/initialize_weights.py
@@ -0,0 +1,36 @@
+import torch.nn as nn
+
+def normal_init(m, scale = 0.01):
+    """
+    Initialize weights of layer `m` with `scale` as standard
+    deviation. Biases will be set to 0.
+    :param m: A torch.nn object
+    :param scale: A positive float, the standard deviation, defaults to 0.01.
+
+    *Example*:
+    net = nn.Sequential(nn.Linear(1,2), nn.Linear(2,1))
+    net.apply(normal_init)
+    """
+    classname = m.__class__.__name__
+    # only initialize for Linear or Conv layers
+    if classname.find('Linear') != -1 or classname.find('Conv') != -1:
+        nn.init.normal_(m.weight.data, 0.0, scale)
+        nn.init.constant_(m.bias.data, 0.0)
+
+
+def glorot_init(m, gain = 1):
+    """
+    Initialize weights of layer `m` via
+    `nn.init.xavier_uniform_(m.weight, gain)` and set biases to 0.
+    :param m: A torch.nn object
+    :param gain: A positive float, the gain for the Xavier init, defaults to 1.
+
+    *Example*:
+    net = nn.Sequential(nn.Linear(1,2), nn.Linear(2,1))
+    net.apply(glorot_init)
+    """
+    classname = m.__class__.__name__
+    # only initialize for Linear or Conv layers
+    if classname.find('Linear') != -1 or classname.find('Conv') != -1:
+        nn.init.xavier_uniform_(m.weight.data, gain)
+        nn.init.constant_(m.bias.data, 0.0)
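The two additions above combine naturally: a dropout net from Networks.py is
initialized by applying one of these functions to every layer. A minimal
sketch (the input width 8 and batch size 16 are illustrative, EIVPackage is
assumed to be importable, and the forward returns (prediction, sigma) as in
the FNNBer excerpt above):

    import torch
    from EIVArchitectures import Networks, initialize_weights

    net = Networks.SmallFNNBer(p=0.2, init_std_y=0.5,
            h=[8, 1024, 1024, 1024, 1])
    net.apply(initialize_weights.glorot_init)  # Xavier weights, zero biases
    pred, sigma = net(torch.randn(16, 8))      # prediction and std of y
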
diff --git a/EIVPackage/EIVData/csv_dataset.py b/EIVPackage/EIVData/csv_dataset.py
index 5b584a2..cd0435d 100644
--- a/EIVPackage/EIVData/csv_dataset.py
+++ b/EIVPackage/EIVData/csv_dataset.py
@@ -54,10 +54,10 @@ class CSVData(Dataset):
         """
         features_array = np.array(self.data_df)
         labels_array = np.array(self.labels_df)
-        self.mean_features = torch.tensor(np.mean(features_array, axis=0))
-        self.std_features = torch.tensor(np.std(features_array, axis=0))
-        self.mean_labels = torch.tensor(np.mean(labels_array, axis=0))
-        self.std_labels = torch.tensor(np.std(labels_array, axis=0))
+        self.mean_features = torch.tensor(np.mean(features_array, axis=0), dtype=torch.float32)
+        self.std_features = torch.tensor(np.std(features_array, axis=0), dtype=torch.float32)
+        self.mean_labels = torch.tensor(np.mean(labels_array, axis=0), dtype=torch.float32)
+        self.std_labels = torch.tensor(np.std(labels_array, axis=0), dtype=torch.float32)
 
     def normalize_sample(self, sample):
         """
@@ -103,8 +103,8 @@ class CSVData(Dataset):
     def __getitem__(self, i):
         # returns a tuple of a tensor and the corresponding label
         assert 0 <= i and i<self.__len__()
-        sample = (torch.tensor(np.array(self.data_df.iloc[i])),
-                torch.tensor(np.array(self.labels_df.iloc[i])))
+        sample = (torch.tensor(np.array(self.data_df.iloc[i]), dtype=torch.float32),
+                torch.tensor(np.array(self.labels_df.iloc[i]), dtype=torch.float32))
         if self.normalize:
             return self.normalize_sample(sample)
         else:
diff --git a/EIVPackage/EIVTrainingRoutines/loss_functions.py b/EIVPackage/EIVTrainingRoutines/loss_functions.py
index e8ab002..37b76fe 100644
--- a/EIVPackage/EIVTrainingRoutines/loss_functions.py
+++ b/EIVPackage/EIVTrainingRoutines/loss_functions.py
@@ -9,14 +9,17 @@ from EIVGeneral.repetition import repeat_tensors, reshape_to_chunks
 def nll_reg_loss(net, x, y, reg):
     """
     Returns the neg log likelihood with an additional regularization term.
-    *Note that `reg` will not be divided by the data size (and by 2),
-    this should be done beforehand.*
+    **Note**: `reg` will not be divided by the data size (and by 2);
+    this should be done beforehand.
     :param net: A torch.nn.Module.
     :param x: A torch.tensor, the input.
     :param y: A torch.tensor, the output.
     :param reg: A non-negative float, the regularization.
     """
     out, std_y = net(x)
+    if len(y.shape) <= 1:
+        y = y.view((-1,1))
+    assert out.shape == y.shape
     neg_log_likelihood = torch.mean(0.5* torch.log(2*pi*std_y**2) \
             + ((out-y)**2)/(2*std_y**2))
     regularization = net.regularizer(x, lamb=reg)
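The likelihood term above is the standard homoscedastic Gaussian negative
log-likelihood, mean(0.5*log(2*pi*std_y**2) + (out-y)**2/(2*std_y**2)). As a
quick sanity check (a sketch with made-up numbers, regularizer omitted), it
agrees with torch.nn.GaussianNLLLoss once the constant term is included:

    import torch
    from math import pi

    out = torch.tensor([[0.1], [0.4]])   # network prediction
    y = torch.tensor([[0.0], [0.5]])     # target
    std_y = torch.tensor(0.5)            # homoscedastic std of y
    nll = torch.mean(0.5*torch.log(2*pi*std_y**2)
            + (out-y)**2/(2*std_y**2))
    # full=True keeps the 0.5*log(2*pi) constant
    gauss_nll = torch.nn.GaussianNLLLoss(full=True)
    var = (std_y**2)*torch.ones_like(out)
    assert torch.isclose(nll, gauss_nll(out, y, var))
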
""" out, std_y = net(x) + if len(y.shape) <= 1: + y = y.view((-1,1)) + assert out.shape == y.shape neg_log_likelihood = torch.mean(0.5* torch.log(2*pi*std_y**2) \ + ((out-y)**2)/(2*std_y**2)) regularization = net.regularizer(x, lamb=reg) diff --git a/EIVPackage/EIVTrainingRoutines/train_and_store.py b/EIVPackage/EIVTrainingRoutines/train_and_store.py index 8adc29e..eb0378e 100644 --- a/EIVPackage/EIVTrainingRoutines/train_and_store.py +++ b/EIVPackage/EIVTrainingRoutines/train_and_store.py @@ -40,6 +40,7 @@ class TrainEpoch(): # self.lr_generator = iter(self.next_lr()) self.lr = None + self.total_count = 0 def next_lr(self): while True: @@ -91,6 +92,7 @@ class TrainEpoch(): stored_train_loss_to_average = [] stored_test_loss_to_average = [] for i, (x,y) in enumerate(self.train_dataloader): + self.total_count += 1 # optimize on train data x, y = x.to(self.device), y.to(self.device) loss = self.criterion(net, x, y, self.reg) @@ -125,6 +127,12 @@ class TrainEpoch(): std_x, std_y )) + # to be used for extra reporting + self.last_train_loss = stored_train_loss[-1] + self.last_test_loss = stored_test_loss[-1] + self.last_std_x = std_x + self.last_std_y = std_y + # extra reporting self.extra_report(net, i) stored_train_loss_to_average = [] stored_test_loss_to_average = [] diff --git a/Experiments/train_eiv_carlifornia.py b/Experiments/train_eiv_carlifornia.py deleted file mode 100644 index e69de29..0000000 diff --git a/Experiments/train_noneiv_carlifornia.py b/Experiments/train_noneiv_carlifornia.py index 645b9da..6ce2673 100644 --- a/Experiments/train_noneiv_carlifornia.py +++ b/Experiments/train_noneiv_carlifornia.py @@ -1,35 +1,41 @@ +""" +Train non-EiV model on california housing dataset using different seeds +""" import random import os import numpy as np import torch -import torch.nn as nn -from torch.utils.data import DataLoader, TensorDataset +import torch.backends.cudnn +from torch.utils.data import DataLoader +from torch.utils.tensorboard.writer import SummaryWriter -from EIVArchitectures import Networks +from EIVArchitectures import Networks, initialize_weights from EIVData.california_housing import load_data from EIVTrainingRoutines import train_and_store, loss_functions # hyperparameters lr = 1e-3 -batch_size = 25 -number_of_epochs = 1000 -reg = 1e-7 +batch_size = 200 +test_batch_size = 800 +number_of_epochs = 100 +unscaled_reg = 10 report_point = 5 -p = 0.5 -lr_update = 950 +p = 0.1 +lr_update = 20 # pretraining = 300 -# epoch_offset = pretraining -init_std_y_list = [0.15] -device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') +epoch_offset = 10 +init_std_y_list = [0.5] +gamma = 0.5 +device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu') # reproducability -torch.backends.cudnn.benchmark = False def set_seeds(seed): + torch.backends.cudnn.benchmark = False np.random.seed(seed) random.seed(seed) torch.manual_seed(seed) -seed_list = range(1) +seed_list = [0,] # to store the RMSE rmse_chain = [] @@ -43,8 +49,7 @@ class UpdatedTrainEpoch(train_and_store.TrainEpoch): self.lr = self.initial_lr self.optimizer = torch.optim.Adam(net.parameters(), lr=self.lr) self.lr_scheduler = torch.optim.lr_scheduler.StepLR( - self.optimizer, lr_update, 0.1 - ) + self.optimizer, lr_update, gamma) def post_epoch_update(self, net, epoch): @@ -53,67 +58,74 @@ class UpdatedTrainEpoch(train_and_store.TrainEpoch): """ if epoch >= epoch_offset: net.std_y_par.requires_grad = True - self.lr_scheduler.step() + self.lr_scheduler.step() def extra_report(self, net, i): """ Overwrites 
diff --git a/Experiments/train_eiv_carlifornia.py b/Experiments/train_eiv_carlifornia.py
deleted file mode 100644
index e69de29..0000000
diff --git a/Experiments/train_noneiv_carlifornia.py b/Experiments/train_noneiv_carlifornia.py
index 645b9da..6ce2673 100644
--- a/Experiments/train_noneiv_carlifornia.py
+++ b/Experiments/train_noneiv_carlifornia.py
@@ -1,35 +1,41 @@
+"""
+Train a non-EiV model on the California Housing dataset using different seeds.
+"""
 import random
 import os
 import numpy as np
 import torch
-import torch.nn as nn
-from torch.utils.data import DataLoader, TensorDataset
+import torch.backends.cudnn
+from torch.utils.data import DataLoader
+from torch.utils.tensorboard.writer import SummaryWriter
 
-from EIVArchitectures import Networks
+from EIVArchitectures import Networks, initialize_weights
 from EIVData.california_housing import load_data
 from EIVTrainingRoutines import train_and_store, loss_functions
 
 # hyperparameters
 lr = 1e-3
-batch_size = 25
-number_of_epochs = 1000
-reg = 1e-7
+batch_size = 200
+test_batch_size = 800
+number_of_epochs = 100
+unscaled_reg = 10
 report_point = 5
-p = 0.5
-lr_update = 950
+p = 0.1
+lr_update = 20
 # pretraining = 300
-# epoch_offset = pretraining
-init_std_y_list = [0.15]
-device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+epoch_offset = 10
+init_std_y_list = [0.5]
+gamma = 0.5
+device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
 
 # reproducability
-torch.backends.cudnn.benchmark = False
 def set_seeds(seed):
+    torch.backends.cudnn.benchmark = False
     np.random.seed(seed)
     random.seed(seed)
     torch.manual_seed(seed)
 
-seed_list = range(1)
+seed_list = [0,]
 
 # to store the RMSE
 rmse_chain = []
@@ -43,8 +49,7 @@ class UpdatedTrainEpoch(train_and_store.TrainEpoch):
         self.lr = self.initial_lr
         self.optimizer = torch.optim.Adam(net.parameters(), lr=self.lr)
         self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
-        self.optimizer, lr_update, 0.1
-        )
+        self.optimizer, lr_update, gamma)
 
 
     def post_epoch_update(self, net, epoch):
@@ -53,67 +58,74 @@ class UpdatedTrainEpoch(train_and_store.TrainEpoch):
         """
         if epoch >= epoch_offset:
             net.std_y_par.requires_grad = True
-            self.lr_scheduler.step()
+        self.lr_scheduler.step()
 
     def extra_report(self, net, i):
         """
         Overwrites the corresponding method
-        **Note**: self.val_data_pure has to be defined explicitely
-        and fed after initialiaztion of this class
+        **Note**: self.test_dataloader has to be defined
+        and fed after initialization of this class
         """
         rmse = self.rmse(net).item()
         rmse_chain.append(rmse)
-        print('RMSE %.2f', rmse)
+        writer.add_scalar('RMSE', rmse, self.total_count)
+        writer.add_scalar('train loss', self.last_train_loss, self.total_count)
+        writer.add_scalar('test loss', self.last_test_loss, self.total_count)
+        print(f'RMSE {rmse:.3f}')
 
     def rmse(self, net):
         """
         Compute the root mean squared error for `net`
         """
-        mse = 0
         net_train_state = net.training
         net.eval()
-        x, y = self.val_data_pure
+        x, y = next(iter(self.test_dataloader))
+        if len(y.shape) <= 1:
+            y = y.view((-1,1))
         out = net(x.to(device))[0].detach().cpu()
+        assert out.shape == y.shape
         if net_train_state:
             net.train()
        return torch.sqrt(torch.mean((out-y)**2))
 
-
-def train_on_data(std_x, init_std_y, seed):
+def train_on_data(init_std_y, seed):
     """
-    Loads data associated with `std_x` and trains an Bernoulli Modell.
+    Sets `seed`, loads data and trains a Bernoulli model, starting with
+    `init_std_y`.
     """
-    # load Datasets
-    train_data_pure, train_data,\
-        test_data_pure,test_data,\
-        val_data_pure,val_data = \
-        generate_mexican_data.get_data(std_x=std_x,
-        std_y=std_y)[:-1]
-    train_data = TensorDataset(*train_data)
-    test_data = TensorDataset(*test_data)
+    # set seed
     set_seeds(seed)
+    # load Datasets
+    train_data, test_data = load_data(seed=seed, splitting_part=0.8,
+            normalize=True)
-    # make to dataloader
+    # make dataloaders
     train_dataloader = DataLoader(train_data, batch_size=batch_size,
             shuffle=True)
-    test_dataloader = DataLoader(test_data, batch_size=batch_size,
+    test_dataloader = DataLoader(test_data, batch_size=test_batch_size,
             shuffle=True)
-    # Create a net
-    net = Networks.FNNBer(init_std_y=init_std_y)
+    # create a net
+    input_dim = train_data[0][0].numel()
+    output_dim = train_data[0][1].numel()
+    net = Networks.FNNBer(p=p,
+            init_std_y=init_std_y,
+            h=[input_dim, 1024, 1024, 1024, 1024, output_dim])
+    net.apply(initialize_weights.glorot_init)
     net = net.to(device)
     net.std_y_par.requires_grad = False
     std_x_map = lambda: 0.0
     std_y_map = lambda: net.get_std_y().detach().cpu().item()
-    # Create epoch_map
+    # regularization
+    reg = unscaled_reg/len(train_data)
+    # create epoch_map
     criterion = loss_functions.nll_reg_loss
     epoch_map = UpdatedTrainEpoch(train_dataloader=train_dataloader,
             test_dataloader=test_dataloader,
             criterion=criterion, std_y_map=std_y_map, std_x_map=std_x_map,
-            lr=lr, reg=reg,report_point=report_point, device=device)
-    epoch_map.val_data_pure = val_data_pure
+            lr=lr, reg=reg, report_point=report_point, device=device)
     # run and save
-    save_file = os.path.join('saved_networks','noneiv_mexican_std_x_%.3f'\
-            '_std_y_%.3f_init_std_y_%.3f_seed_%i.pkl'\
-            % (std_x, std_y, init_std_y, int(seed)))
+    save_file = os.path.join('saved_networks',
+            f'noneiv_california'\
+            f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+            f'_p_{p:.2f}_seed_{seed}.pkl')
     train_and_store.train_and_store(net=net,
             epoch_map=epoch_map,
             number_of_epochs=number_of_epochs,
             save_file=save_file)
 
 
 if __name__ == '__main__':
     for seed in seed_list:
-        print('SEED: %i' % (seed,))
+        # Tensorboard monitoring
+        writer = SummaryWriter(log_dir=f'/home/martin09/tmp/tensorboard/'\
+                f'run_noneiv_california_lr_{lr:.4f}_seed'\
+                f'_{seed}_uregu_{unscaled_reg:.1f}_p_{p:.2f}')
+        print(f'>>>>SEED: {seed}')
         for init_std_y in init_std_y_list:
-            for std_x in std_x_list:
-                print('->->Using std_x=%.2f and init_std_y=%.2f<-<-<-<-'
-                        %(std_x, init_std_y))
-                train_on_data(std_x, init_std_y, seed)
+            print(f'Using init_std_y={init_std_y:.3f}')
+            train_on_data(init_std_y, seed)
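The regularization handling here matches the note in nll_reg_loss: the loss
applies reg as-is, so the script divides the unscaled value by the dataset
size before passing it on. Schematically (a sketch with the hyperparameters
above; the train-set size for an 80% split of California Housing is an
assumption):

    unscaled_reg = 10
    n_train = 16512                # assumed: 80% of the 20640 samples
    reg = unscaled_reg / n_train   # what nll_reg_loss actually receives
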
-- 
GitLab