Commit d48d711b authored by Jörg Martin

non-EiV for California working, but performance only mediocre

parent c8735bcc
@@ -278,3 +278,42 @@ class FNNBer(nn.Module):
         else:
             sigma = torch.mean(sigma, dim=1)
         return pred, sigma
+
+
+class SmallFNNBer(FNNBer):
+    """
+    A fully connected net with Bernoulli dropout layers.
+    :param p: dropout rate, defaults to 0.2
+    :param init_std_y: Initial standard deviation for the output y.
+    :param h: A list specifying the number of neurons in each layer.
+    """
+    def __init__(self, p=0.2, init_std_y=1.0, h=[10, 1024, 1024, 1024, 1]):
+        super().__init__(p=p, init_std_y=init_std_y)
+        self.main = nn.Sequential(
+                nn.Linear(h[0], h[1]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[1], h[2]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[2], h[3]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[3], h[4]))
+
+
+class ShallowFNNBer(FNNBer):
+    """
+    A fully connected net with Bernoulli dropout layers.
+    :param p: dropout rate, defaults to 0.2
+    :param init_std_y: Initial standard deviation for the output y.
+    :param h: A list specifying the number of neurons in each layer.
+    """
+    def __init__(self, p=0.2, init_std_y=1.0, h=[10, 1024, 1024, 1]):
+        super().__init__(p=p, init_std_y=init_std_y)
+        self.main = nn.Sequential(
+                nn.Linear(h[0], h[1]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[1], h[2]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[2], h[3]))
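A minimal usage sketch for the two added classes (assuming, as the context lines above show, that the forward pass inherited from FNNBer returns the tuple pred, sigma; the input width 10 is the default from h):

    import torch
    from EIVArchitectures import Networks

    net = Networks.SmallFNNBer()   # 10 input features by default
    x = torch.randn(5, 10)         # batch of 5 samples
    pred, sigma = net(x)           # mean prediction and predicted std of y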
EIVArchitectures/initialize_weights.py:
+import torch.nn as nn
+
+
+def normal_init(m, scale=0.01):
+    """
+    Initialize weights of layer `m` with `scale` as standard
+    deviation. Biases will be set to 0.
+    :param m: A torch.nn object
+    :param scale: Standard deviation for the weights, defaults to 0.01.
+    *Example*:
+        net = nn.Sequential(nn.Linear(1,2), nn.Linear(2,1))
+        net.apply(normal_init)
+    """
+    classname = m.__class__.__name__
+    # only initialize for Linear or Conv layers
+    if classname.find('Linear') != -1 or classname.find('Conv') != -1:
+        nn.init.normal_(m.weight.data, 0.0, scale)
+        nn.init.constant_(m.bias.data, 0.0)
+
+
+def glorot_init(m, gain=1):
+    """
+    Initialize weights of layer `m` via `nn.init.xavier_uniform_(m, gain)`
+    and biases with 0.
+    :param m: A torch.nn object
+    :param gain: Scaling factor for the Xavier initialization, defaults to 1.
+    *Example*:
+        net = nn.Sequential(nn.Linear(1,2), nn.Linear(2,1))
+        net.apply(glorot_init)
+    """
+    classname = m.__class__.__name__
+    # only initialize for Linear or Conv layers
+    if classname.find('Linear') != -1 or classname.find('Conv') != -1:
+        # pass `gain` along so that the argument actually takes effect
+        nn.init.xavier_uniform_(m.weight.data, gain)
+        nn.init.constant_(m.bias.data, 0.0)
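Module.apply passes only the module itself to the hook, so a non-default scale or gain cannot be supplied through net.apply directly; binding it beforehand with functools.partial is one way around this (a sketch, not part of the commit):

    from functools import partial
    import torch.nn as nn

    net = nn.Sequential(nn.Linear(1, 2), nn.Linear(2, 1))
    net.apply(partial(normal_init, scale=0.05))   # normal init, std 0.05
    net.apply(partial(glorot_init, gain=2))       # Xavier init, gain 2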
@@ -54,10 +54,10 @@ class CSVData(Dataset):
         """
         features_array = np.array(self.data_df)
         labels_array = np.array(self.labels_df)
-        self.mean_features = torch.tensor(np.mean(features_array, axis=0))
-        self.std_features = torch.tensor(np.std(features_array, axis=0))
-        self.mean_labels = torch.tensor(np.mean(labels_array, axis=0))
-        self.std_labels = torch.tensor(np.std(labels_array, axis=0))
+        self.mean_features = torch.tensor(np.mean(features_array, axis=0), dtype=torch.float32)
+        self.std_features = torch.tensor(np.std(features_array, axis=0), dtype=torch.float32)
+        self.mean_labels = torch.tensor(np.mean(labels_array, axis=0), dtype=torch.float32)
+        self.std_labels = torch.tensor(np.std(labels_array, axis=0), dtype=torch.float32)

     def normalize_sample(self, sample):
         """
@@ -103,8 +103,8 @@ class CSVData(Dataset):
     def __getitem__(self, i):
         # returns a tuple of a tensor and the corresponding label
         assert 0 <= i and i < self.__len__()
-        sample = (torch.tensor(np.array(self.data_df.iloc[i])),
-                torch.tensor(np.array(self.labels_df.iloc[i])))
+        sample = (torch.tensor(np.array(self.data_df.iloc[i]), dtype=torch.float32),
+                torch.tensor(np.array(self.labels_df.iloc[i]), dtype=torch.float32))
         if self.normalize:
             return self.normalize_sample(sample)
         else:
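The added dtype=torch.float32 arguments matter because pandas/NumPy arrays default to float64, while nn.Linear parameters are float32, so feeding unconverted tensors into the network fails at the first matrix multiplication. A minimal illustration (not from the repository):

    import numpy as np
    import torch
    import torch.nn as nn

    layer = nn.Linear(3, 1)                                    # float32 parameters
    x64 = torch.tensor(np.zeros((1, 3)))                       # inherits float64
    # layer(x64) raises a RuntimeError about mismatched scalar types
    x32 = torch.tensor(np.zeros((1, 3)), dtype=torch.float32)
    out = layer(x32)                                           # works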
@@ -9,14 +9,17 @@ from EIVGeneral.repetition import repeat_tensors, reshape_to_chunks
 def nll_reg_loss(net, x, y, reg):
     """
     Returns the neg log likelihood with an additional regularization term.
-    *Note that `reg` will not be divided by the data size (and by 2),
-    this should be done beforehand.*
+    **Note**: `reg` will not be divided by the data size (and by 2);
+    this should be done beforehand.
     :param net: A torch.nn.Module.
     :param x: A torch.tensor, the input.
     :param y: A torch.tensor, the output.
     :param reg: A non-negative float, the regularization.
     """
     out, std_y = net(x)
+    if len(y.shape) <= 1:
+        y = y.view((-1,1))
+    assert out.shape == y.shape
     neg_log_likelihood = torch.mean(0.5 * torch.log(2*pi*std_y**2) \
             + ((out-y)**2)/(2*std_y**2))
     regularization = net.regularizer(x, lamb=reg)
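The likelihood part above is the standard Gaussian negative log-likelihood, the batch mean of 0.5*log(2*pi*std_y**2) + (out-y)**2/(2*std_y**2). As a sanity check it can be compared against torch.nn.GaussianNLLLoss, which exists in recent PyTorch versions (full=True keeps the 0.5*log(2*pi) constant; the third argument is the variance, i.e. the squared std):

    from math import pi
    import torch

    out, y = torch.randn(4, 1), torch.randn(4, 1)
    std_y = torch.full((4, 1), 0.5)
    manual = torch.mean(0.5*torch.log(2*pi*std_y**2) + ((out-y)**2)/(2*std_y**2))
    reference = torch.nn.GaussianNLLLoss(full=True)(out, y, std_y**2)
    print(torch.allclose(manual, reference))   # True (var well above eps)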
@@ -40,6 +40,7 @@ class TrainEpoch():
         #
         self.lr_generator = iter(self.next_lr())
         self.lr = None
+        self.total_count = 0

     def next_lr(self):
         while True:
@@ -91,6 +92,7 @@ class TrainEpoch():
         stored_train_loss_to_average = []
         stored_test_loss_to_average = []
         for i, (x,y) in enumerate(self.train_dataloader):
+            self.total_count += 1
             # optimize on train data
             x, y = x.to(self.device), y.to(self.device)
             loss = self.criterion(net, x, y, self.reg)
@@ -125,6 +127,12 @@
                     std_x,
                     std_y
                     ))
+                # to be used for extra reporting
+                self.last_train_loss = stored_train_loss[-1]
+                self.last_test_loss = stored_test_loss[-1]
+                self.last_std_x = std_x
+                self.last_std_y = std_y
                 # extra reporting
                 self.extra_report(net, i)
                 stored_train_loss_to_average = []
                 stored_test_loss_to_average = []
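The new total_count attribute counts batches across epochs, unlike the loop index i, which restarts at 0 every epoch; together with the stored last_* values this gives extra_report overrides a consistent global step for logging, as the script below uses it. A minimal sketch (assuming a SummaryWriter named writer in scope):

    def extra_report(self, net, i):
        # `i` restarts each epoch and would overwrite earlier points;
        # `total_count` keeps growing, so it works as a global step
        writer.add_scalar('train loss', self.last_train_loss, self.total_count)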
"""
Train non-EiV model on california housing dataset using different seeds
"""
import random
import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.backends.cudnn
from torch.utils.data import DataLoader
from torch.utils.tensorboard.writer import SummaryWriter
from EIVArchitectures import Networks
from EIVArchitectures import Networks, initialize_weights
from EIVData.california_housing import load_data
from EIVTrainingRoutines import train_and_store, loss_functions
# hyperparameters
lr = 1e-3
batch_size = 25
number_of_epochs = 1000
reg = 1e-7
batch_size = 200
test_batch_size = 800
number_of_epochs = 100
unscaled_reg = 10
report_point = 5
p = 0.5
lr_update = 950
p = 0.1
lr_update = 20
# pretraining = 300
# epoch_offset = pretraining
init_std_y_list = [0.15]
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
epoch_offset = 10
init_std_y_list = [0.5]
gamma = 0.5
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
# reproducability
torch.backends.cudnn.benchmark = False
def set_seeds(seed):
torch.backends.cudnn.benchmark = False
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
seed_list = range(1)
seed_list = [0,]
# to store the RMSE
rmse_chain = []
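With these settings the effective regularization used further down is reg = unscaled_reg/len(train_data); assuming load_data wraps scikit-learn's California housing set (20640 samples) and the 80/20 split from splitting_part=0.8, this works out to roughly 10/16512 ≈ 6.1e-4, consistent with the note in nll_reg_loss that the division by the data size happens outside the loss.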
@@ -43,8 +49,7 @@ class UpdatedTrainEpoch(train_and_store.TrainEpoch):
         self.lr = self.initial_lr
         self.optimizer = torch.optim.Adam(net.parameters(), lr=self.lr)
         self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
-                self.optimizer, lr_update, 0.1
-                )
+                self.optimizer, lr_update, gamma)

     def post_epoch_update(self, net, epoch):
@@ -53,67 +58,74 @@ class UpdatedTrainEpoch(train_and_store.TrainEpoch):
         """
         if epoch >= epoch_offset:
             net.std_y_par.requires_grad = True
-            self.lr_scheduler.step()
+        self.lr_scheduler.step()

     def extra_report(self, net, i):
         """
         Overrides the corresponding method
         **Note**: `self.val_data_pure` has to be defined explicitly
-        and fed after initialiaztion of this class
+        and fed after initialization of this class
         """
         rmse = self.rmse(net).item()
         rmse_chain.append(rmse)
-        print('RMSE %.2f', rmse)
+        writer.add_scalar('RMSE', rmse, self.total_count)
+        writer.add_scalar('train loss', self.last_train_loss, self.total_count)
+        writer.add_scalar('test loss', self.last_test_loss, self.total_count)
+        print(f'RMSE {rmse:.3f}')

     def rmse(self, net):
         """
         Compute the root mean squared error for `net`
         """
         mse = 0
         net_train_state = net.training
         net.eval()
-        x, y = self.val_data_pure
+        x, y = next(iter(self.test_dataloader))
+        if len(y.shape) <= 1:
+            y = y.view((-1,1))
         out = net(x.to(device))[0].detach().cpu()
+        assert out.shape == y.shape
         if net_train_state:
             net.train()
         return torch.sqrt(torch.mean((out-y)**2))

-def train_on_data(std_x, init_std_y, seed):
+def train_on_data(init_std_y, seed):
     """
-    Loads data associated with `std_x` and trains a Bernoulli model.
+    Sets `seed`, loads data and trains a Bernoulli model, starting with
+    `init_std_y`.
     """
-    # load Datasets
-    train_data_pure, train_data,\
-            test_data_pure, test_data,\
-            val_data_pure, val_data = \
-            generate_mexican_data.get_data(std_x=std_x,
-                    std_y=std_y)[:-1]
-    train_data = TensorDataset(*train_data)
-    test_data = TensorDataset(*test_data)
     # set seed
     set_seeds(seed)
-    # make to dataloader
+    # load Datasets
+    train_data, test_data = load_data(seed=seed, splitting_part=0.8,
+            normalize=True)
+    # make dataloaders
     train_dataloader = DataLoader(train_data, batch_size=batch_size,
             shuffle=True)
-    test_dataloader = DataLoader(test_data, batch_size=batch_size,
+    test_dataloader = DataLoader(test_data, batch_size=test_batch_size,
             shuffle=True)
-    # Create a net
-    net = Networks.FNNBer(init_std_y=init_std_y)
+    # create a net
+    input_dim = train_data[0][0].numel()
+    output_dim = train_data[0][1].numel()
+    net = Networks.FNNBer(p=p,
+            init_std_y=init_std_y,
+            h=[input_dim, 1024, 1024, 1024, 1024, output_dim])
+    net.apply(initialize_weights.glorot_init)
     net = net.to(device)
     net.std_y_par.requires_grad = False
     std_x_map = lambda: 0.0
     std_y_map = lambda: net.get_std_y().detach().cpu().item()
+    # regularization
+    reg = unscaled_reg/len(train_data)
-    # Create epoch_map
+    # create epoch_map
     criterion = loss_functions.nll_reg_loss
     epoch_map = UpdatedTrainEpoch(train_dataloader=train_dataloader,
             test_dataloader=test_dataloader,
             criterion=criterion, std_y_map=std_y_map, std_x_map=std_x_map,
-            lr=lr, reg=reg,report_point=report_point, device=device)
-    epoch_map.val_data_pure = val_data_pure
+            lr=lr, reg=reg, report_point=report_point, device=device)
     # run and save
-    save_file = os.path.join('saved_networks','noneiv_mexican_std_x_%.3f'\
-            '_std_y_%.3f_init_std_y_%.3f_seed_%i.pkl'\
-            % (std_x, std_y, init_std_y, int(seed)))
+    save_file = os.path.join('saved_networks',
+            f'noneiv_california_'\
+            f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+            f'_p_{p:.2f}_seed_{seed}.pkl')
     train_and_store.train_and_store(net=net,
             epoch_map=epoch_map,
             number_of_epochs=number_of_epochs,
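Assuming load_data delivers scikit-learn's California housing data (8 features, scalar target), input_dim and output_dim above resolve to 8 and 1, so the constructed network is equivalent to this sketch:

    net = Networks.FNNBer(p=0.1, init_std_y=0.5,
            h=[8, 1024, 1024, 1024, 1024, 1])
    net.apply(initialize_weights.glorot_init)

i.e. four hidden layers of width 1024, each followed by LeakyReLU and dropout, Glorot-initialized before training.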
@@ -122,11 +134,13 @@ def train_on_data(std_x, init_std_y, seed):
 if __name__ == '__main__':
     for seed in seed_list:
-        print('SEED: %i' % (seed,))
+        # Tensorboard monitoring
+        writer = SummaryWriter(log_dir=f'/home/martin09/tmp/tensorboard/'\
+                f'run_noneiv_california_lr_{lr:.4f}_seed'\
+                f'_{seed}_uregu_{unscaled_reg:.1f}_p_{p:.2f}')
+        print(f'>>>>SEED: {seed}')
         for init_std_y in init_std_y_list:
-            for std_x in std_x_list:
-                print('->->Using std_x=%.2f and init_std_y=%.2f<-<-<-<-'
-                        % (std_x, init_std_y))
-                train_on_data(std_x, init_std_y, seed)
+            print(f'Using init_std_y={init_std_y:.3f}')
+            train_on_data(init_std_y, seed)