diff --git a/EIVPackage/EIVArchitectures/Networks.py b/EIVPackage/EIVArchitectures/Networks.py
index 3e1d61f683ea06e1a10bfff3fe6bcb83abf066c7..3a1c35c28d6b46f042d4e955d38fc77716868066 100644
--- a/EIVPackage/EIVArchitectures/Networks.py
+++ b/EIVPackage/EIVArchitectures/Networks.py
@@ -278,3 +278,42 @@ class FNNBer(nn.Module):
             sigma = torch.mean(sigma, dim=1)
         return pred, sigma
+class SmallFNNBer(FNNBer):
+    """
+    A fully connected net with Bernoulli dropout layers and three hidden
+    layers.
+    `self.main` is assigned after the parent constructor has run, so it
+    replaces whatever `main` module the parent may have set up.
+    :param p: dropout rate, defaults to 0.2
+    :param init_std_y: Initial standard deviation for input y, defaults
+    to 1.0
+    :param h: A sequence of five integers specifying the number of neurons
+    in each layer (input, three hidden layers, output), defaults to
+    (10, 1024, 1024, 1024, 1).
+    """
+    # the default for `h` is a tuple to avoid a shared mutable default
+    def __init__(self, p=0.2, init_std_y=1.0, h=(10, 1024, 1024, 1024, 1)):
+        super().__init__(p=p, init_std_y=init_std_y)
+        # each hidden layer is Linear -> LeakyReLU -> Dropout, the output
+        # layer is a plain Linear map
+        self.main = nn.Sequential(
+                nn.Linear(h[0], h[1]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[1],h[2]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[2],h[3]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[3],h[4]))
+class ShallowFNNBer(FNNBer):
+    """
+    A fully connected net with Bernoulli dropout layers and two hidden
+    layers.
+    `self.main` is assigned after the parent constructor has run, so it
+    replaces whatever `main` module the parent may have set up.
+    :param p: dropout rate, defaults to 0.2
+    :param init_std_y: Initial standard deviation for input y, defaults
+    to 1.0
+    :param h: A sequence of four integers specifying the number of neurons
+    in each layer (input, two hidden layers, output), defaults to
+    (10, 1024, 1024, 1).
+    """
+    # the default for `h` is a tuple to avoid a shared mutable default
+    def __init__(self, p=0.2, init_std_y=1.0, h=(10, 1024, 1024, 1)):
+        super().__init__(p=p, init_std_y=init_std_y)
+        # each hidden layer is Linear -> LeakyReLU -> Dropout, the output
+        # layer is a plain Linear map
+        self.main = nn.Sequential(
+                nn.Linear(h[0], h[1]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[1],h[2]),
+                nn.LeakyReLU(self.LeakyReLUSlope),
+                nn.Dropout(p=p),
+                nn.Linear(h[2],h[3]))
diff --git a/EIVPackage/EIVArchitectures/initialize_weights.py b/EIVPackage/EIVArchitectures/initialize_weights.py
new file mode 100644
index 0000000000000000000000000000000000000000..a87501df194e7877d2df492440d720a241d957d1
--- /dev/null
+++ b/EIVPackage/EIVArchitectures/initialize_weights.py
@@ -0,0 +1,36 @@
+import torch.nn as nn
+def normal_init(m, scale = 0.01):
+    """
+    Initialize weights of layer `m` with a normal distribution with mean 0
+    and `scale` as standard deviation.
+    Biases, if present, will be set to 0.
+    Layers whose class name contains neither 'Linear' nor 'Conv' are left
+    untouched.
+    :param m: A torch.nn object
+    :param scale: Standard deviation of the weights, defaults to 0.01
+    *Example*:
+    net = nn.Sequential(nn.Linear(1,2), nn.Linear(2,1))
+    net.apply(normal_init)
+    """
+    classname = m.__class__.__name__
+    # only initialize for Linear or Conv layers
+    if 'Linear' in classname or 'Conv' in classname:
+        nn.init.normal_(m.weight.data, 0.0, scale)
+        # layers created with `bias=False` carry `bias = None`
+        if getattr(m, 'bias', None) is not None:
+            nn.init.constant_(m.bias.data, 0.0)
+def glorot_init(m, gain = 1):
+    """
+    Initialize weights of layer `m` via
+    `nn.init.xavier_uniform_(m.weight, gain)` and biases, if present,
+    with 0.
+    Layers whose class name contains neither 'Linear' nor 'Conv' are left
+    untouched.
+    :param m: A torch.nn object
+    :param gain: Scaling factor handed to `nn.init.xavier_uniform_`,
+    defaults to 1
+    *Example*:
+    net = nn.Sequential(nn.Linear(1,2), nn.Linear(2,1))
+    net.apply(glorot_init)
+    """
+    classname = m.__class__.__name__
+    # only initialize for Linear or Conv layers
+    if 'Linear' in classname or 'Conv' in classname:
+        # forward `gain`, otherwise the argument would be silently ignored
+        nn.init.xavier_uniform_(m.weight.data, gain=gain)
+        # layers created with `bias=False` carry `bias = None`
+        if getattr(m, 'bias', None) is not None:
+            nn.init.constant_(m.bias.data, 0.0)
diff --git a/EIVPackage/EIVData/csv_dataset.py b/EIVPackage/EIVData/csv_dataset.py
index 5b584a237cbca1952a496f043671e48ae78e3d17..cd0435d55aadc64f992383fdc612bd674e696e93 100644
--- a/EIVPackage/EIVData/csv_dataset.py
+++ b/EIVPackage/EIVData/csv_dataset.py
@@ -54,10 +54,10 @@ class CSVData(Dataset):
         features_array = np.array(self.data_df)
         labels_array = np.array(self.labels_df)
-        self.mean_features = torch.tensor(np.mean(features_array, axis=0))
-        self.std_features = torch.tensor(np.std(features_array, axis=0))
-        self.mean_labels = torch.tensor(np.mean(labels_array, axis=0))
-        self.std_labels = torch.tensor(np.std(labels_array, axis=0))
+        self.mean_features = torch.tensor(np.mean(features_array, axis=0), dtype=torch.float32)
+        self.std_features = torch.tensor(np.std(features_array, axis=0), dtype=torch.float32)
+        self.mean_labels = torch.tensor(np.mean(labels_array, axis=0), dtype=torch.float32)
+        self.std_labels = torch.tensor(np.std(labels_array, axis=0), dtype=torch.float32)
     def normalize_sample(self, sample):
@@ -103,8 +103,8 @@ class CSVData(Dataset):
     def __getitem__(self, i):
         # returns a tuple of a tensor and the corresponding label
         assert 0 <= i and i<self.__len__()
-        sample = (torch.tensor(np.array(self.data_df.iloc[i])),
-            torch.tensor(np.array(self.labels_df.iloc[i])))
+        sample = (torch.tensor(np.array(self.data_df.iloc[i]), dtype=torch.float32),
+            torch.tensor(np.array(self.labels_df.iloc[i]), dtype=torch.float32))
         if self.normalize:
             return self.normalize_sample(sample)
diff --git a/EIVPackage/EIVTrainingRoutines/loss_functions.py b/EIVPackage/EIVTrainingRoutines/loss_functions.py
index e8ab002ea35cfa3a347eb34537a29c748b773d9b..37b76fe46f0518708a8f6fc45c38d2784d604db1 100644
--- a/EIVPackage/EIVTrainingRoutines/loss_functions.py
+++ b/EIVPackage/EIVTrainingRoutines/loss_functions.py
@@ -9,14 +9,17 @@ from EIVGeneral.repetition import repeat_tensors, reshape_to_chunks
 def nll_reg_loss(net, x, y, reg):
     Returns the neg log likelihood with an additional regularization term.
-    *Note that `reg` will not be divided by the data size (and by 2), 
-     this should be done beforehand.*
+    **Note**: `reg` will not be divided by the data size (and by 2);
+    this should be done beforehand.
     :param net: A torch.nn.Module.
     :param x: A torch.tensor, the input.
     :param y: A torch.tensor, the output.
     :param reg: A non-negative float, the regularization.
     out, std_y = net(x)
+    if len(y.shape) <= 1:
+        y = y.view((-1,1))
+    assert out.shape == y.shape
     neg_log_likelihood = torch.mean(0.5* torch.log(2*pi*std_y**2) \
             + ((out-y)**2)/(2*std_y**2)) 
     regularization = net.regularizer(x, lamb=reg)
diff --git a/EIVPackage/EIVTrainingRoutines/train_and_store.py b/EIVPackage/EIVTrainingRoutines/train_and_store.py
index 8adc29e00b9567a630e614aad78060d18cb3ef99..eb0378e953c00eee2bb0465b1aea8580b92baa01 100644
--- a/EIVPackage/EIVTrainingRoutines/train_and_store.py
+++ b/EIVPackage/EIVTrainingRoutines/train_and_store.py
@@ -40,6 +40,7 @@ class TrainEpoch():
         self.lr_generator = iter(self.next_lr())
         self.lr = None
+        self.total_count = 0
     def next_lr(self):
         while True:
@@ -91,6 +92,7 @@ class TrainEpoch():
         stored_train_loss_to_average = []
         stored_test_loss_to_average = []
         for i, (x,y) in enumerate(self.train_dataloader):
+            self.total_count += 1
             # optimize on train data
             x, y = x.to(self.device), y.to(self.device)
             loss = self.criterion(net, x, y, self.reg)
@@ -125,6 +127,12 @@ class TrainEpoch():
+                # to be used for extra reporting
+                self.last_train_loss = stored_train_loss[-1]
+                self.last_test_loss = stored_test_loss[-1]
+                self.last_std_x = std_x
+                self.last_std_y = std_y
+                # extra reporting
                 self.extra_report(net, i)
                 stored_train_loss_to_average = []
                 stored_test_loss_to_average = []
diff --git a/Experiments/train_eiv_carlifornia.py b/Experiments/train_eiv_carlifornia.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/Experiments/train_noneiv_carlifornia.py b/Experiments/train_noneiv_carlifornia.py
index 645b9dab50f41982fe17d5c72ceb7b9337017459..6ce26736308ee27309cf1312b59c04ce2a941fa4 100644
--- a/Experiments/train_noneiv_carlifornia.py
+++ b/Experiments/train_noneiv_carlifornia.py
@@ -1,35 +1,41 @@
+Train non-EiV model on california housing dataset using different seeds
 import random
 import os
 import numpy as np
 import torch
-import torch.nn as nn
-from torch.utils.data import DataLoader, TensorDataset
+import torch.backends.cudnn
+from torch.utils.data import DataLoader
+from torch.utils.tensorboard.writer import SummaryWriter
-from EIVArchitectures import Networks
+from EIVArchitectures import Networks, initialize_weights
 from EIVData.california_housing import load_data
 from EIVTrainingRoutines import train_and_store, loss_functions
 # hyperparameters
 lr = 1e-3
-batch_size = 25
-number_of_epochs = 1000
-reg = 1e-7
+batch_size = 200
+test_batch_size = 800
+number_of_epochs = 100
+unscaled_reg = 10
 report_point = 5
-p = 0.5
-lr_update = 950
+p = 0.1
+lr_update = 20
 # pretraining = 300
-# epoch_offset = pretraining
-init_std_y_list = [0.15]
-device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+epoch_offset = 10
+init_std_y_list = [0.5]
+gamma = 0.5
+device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
 # reproducability
-torch.backends.cudnn.benchmark = False
 def set_seeds(seed):
+    torch.backends.cudnn.benchmark = False
-seed_list = range(1)
+seed_list = [0,]
 # to store the RMSE
 rmse_chain = []
@@ -43,8 +49,7 @@ class UpdatedTrainEpoch(train_and_store.TrainEpoch):
             self.lr = self.initial_lr
             self.optimizer = torch.optim.Adam(net.parameters(), lr=self.lr)
             self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
-            self.optimizer, lr_update, 0.1
-            )
+            self.optimizer, lr_update, gamma)
     def post_epoch_update(self, net, epoch):
@@ -53,67 +58,74 @@ class UpdatedTrainEpoch(train_and_store.TrainEpoch):
         if epoch >= epoch_offset:
             net.std_y_par.requires_grad = True
-        self.lr_scheduler.step()
+        self.lr_scheduler.step() 
     def extra_report(self, net, i):
         Overwrites the corresponding method
-        **Note**: self.val_data_pure has to be defined explicitely
-        and fed after initialiaztion of this class
+        and fed after initialization of this class
         rmse = self.rmse(net).item()
-        print('RMSE %.2f', rmse)
+        writer.add_scalar('RMSE', rmse, self.total_count)
+        writer.add_scalar('train loss', self.last_train_loss, self.total_count)
+        writer.add_scalar('test loss', self.last_test_loss, self.total_count)
+        print(f'RMSE {rmse:.3f}')
     def rmse(self, net):
         Compute the root mean squared error for `net`
-        mse = 0 
         net_train_state = net.training
-        x, y = self.val_data_pure
+        x, y = next(iter(self.test_dataloader))
+        if len(y.shape) <= 1:
+            y = y.view((-1,1))
         out = net(x.to(device))[0].detach().cpu()
+        assert out.shape == y.shape
         if net_train_state:
         return torch.sqrt(torch.mean((out-y)**2))
-def train_on_data(std_x, init_std_y, seed):
+def train_on_data(init_std_y, seed):
-    Loads data associated with `std_x` and trains an Bernoulli Modell.
+    Sets `seed`, loads the data and trains a Bernoulli model, starting with
+    `init_std_y`.
-    # load Datasets
-    train_data_pure, train_data,\
-            test_data_pure,test_data,\
-            val_data_pure,val_data = \
-            generate_mexican_data.get_data(std_x=std_x,
-                    std_y=std_y)[:-1]
-    train_data = TensorDataset(*train_data)
-    test_data = TensorDataset(*test_data)
+    # set seed
-    # make to dataloader
+    # load Datasets
+    train_data, test_data = load_data(seed=seed, splitting_part=0.8,
+            normalize=True)
+    # make dataloaders
     train_dataloader = DataLoader(train_data, batch_size=batch_size, 
-    test_dataloader = DataLoader(test_data, batch_size=batch_size,
+    test_dataloader = DataLoader(test_data, batch_size=test_batch_size,
-    # Create a net
-    net = Networks.FNNBer(init_std_y=init_std_y)
+    # create a net
+    input_dim = train_data[0][0].numel()
+    output_dim = train_data[0][1].numel()
+    net = Networks.FNNBer(p=p,
+            init_std_y=init_std_y,
+            h=[input_dim, 1024, 1024, 1024, 1024, output_dim])
+    net.apply(initialize_weights.glorot_init)
     net = net.to(device)
     net.std_y_par.requires_grad = False
     std_x_map = lambda: 0.0
     std_y_map = lambda: net.get_std_y().detach().cpu().item()
-    # Create epoch_map
+    # regularization
+    reg = unscaled_reg/len(train_data)
+    # create epoch_map
     criterion = loss_functions.nll_reg_loss
     epoch_map = UpdatedTrainEpoch(train_dataloader=train_dataloader,
             criterion=criterion, std_y_map=std_y_map, std_x_map=std_x_map,
-            lr=lr, reg=reg,report_point=report_point, device=device)
-    epoch_map.val_data_pure = val_data_pure
+            lr=lr, reg=reg, report_point=report_point, device=device)
     # run and save
-    save_file = os.path.join('saved_networks','noneiv_mexican_std_x_%.3f'\
-            '_std_y_%.3f_init_std_y_%.3f_seed_%i.pkl'\
-            % (std_x, std_y, init_std_y, int(seed)))
+    save_file = os.path.join('saved_networks',
+            f'noneiv_california'\
+                    f'init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                    f'_p_{p:.2f}_seed_{seed}.pkl')
@@ -122,11 +134,13 @@ def train_on_data(std_x, init_std_y, seed):
 if __name__ == '__main__':
     for seed in seed_list:
-        print('SEED: %i' % (seed,))
+        # Tensorboard monitoring; `writer` is a module-level name that is
+        # read by `UpdatedTrainEpoch.extra_report`
+        writer = SummaryWriter(log_dir=f'/home/martin09/tmp/tensorboard/'\
+                f'run_noneiv_california_lr_{lr:.4f}_seed'\
+                f'_{seed}_uregu_{unscaled_reg:.1f}_p_{p:.2f}')
+        print(f'>>>>SEED: {seed}')
         for init_std_y in init_std_y_list:
-            for std_x in std_x_list:
-                print('->->Using std_x=%.2f and init_std_y=%.2f<-<-<-<-'
-                        %(std_x, init_std_y))
-                train_on_data(std_x, init_std_y, seed)
+            print(f'Using init_std_y={init_std_y:.3f}')
+            train_on_data(init_std_y, seed)
+        # flush pending events and release the event file before the next
+        # seed opens a new writer
+        writer.close()