Skip to content
Snippets Groups Projects
Commit cc0b86fa authored by Jörg Martin's avatar Jörg Martin
Browse files

Universal training scripts. Std_y via RMSE

parent 0acf6161
Branches
Tags
No related merge requests found
Showing
with 437 additions and 8 deletions
......@@ -27,20 +27,24 @@ class FNNEIV(nn.Module):
:param repetition: Positive integer, the default value for repeating input,
defaults to 1. For a single call this can also be specified in the forward
method.
:param std_y_requires_grad: Whether `sigma_y` will require_grad and thus
be updated during optimization. Defaults to False.
**Note**:
- To change the deming factor afterwards, use the method `change_deming`
- To change fixed_std_x afterwards, use the method `change_fixed_std_x`
- To change std_y afterwards, use the method `change_std_y`
"""
LeakyReLUSlope = 1e-2
def __init__(self, p = 0.2, init_std_y=1.0, precision_prior_zeta=0.0,
deming=1.0, h=[10, 1024,1024,1024,1024, 1],
fixed_std_x = None, repetition = 1):
fixed_std_x = None, repetition = 1, std_y_requires_grad = False):
super().__init__()
# part before Bernoulli dropout
self.init_std_y = init_std_y
InverseSoftplus = lambda sigma: torch.log(torch.exp(sigma) - 1 )
self.InverseSoftplus = lambda sigma: torch.log(torch.exp(sigma) - 1 )
self.std_y_par = nn.parameter.Parameter(
InverseSoftplus(torch.tensor([init_std_y])))
self.InverseSoftplus(torch.tensor([init_std_y])))
self.std_y_par.requires_grad = std_y_requires_grad
self._repetition = repetition
self.main = nn.Sequential(
EIVInput(precision_prior_zeta=precision_prior_zeta,
......@@ -87,6 +91,18 @@ class FNNEIV(nn.Module):
fixed_std_x = torch.tensor(fixed_std_x)
self._fixed_std_x = fixed_std_x
def change_std_y(self, std_y):
    """
    Update internal std_y to `std_y`.

    The value is stored through `self.InverseSoftplus`, so that
    `get_std_y` returns `std_y` afterwards.

    :param std_y: A strictly positive scalar, given as a torch.tensor with
    exactly one element (anything `torch.as_tensor` can convert to such a
    tensor is accepted as well).
    :raises ValueError: If `std_y` does not contain exactly one element or
    is not strictly positive.
    """
    # Work in the dtype/device of the stored parameter. Assigning a foreign
    # tensor to `.data` would silently change the dtype or device of the
    # parameter (e.g. for a float64 or CPU valued `std_y`).
    std_y = torch.as_tensor(std_y, dtype=self.std_y_par.dtype,
                            device=self.std_y_par.device)
    # Explicit exceptions instead of `assert`, which is stripped under -O.
    if std_y.numel() != 1:
        raise ValueError('std_y must contain exactly one element, got %d'
                         % (std_y.numel(),))
    std_y = std_y.detach().reshape((1,))
    # The inverse softplus is only defined for positive arguments; the
    # negated comparison also rejects NaN.
    if not std_y.item() > 0:
        raise ValueError('std_y must be positive, got %s' % (std_y.item(),))
    print('Updating std_y from %.3f to %.3f' % (self.get_std_y().item(),
                                                std_y.item()))
    # In-place copy keeps the parameter object (and its requires_grad
    # setting) intact and does not record the update in autograd.
    with torch.no_grad():
        self.std_y_par.copy_(self.InverseSoftplus(std_y))
def noise_off(self):
self.noise_is_on = False
......@@ -331,15 +347,19 @@ class FNNBer(nn.Module):
:param p: dropout rate, defaults to 0.2
:param init_std_y: Initial standard deviation for input y.
:param h: A list specifying the number of neurons in each layer.
:param std_y_requires_grad: Whether `sigma_y` will require_grad and thus
be updated during optimization. Defaults to False.
"""
LeakyReLUSlope = 1e-2
def __init__(self, p=0.2, init_std_y=1.0, h=[10, 1024,1024,1024,1024, 1]):
def __init__(self, p=0.2, init_std_y=1.0, h=[10, 1024,1024,1024,1024, 1],
std_y_requires_grad=False):
super().__init__()
# part before Bernoulli dropout
self.init_std_y = init_std_y
InverseSoftplus = lambda sigma: torch.log(torch.exp(sigma) - 1 )
self.InverseSoftplus = lambda sigma: torch.log(torch.exp(sigma) - 1 )
self.std_y_par = nn.parameter.Parameter(
InverseSoftplus(torch.tensor([init_std_y])))
self.InverseSoftplus(torch.tensor([init_std_y])))
self.std_y_par.requires_grad = std_y_requires_grad
self.main = nn.Sequential(
nn.Linear(h[0], h[1]),
nn.LeakyReLU(self.LeakyReLUSlope),
......@@ -363,6 +383,17 @@ class FNNBer(nn.Module):
def get_std_y(self):
    """
    Return the current std_y, obtained by applying a softplus to the raw
    parameter `std_y_par`.
    """
    softplus = nn.Softplus()
    return softplus(self.std_y_par)
def change_std_y(self, std_y):
    """
    Update internal std_y to `std_y`.

    The value is stored through `self.InverseSoftplus`, so that
    `get_std_y` returns `std_y` afterwards.

    :param std_y: A strictly positive scalar, given as a torch.tensor with
    exactly one element (anything `torch.as_tensor` can convert to such a
    tensor is accepted as well).
    :raises ValueError: If `std_y` does not contain exactly one element or
    is not strictly positive.
    """
    # Work in the dtype/device of the stored parameter. Assigning a foreign
    # tensor to `.data` would silently change the dtype or device of the
    # parameter (e.g. for a float64 or CPU valued `std_y`).
    std_y = torch.as_tensor(std_y, dtype=self.std_y_par.dtype,
                            device=self.std_y_par.device)
    # Explicit exceptions instead of `assert`, which is stripped under -O.
    if std_y.numel() != 1:
        raise ValueError('std_y must contain exactly one element, got %d'
                         % (std_y.numel(),))
    std_y = std_y.detach().reshape((1,))
    # The inverse softplus is only defined for positive arguments; the
    # negated comparison also rejects NaN.
    if not std_y.item() > 0:
        raise ValueError('std_y must be positive, got %s' % (std_y.item(),))
    print('Updating std_y from %.3f to %.3f' % (self.get_std_y().item(),
                                                std_y.item()))
    # In-place copy keeps the parameter object (and its requires_grad
    # setting) intact and does not record the update in autograd.
    with torch.no_grad():
        self.std_y_par.copy_(self.InverseSoftplus(std_y))
def forward(self, x):
mu = self.main(x)
sigma = self.sigma(mu)
......@@ -526,9 +557,13 @@ class SmallFNNBer(FNNBer):
:param p: dropout rate, defaults to 0.2
:param init_std_y: Initial standard deviation for input y.
:param h: A list specifying the number of neurons in each layer.
:param std_y_requires_grad: Whether `sigma_y` will require_grad and thus
be updated during optimization. Defaults to False.
"""
def __init__(self, p=0.2, init_std_y=1.0, h=[10, 1024,1024,1024, 1]):
super().__init__(p=p, init_std_y=init_std_y)
def __init__(self, p=0.2, init_std_y=1.0, h=[10, 1024,1024,1024, 1],
std_y_requires_grad=False):
super().__init__(p=p, init_std_y=init_std_y,
std_y_requires_grad=std_y_requires_grad)
self.main = nn.Sequential(
nn.Linear(h[0], h[1]),
nn.LeakyReLU(self.LeakyReLUSlope),
......
{
"long_dataname": "california_housing",
"short_dataname": "california",
"lr": 1e-3,
"batch_size": 200,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.1,
"lr_update": 20,
"epoch_offset": 10,
"eiv_prediction_number_of_draws": 100,
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"fixed_std_x": 0.05,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "concrete_strength",
"short_dataname": "concrete",
"lr": 1e-3,
"batch_size": 32,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 20,
"epoch_offset": 10,
"eiv_prediction_number_of_draws": 100,
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"fixed_std_x": 0.05,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "energy_efficiency",
"short_dataname": "energy",
"lr": 1e-3,
"batch_size": 32,
"test_batch_size": 600,
"number_of_epochs": 600,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 100,
"epoch_offset": 100,
"eiv_prediction_number_of_draws": 100,
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"fixed_std_x": 0.05,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "kin8nm",
"short_dataname": "kin8nm",
"lr": 1e-3,
"batch_size": 32,
"test_batch_size": 600,
"number_of_epochs": 30,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 20,
"epoch_offset": 19,
"eiv_prediction_number_of_draws": 100,
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"fixed_std_x": 0.05,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "million_song",
"short_dataname": "msd",
"lr": 1e-3,
"batch_size": 100,
"test_batch_size": 600,
"number_of_epochs": 10,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 4,
"epoch_offset": 4,
"eiv_prediction_number_of_draws": 100,
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"fixed_std_x": 0.05,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "naval_propulsion",
"short_dataname": "naval",
"lr": 1e-3,
"batch_size": 32,
"test_batch_size": 600,
"number_of_epochs": 30,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 20,
"epoch_offset": 20,
"eiv_prediction_number_of_draws": 100,
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"fixed_std_x": 0.05,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "power_plant",
"short_dataname": "power",
"lr": 1e-3,
"batch_size": 64,
"test_batch_size": 600,
"number_of_epochs": 35,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 10,
"epoch_offset": 15,
"eiv_prediction_number_of_draws": 100,
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"fixed_std_x": 0.05,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "protein_structure",
"short_dataname": "protein",
"lr": 1e-3,
"batch_size": 100,
"test_batch_size": 600,
"number_of_epochs": 30,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 10,
"epoch_offset": 10,
"eiv_prediction_number_of_draws": 100,
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"fixed_std_x": 0.05,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "wine_quality",
"short_dataname": "wine",
"lr": 1e-3,
"batch_size": 32,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 30,
"epoch_offset": 50,
"eiv_prediction_number_of_draws": 100,
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"fixed_std_x": 0.05,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "yacht_hydrodynamics",
"short_dataname": "yacht",
"lr": 1e-3,
"batch_size": 32,
"test_batch_size": 600,
"number_of_epochs": 1200,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 200,
"epoch_offset": 250,
"eiv_prediction_number_of_draws": 100,
"eiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"fixed_std_x": 0.05,
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "california_housing",
"short_dataname": "california",
"lr": 1e-3,
"batch_size": 200,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.1,
"lr_update": 20,
"epoch_offset": 0 ,
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "concrete_strength",
"short_dataname": "concrete",
"lr": 1e-3,
"batch_size": 32,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 20,
"epoch_offset": 10,
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "energy_efficiency",
"short_dataname": "energy",
"lr": 1e-3,
"batch_size": 32,
"test_batch_size": 600,
"number_of_epochs": 600,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 100,
"epoch_offset": 100,
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "kin8nm",
"short_dataname": "kin8nm",
"lr": 1e-3,
"batch_size": 32,
"test_batch_size": 600,
"number_of_epochs": 30,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 20,
"epoch_offset": 19,
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "million_song",
"short_dataname": "msd",
"lr": 1e-3,
"batch_size": 100,
"test_batch_size": 600,
"number_of_epochs": 10,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 4,
"epoch_offset": 4,
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "naval_propulsion",
"short_dataname": "naval",
"lr": 1e-3,
"batch_size": 32,
"test_batch_size": 600,
"number_of_epochs": 30,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 20,
"epoch_offset": 20,
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "power_plant",
"short_dataname": "power",
"lr": 1e-3,
"batch_size": 64,
"test_batch_size": 600,
"number_of_epochs": 35,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 10,
"epoch_offset": 15,
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "protein_structure",
"short_dataname": "protein",
"lr": 1e-3,
"batch_size": 100,
"test_batch_size": 600,
"number_of_epochs": 30,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 10,
"epoch_offset": 10,
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"seed_range": [0,10],
"gpu_number": 1
}
{
"long_dataname": "wine_quality",
"short_dataname": "wine",
"lr": 1e-3,
"batch_size": 32,
"test_batch_size": 800,
"number_of_epochs": 100,
"unscaled_reg": 10,
"report_point": 5,
"p": 0.2,
"lr_update": 30,
"epoch_offset": 50,
"noneiv_prediction_number_of_draws": 100,
"noneiv_prediction_number_of_batches": 10,
"init_std_y_list": [0.5],
"gamma": 0.5,
"hidden_layers": [1024, 1024, 1024, 1024],
"seed_range": [0,10],
"gpu_number": 1
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment