diff --git a/EIVPackage/EIVArchitectures/Networks.py b/EIVPackage/EIVArchitectures/Networks.py
index 1435f48f00a929d51a1a9cfde6414c11704ab7a2..3a26b02d4e7d1f74f86502e9eb6d76e08ebe16ea 100644
--- a/EIVPackage/EIVArchitectures/Networks.py
+++ b/EIVPackage/EIVArchitectures/Networks.py
@@ -24,6 +24,9 @@ class FNNEIV(nn.Module):
     :param h: A list specifying the number of neurons in each layer.
     :param fixed_std_x: If given, this value will be the output of the method
     `get_std_x()`, no matter what the deming factor.
+    :param repetition: Positive integer, the default number of repetitions of
+    the input, defaults to 1. For a single call this can also be specified in
+    the forward method.
     **Note**:
     - To change the deming factor afterwards, use the method `change_deming`
     - To change fixed_std_x afterwards, use the method `change_fixed_std_x`
@@ -31,14 +34,14 @@ class FNNEIV(nn.Module):
     LeakyReLUSlope = 1e-2
     def __init__(self, p = 0.2, init_std_y=1.0, precision_prior_zeta=0.0,
             deming=1.0, h=[10, 1024,1024,1024,1024, 1],
-            fixed_std_x = None):
+            fixed_std_x = None, repetition = 1):
         super().__init__()
         # part before Bernoulli dropout
         self.init_std_y = init_std_y
         InverseSoftplus = lambda sigma: torch.log(torch.exp(sigma) - 1 )
         self.std_y_par = nn.parameter.Parameter(
                 InverseSoftplus(torch.tensor([init_std_y])))
-        self._repetition = 1
+        self._repetition = repetition
         self.main = nn.Sequential(
                 EIVInput(precision_prior_zeta=precision_prior_zeta,
                     external_std_x=self.get_std_x),
@@ -110,10 +113,11 @@ class FNNEIV(nn.Module):
         return self._repetition
 
     def forward(self, x, repetition=1):
+        old_repetition = self._repetition
         self._repetition = repetition
         mu = self.main(x)
         sigma = self.sigma(mu)
-        self._repetition = 1
+        self._repetition = old_repetition
         return mu, sigma
 
     def regularizer(self, x, lamb):
@@ -154,8 +158,10 @@ class FNNEIV(nn.Module):
                 regularization += self.main[0].neg_x_evidence(x)
         return regularization
 
-
-    def predict(self, x, number_of_draws=100, remove_graph=True
+    # TODO: repetition
+    # + Test via breakpoints
+    def predict(self, x, number_of_draws=[100,5], number_of_parameter_chunks = None,
+            remove_graph=True
             , take_average_of_prediction=True):
         """
         Average over `number_of_draws` forward passes. If
@@ -164,19 +170,46 @@ class FNNEIV(nn.Module):
         **Note**: This method does neither touch the input noise nor Dropout.
         The corresponding setting is left to the user!
         :param x: A torch.tensor, the input
-        :param number_of_draws: Number of draws to obtain from x
+        :param number_of_draws: An integer or a list. If an integer
+        `number_of_draws`, it will be converted internally to
+        `[number_of_draws, 1]`. Numbers of draws to obtain from x via parameter
+        sampling (first element) and noise input sampling (second element).
+        :param number_of_parameter_chunks: An integer or None (default). If
+        None, the second element of `number_of_draws` will be taken (and will
+        thus be identical to 1 if `number_of_draws` is an integer, see above).
+        Samples in the parameter space will be divided into
+        `number_of_parameter_chunks` chunks when collected. Can be used to
+        reduce the memory usage.
         :param remove_graph: If True (default) the output will be detached to
         save memory
         :param take_average_of_prediction: If False, no averaging will be
         applied to the prediction and the second dimension of the first output
         will count the number_of_draws.
""" - x, = repeat_tensors(x, number_of_draws=number_of_draws) - pred, sigma = self.forward(x) - if remove_graph: - pred, sigma = pred.detach(), sigma.detach() - pred, sigma = reshape_to_chunks(pred, sigma, - number_of_draws=number_of_draws) + # x, = repeat_tensors(x, number_of_draws=number_of_draws) + if type(number_of_draws) is int: + number_of_draws = [number_of_draws, 1] + if number_of_parameter_chunks is None: + number_of_parameter_chunks = number_of_draws[1] + chunk_size = number_of_draws[0] // number_of_parameter_chunks + pred_collection, sigma_collection = [], [] + remaining_draws = number_of_draws[0] + while remaining_draws > 0: + if remaining_draws < chunk_size: + parameter_sample_size = remaining_draws + else: + parameter_sample_size = chunk_size + repeated_x, = repeat_tensors(x, number_of_draws=parameter_sample_size * number_of_draws[1]) + pred, sigma = self.forward(repeated_x, repetition=number_of_draws[1]) + if remove_graph: + pred, sigma = pred.detach(), sigma.detach() + pred, sigma = reshape_to_chunks(pred, sigma, + number_of_draws=parameter_sample_size * number_of_draws[1]) + pred_collection.append(pred) + sigma_collection.append(pred) + remaining_draws -= parameter_sample_size + pred = torch.cat(pred_collection, dim=1) + sigma = torch.cat(sigma_collection, dim=1) # reduce along the draws (actually redundant for sigma) if take_average_of_prediction: pred, sigma = torch.mean(pred, dim=1), torch.mean(sigma, dim=1) @@ -184,7 +217,10 @@ class FNNEIV(nn.Module): sigma = torch.mean(sigma, dim=1) return pred, sigma - def predictive_logdensity(self, x, y, number_of_draws=100, remove_graph=True, + # TODO: repetition + # + Implement + # + Test via breakpoints + def predictive_logdensity(self, x, y, number_of_draws=[100, 5], number_of_parameter_chunks = None, remove_graph=True, average_batch_dimension=True, scale_labels=None, decouple_dimensions=False): """ @@ -193,7 +229,16 @@ class FNNEIV(nn.Module): the batch dimension. :param x: A torch.tensor, the input :param y: A torch.tensor, labels on which to evaluate the density - :param number_of_draws: Number of draws to obtain from x + :param number_of_draws: An integer or a list. If an integer + `number_of_draws`, will be converted internally to + `[number_of_draws,1]`.Numbers of draws to obtain from x via parameter + sampling (first element) and noise input sampling (second element). + :param number_of_parameter_chunks: An integer or None (default). If + None, the second element of `number_of_draws` will be taken (and will + thus be identical to 1 if `number_of_draws` is an integer, see above). + Samples in the parameter space will be divided into + `number_of_parameter_chunks` chunks when collected. Can be used to + reduced the memory usage. :param remove_graph: If True (default) the output will be detached to save memory :param average_batch_dimension: Boolean. If True (default) the values @@ -201,7 +246,9 @@ class FNNEIV(nn.Module): dimension will be left untouched and all values will be returned. """ out, sigmas = self.predict(x, number_of_draws=number_of_draws, - take_average_of_prediction=False, remove_graph=remove_graph) + number_of_parameter_chunks=number_of_parameter_chunks, + remove_graph=remove_graph, + take_average_of_prediction=False) # Add "repetition" dimension to y and out y = y[:,None,...] sigmas = sigmas[:,None,...] 
diff --git a/Experiments/evaluate_tabular.py b/Experiments/evaluate_tabular.py
index d97622dfe48a41b86eb95df92d7ecec5f0c2e3fb..90b972f92148183d73cfdbb7e95cbe0391fc57f3 100644
--- a/Experiments/evaluate_tabular.py
+++ b/Experiments/evaluate_tabular.py
@@ -20,13 +20,13 @@ train_eiv = importlib.import_module(f'train_eiv_{short_dataname}')
 
 train_data, test_data = load_data()
 test_dataloader = DataLoader(test_data, batch_size=int(np.max((len(test_data),
-        800))))
+        64))), shuffle=True)
 input_dim = train_data[0][0].numel()
 output_dim = train_data[0][1].numel()
 
 def collect_metrics(x,y, seed=0,
-        noneiv_number_of_draws=500, eiv_number_of_draws=500,
-        decouple_dimensions=False):
+        noneiv_number_of_draws=100, eiv_number_of_draws=100,
+        decouple_dimensions=False, device=torch.device('cuda:1')):
     """
     :param x: A torch.tensor, taken as input
    :param y: A torch.tensor, taken as output
@@ -34,13 +34,14 @@ def collect_metrics(x,y, seed=0,
     :param noneiv_number_of_draws: Number of draws for non-EiV model
     for sampling from the posterior predictive. Defaults to 100.
     :param noneiv_number_of_draws: Number of draws for EiV model
-    for sampling from the posterior predictive. Defaults to 500.
+    for sampling from the posterior predictive. Defaults to 100.
     :param decouple_dimensions: Boolean. If True, the unsual convention
     of Gal et al. is followed where, in the evaluation of the
     log-posterior-predictive, each dimension is treated independently and then
     averaged. If False (default), a multivariate distribution is used.
     :returns: noneiv_rmse, noneiv_logdens,eiv_rmse, eiv_logdens
     """
+    x,y = x.to(device), y.to(device)
     init_std_y = train_noneiv.init_std_y_list[0]
     unscaled_reg = train_noneiv.unscaled_reg
     p = train_noneiv.p
@@ -50,9 +51,9 @@ def collect_metrics(x,y, seed=0,
             f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
             f'_p_{p:.2f}_seed_{seed}.pkl')
     net = Networks.FNNBer(p=p, init_std_y=init_std_y,
-            h=[input_dim, *hidden_layers, output_dim])
+            h=[input_dim, *hidden_layers, output_dim]).to(device)
     train_and_store.open_stored_training(saved_file=saved_file,
-            net=net)
+            net=net, device=device)
 
 
     # RMSE
@@ -64,10 +65,10 @@ def collect_metrics(x,y, seed=0,
     y = y.view((-1,1))
     assert y.shape == out.shape
     res = y-out
-    scale = train_data.dataset.std_labels
+    scale = train_data.dataset.std_labels.to(device)
     scaled_res = res * scale.view((1,-1))
     scaled_res = scaled_res.detach().cpu().numpy().flatten()
-    noneiv_rmse = np.sqrt(np.mean(scaled_res**2))
+    noneiv_rmse = np.sqrt(np.mean(scaled_res**2))
 
 
     # NLL
@@ -75,7 +76,9 @@ def collect_metrics(x,y, seed=0,
     net.train()
     noneiv_logdens = net.predictive_logdensity(x, y, number_of_draws=100,
             decouple_dimensions=decouple_dimensions,
-            scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
+            scale_labels=\
+                    train_data.dataset.std_labels.view((-1,)).to(device)\
+                    ).mean().detach().cpu().numpy()
     if training_state:
         net.train()
     else:
@@ -88,11 +91,13 @@ def collect_metrics(x,y, seed=0,
     hidden_layers = train_eiv.hidden_layers
     fixed_std_x = train_eiv.fixed_std_x
     saved_file = os.path.join('saved_networks',
-            f'eiv_{short_dataname}'\
-            f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
-            f'_p_{p:.2f}_seed_{seed}.pkl')
+            f'eiv_{short_dataname}'\
+            f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+            f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}'\
+            f'_seed_{seed}.pkl')
     net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
-            h=[input_dim, *hidden_layers, output_dim], fixed_std_x=fixed_std_x)
+            h=[input_dim, *hidden_layers, output_dim],
+            fixed_std_x=fixed_std_x).to(device)
     train_and_store.open_stored_training(saved_file=saved_file,
             net=net)
 
     # RMSE
@@ -106,10 +111,10 @@ def collect_metrics(x,y, seed=0,
     y = y.view((-1,1))
     assert y.shape == out.shape
     res = y-out
-    scale = train_data.dataset.std_labels
+    scale = train_data.dataset.std_labels.to(device)
     scaled_res = res * scale.view((1,-1))
     scaled_res = scaled_res.detach().cpu().numpy().flatten()
-    eiv_rmse = np.sqrt(np.mean(scaled_res**2))
+    eiv_rmse = np.sqrt(np.mean(scaled_res**2))
     if training_state:
         net.train()
     else:
@@ -125,7 +130,9 @@ def collect_metrics(x,y, seed=0,
     net.train()
     eiv_logdens = net.predictive_logdensity(x, y, number_of_draws=100,
             decouple_dimensions=decouple_dimensions,
-            scale_labels=train_data.dataset.std_labels.view((-1,))).mean()
+            scale_labels=\
+                    train_data.dataset.std_labels.view((-1,)).to(device)\
+                    ).mean().detach().cpu().numpy()
     if training_state:
         net.train()
     else:
@@ -136,19 +143,32 @@ noneiv_rmse_collection = []
 noneiv_logdens_collection = []
 eiv_rmse_collection = []
 eiv_logdens_collection = []
-number_of_samples = 20
-for _ in tqdm(range(number_of_samples)):
-    x,y = next(iter(test_dataloader))
-    noneiv_rmse, noneiv_logdens, eiv_rmse, eiv_logdens = collect_metrics(x,y)
-    noneiv_rmse_collection.append(noneiv_rmse)
-    noneiv_logdens_collection.append(noneiv_logdens)
-    eiv_rmse_collection.append(eiv_rmse)
-    eiv_logdens_collection.append(eiv_logdens)
-
-
+num_test_epochs = 20
+assert train_noneiv.seed_list == train_eiv.seed_list
+seed_list = train_noneiv.seed_list
+for seed in tqdm(seed_list):
+    train_data, test_data = load_data(seed=seed)
+    test_dataloader = DataLoader(test_data,
+            batch_size=int(np.max((len(test_data),
+            800))), shuffle=True)
+    for i in tqdm(range(num_test_epochs)):
+        for x,y in test_dataloader:
+            noneiv_rmse, noneiv_logdens, eiv_rmse, eiv_logdens =\
+                    collect_metrics(x,y, seed=seed)
+            noneiv_rmse_collection.append(noneiv_rmse)
+            noneiv_logdens_collection.append(noneiv_logdens)
+            eiv_rmse_collection.append(eiv_rmse)
+            eiv_logdens_collection.append(eiv_logdens)
+
+
+# TODO: Despite statistics, the fluctuations seem to be large
 print('Non-EiV')
-print(f'RMSE {np.mean(noneiv_rmse_collection):.3f} ({np.std(noneiv_rmse_collection)/np.sqrt(number_of_samples):.3f})')
-print(f'LogDens {np.mean(noneiv_logdens_collection):.3f} ({np.std(noneiv_logdens_collection)/np.sqrt(number_of_samples):.3f})')
+print(f'RMSE {np.mean(noneiv_rmse_collection):.3f} '\
+        f'({np.std(noneiv_rmse_collection)/np.sqrt(len(noneiv_rmse_collection)):.3f})')
+print(f'LogDens {np.mean(noneiv_logdens_collection):.3f} '\
+        f'({np.std(noneiv_logdens_collection)/np.sqrt(len(noneiv_logdens_collection)):.3f})')
 print('EiV')
-print(f'RMSE {np.mean(eiv_rmse_collection):.3f} ({np.std(eiv_rmse_collection)/np.sqrt(number_of_samples):.3f})')
-print(f'LogDens {np.mean(eiv_logdens_collection):.3f} ({np.std(eiv_logdens_collection)/np.sqrt(number_of_samples):.3f})')
+print(f'RMSE {np.mean(eiv_rmse_collection):.3f} '\
+        f'({np.std(eiv_rmse_collection)/np.sqrt(len(eiv_rmse_collection)):.3f})')
+print(f'LogDens {np.mean(eiv_logdens_collection):.3f} '\
+        f'({np.std(eiv_logdens_collection)/np.sqrt(len(eiv_logdens_collection)):.3f})')
diff --git a/Experiments/train_eiv_california.py b/Experiments/train_eiv_california.py
index 40a1c60d447c801e88c555a53306a8d712d8faf2..39c95fbe0cad3b7f839a8c230e9c54e5bfcde8f3 100644
--- a/Experiments/train_eiv_california.py
+++ b/Experiments/train_eiv_california.py
@@ -118,7 +118,7 @@ def train_on_data(init_std_y, seed):
     net.apply(initialize_weights.glorot_init)
     net = net.to(device)
     net.std_y_par.requires_grad = False
-    std_x_map = lambda: 0.0
+    std_x_map = lambda: net.get_std_x().detach().cpu().item()
     std_y_map = lambda: net.get_std_y().detach().cpu().item()
     # regularization
     reg = unscaled_reg/len(train_data)
diff --git a/Experiments/train_eiv_energy.py b/Experiments/train_eiv_energy.py
index a29a4ed1f7ce193c1b3700295e1ba6b30630e3d0..40be4b283ed12849418e6c760618f79a70122bda 100644
--- a/Experiments/train_eiv_energy.py
+++ b/Experiments/train_eiv_energy.py
@@ -118,7 +118,7 @@ def train_on_data(init_std_y, seed):
     net.apply(initialize_weights.glorot_init)
     net = net.to(device)
     net.std_y_par.requires_grad = False
-    std_x_map = lambda: 0.0
+    std_x_map = lambda: net.get_std_x().detach().cpu().item()
     std_y_map = lambda: net.get_std_y().detach().cpu().item()
     # regularization
     reg = unscaled_reg/len(train_data)
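As a side note on the evaluation script: the summary block at the end of evaluate_tabular.py reduces each metric collection to a mean and a standard error over all collected entries. A minimal sketch of that aggregation with placeholder values (not results from the experiments):

    import numpy as np

    rmse_collection = [0.52, 0.49, 0.55, 0.51]  # placeholder per-batch RMSE values
    mean_rmse = np.mean(rmse_collection)
    # standard error of the mean over all collected entries (seeds x epochs x batches)
    sem_rmse = np.std(rmse_collection) / np.sqrt(len(rmse_collection))
    print(f'RMSE {mean_rmse:.3f} ({sem_rmse:.3f})')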