From a9de00e54cfc745c80160464c23563afb37eb4ba Mon Sep 17 00:00:00 2001 From: Joerg Martin <joerg.martin@ptb.de> Date: Wed, 15 Dec 2021 15:51:01 +0000 Subject: [PATCH] Handling of unaccessible GPU added --- Experiments/evaluate_tabular.py | 18 +++++++++++++++++- Experiments/train_eiv.py | 12 ++++++++++-- Experiments/train_noneiv.py | 12 ++++++++++-- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/Experiments/evaluate_tabular.py b/Experiments/evaluate_tabular.py index ba0654f..1cb0fec 100644 --- a/Experiments/evaluate_tabular.py +++ b/Experiments/evaluate_tabular.py @@ -39,9 +39,25 @@ train_data, test_data = load_data() input_dim = train_data[0][0].numel() output_dim = train_data[0][1].numel() +try: + gpu_number = eiv_conf_dict["gpu_number"] + device = torch.device(f'cuda:{gpu_number}') + try: + torch.tensor([0.0]).to(device) + except RuntimeError: + if torch.cuda.is_available(): + print('Switched to GPU 0') + device = torch.device('cuda:0') + else: + print('No cuda available, using CPU') + device = torch.device('cpu') +except KeyError: + device = torch.device('cpu') + + def collect_metrics(x,y, seed=0, noneiv_number_of_draws=100, eiv_number_of_draws=[100,5], - decouple_dimensions=False, device=torch.device('cuda:1'), + decouple_dimensions=False, device=device, scale_outputs=scale_outputs): """ Compute various metrics for EiV and non-EiV. Will be returned as diff --git a/Experiments/train_eiv.py b/Experiments/train_eiv.py index eb8d9d8..6822946 100644 --- a/Experiments/train_eiv.py +++ b/Experiments/train_eiv.py @@ -55,8 +55,16 @@ print(f"Training on {long_dataname} data") try: gpu_number = conf_dict["gpu_number"] - device = torch.device(f'cuda:{gpu_number}' if torch.cuda.is_available() - else 'cpu') + device = torch.device(f'cuda:{gpu_number}') + try: + torch.tensor([0.0]).to(device) + except RuntimeError: + if torch.cuda.is_available(): + print('Switched to GPU 0') + device = torch.device('cuda:0') + else: + print('No cuda available, using CPU') + device = torch.device('cpu') except KeyError: device = torch.device('cpu') diff --git a/Experiments/train_noneiv.py b/Experiments/train_noneiv.py index 316dda1..ee5687b 100644 --- a/Experiments/train_noneiv.py +++ b/Experiments/train_noneiv.py @@ -54,8 +54,16 @@ print(f"Training on {long_dataname} data") try: gpu_number = conf_dict["gpu_number"] - device = torch.device(f'cuda:{gpu_number}' if torch.cuda.is_available() - else 'cpu') + device = torch.device(f'cuda:{gpu_number}') + try: + torch.tensor([0.0]).to(device) + except RuntimeError: + if torch.cuda.is_available(): + print('Switched to GPU 0') + device = torch.device('cuda:0') + else: + print('No cuda available, using CPU') + device = torch.device('cpu') except KeyError: device = torch.device('cpu') -- GitLab