plot_coverage implemented via a data-dependent function

0d072014 · Jörg Martin · 1feac39b · 0d072014 · 0d072014
Commit 0d072014 authored 3 years ago by Jörg Martin
--- a/EIVPackage/EIVGeneral/coverage_collect.py
+++ b/EIVPackage/EIVGeneral/coverage_collect.py
@@ -4,6 +4,7 @@ Collect coverages for various coverage factors, networks and dataloaders.
 import numpy as np
 import torch
 import torch.backends.cudnn
+from EIVGeneral.coverage_metrics import epistemic_coverage
 def get_coverages(not_averaged_predictions, y,\
        q_range=np.linspace(0.1,0.9,num=30)):
@@ -60,6 +61,8 @@ def get_coverage_distribution(net_iterator, dataloader_iterator,
                not_av_pred_collection_out, not_av_pred_collection_sigma,\
                    y_collection = [], [], []
                for i, (x,y) in enumerate(dataloader):
+                    if i>= number_of_test_samples:
+                        break
                    x, y = x.to(device), y.to(device)
                    not_averaged_predictions = net.predict(x,
                            take_average_of_prediction=False,

--- a/Experiments/plot_coverage.py
+++ b/Experiments/plot_coverage.py
@@ -6,7 +6,6 @@ import importlib
 import os
 import json
-import numpy as np
 import torch
 import torch.backends.cudnn
 from torch.utils.data import DataLoader
@@ -18,144 +17,158 @@ from EIVGeneral.coverage_collect import get_coverage_distribution
 from EIVGeneral.manipulate_datasets import VerticalCut
-# read in data via --data option
-data = 'linear'
 # load hyperparameters from JSON file
-with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as conf_file:
+def get_coverages(data, eiv, number_of_draws, use_ground_truth=False):
-    eiv_conf_dict = json.load(conf_file)
+    """
-with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as conf_file:
+    Create network and dataloader iterators for `data` (short dataname) and
-    noneiv_conf_dict = json.load(conf_file)
+    feed them into `get_coverage_distribution`.
+    :data: String, short dataname
+    :eiv: Boolean. If True, an EiV model is used, else a non-EiV model.
+    :number_of_draws: Number of draws to use for prediction. Take an int for
+    non-EiV models and a two-element list for EiV models.
+    :use_ground_truth: If True, the noise-free `y` is used when computing the
+    coverage. If no ground truth exists, `None, None` is returned.
+    :returns: numerical_coverage, theoretical_coverage
+    """
+    # load configuration file
+    if eiv:
+        with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as\
+                conf_file:
+            conf_dict = json.load(conf_file)
+    else:
+        with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as\
+                conf_file:
+            conf_dict = json.load(conf_file)
-long_dataname = eiv_conf_dict["long_dataname"]
+    long_dataname = conf_dict["long_dataname"]
-short_dataname = eiv_conf_dict["short_dataname"]
+    short_dataname = conf_dict["short_dataname"]
-print(f"Plotting coverage for {long_dataname}")
+    print(f"Plotting coverage for {long_dataname}")
-scale_outputs = False 
+    load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
-load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
-try:
+    # switch to gpu, if possible
-    gpu_number = eiv_conf_dict["gpu_number"]
-    device = torch.device(f'cuda:{gpu_number}')
    try:
-        torch.tensor([0.0]).to(device)
+        gpu_number = conf_dict["gpu_number"]
-    except RuntimeError:
+        device = torch.device(f'cuda:{gpu_number}')
-        if torch.cuda.is_available():
+        try:
-            print('Switched to GPU 0')
+            torch.tensor([0.0]).to(device)
-            device = torch.device('cuda:0')
+        except RuntimeError:
-        else:
+            if torch.cuda.is_available():
-            print('No cuda available, using CPU')
+                print('Switched to GPU 0')
-            device = torch.device('cpu')
+                device = torch.device('cuda:0')
-except KeyError:
+            else:
-    device = torch.device('cpu')
+                print('No cuda available, using CPU')
+                device = torch.device('cpu')
+    except KeyError:
-# test whether there is a ground truth
+        device = torch.device('cpu')
-try:
-    train_data, test_data, true_train_data, true_test_data \
-            = load_data(seed=0, return_ground_truth=True)
+    if use_ground_truth:
-    ground_truth_exists = True
+        # test whether there is a ground truth
-except TypeError:
+        try:
-    train_data, test_data = load_data(seed=0)
+            train_data, _, _,_  \
-    true_train_data, true_test_data = None, None
+                    = load_data(seed=0, return_ground_truth=True)
-    ground_truth_exists = False
+        except TypeError:
+        # if not, end function
-train_data, test_data = load_data()
+            return None,None
-input_dim = train_data[0][0].numel()
-output_dim = train_data[0][1].numel()
-## Create iterators
-seed_list = range(noneiv_conf_dict["seed_range"][0],
-        noneiv_conf_dict["seed_range"][1])
-# networks
-def net_iterator(eiv=True, seed_list=seed_list):
-    if eiv:
-        init_std_y = eiv_conf_dict["init_std_y_list"][0]
-        unscaled_reg = eiv_conf_dict["unscaled_reg"]
-        p = eiv_conf_dict["p"]
-        hidden_layers = eiv_conf_dict["hidden_layers"]
-        fixed_std_x = eiv_conf_dict["fixed_std_x"]
-        net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
-                h=[input_dim, *hidden_layers, output_dim],
-                fixed_std_x=fixed_std_x).to(device)
-        for seed in seed_list:
-            saved_file = os.path.join('saved_networks',
-                    f'eiv_{short_dataname}'\
-                            f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
-                            f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}'\
-                            f'_seed_{seed}.pkl')
-            train_and_store.open_stored_training(saved_file=saved_file,
-                    net=net, device=device)
-            yield net
    else:
-        init_std_y = noneiv_conf_dict["init_std_y_list"][0]
+        train_data, _ = load_data()
-        unscaled_reg = noneiv_conf_dict["unscaled_reg"]
-        p = noneiv_conf_dict["p"]
-        hidden_layers = noneiv_conf_dict["hidden_layers"]
+    # train_data only used for finding dimensions 
-        net = Networks.FNNBer(p=p, init_std_y=init_std_y,
+    input_dim = train_data[0][0].numel()
-                h=[input_dim, *hidden_layers, output_dim]).to(device)
+    output_dim = train_data[0][1].numel()
-        for seed in seed_list:
-            saved_file = os.path.join('saved_networks',
+    ## Create iterators for get_coverage_distribution
-                        f'noneiv_{short_dataname}'\
+    seed_list = range(conf_dict["seed_range"][0],
-                                f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+            conf_dict["seed_range"][1])
-                                f'_p_{p:.2f}_seed_{seed}.pkl')
-            train_and_store.open_stored_training(saved_file=saved_file,
+    # iterator for networks
-                    net=net, device=device)
+    def net_iterator(eiv=eiv, seed_list=seed_list):
-            yield net
+        """
+        Yields EiV models (if `eiv`) or
-# dataloaders
+        non-EiV models (if not `eiv`) for the seeds in
-def dataloader_iterator(seed_list=seed_list, use_ground_truth=False,
+        `seed_list` and `data`.
-        batch_size = 100):
+        """
-    for seed in seed_list:
+        if eiv:
-        if not use_ground_truth:
+            # load parameters
-            train_data, test_data = load_data(seed=seed)
+            init_std_y = conf_dict["init_std_y_list"][0]
-            test_dataloader = DataLoader(test_data, 
+            unscaled_reg = conf_dict["unscaled_reg"]
-                    batch_size=batch_size,
+            p = conf_dict["p"]
-                    shuffle=True)
+            hidden_layers = conf_dict["hidden_layers"]
-            yield test_dataloader
+            fixed_std_x = conf_dict["fixed_std_x"]
+            net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
+                    h=[input_dim, *hidden_layers, output_dim],
+                    fixed_std_x=fixed_std_x).to(device)
+            for seed in seed_list:
+                # load network parameters
+                saved_file = os.path.join('saved_networks',
+                        f'eiv_{short_dataname}'\
+                                f'_init_std_y_{init_std_y:.3f}'\
+                                f'_ureg_{unscaled_reg:.1f}'\
+                                f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}'\
+                                f'_seed_{seed}.pkl')
+                train_and_store.open_stored_training(saved_file=saved_file,
+                        net=net, device=device)
+                yield net
        else:
-            assert ground_truth_exists
+            # load parameters
-            _, _, _, true_test =\
+            init_std_y = conf_dict["init_std_y_list"][0]
-                    load_data(seed=seed, return_ground_truth=True)
+            unscaled_reg = conf_dict["unscaled_reg"]
-            # take noisy x but unnoisy y
+            p = conf_dict["p"]
-            cut_true_test = VerticalCut(true_test, components_to_pick=[2,1])
+            hidden_layers = conf_dict["hidden_layers"]
-            test_dataloader = DataLoader(cut_true_test, 
+            net = Networks.FNNBer(p=p, init_std_y=init_std_y,
-                    batch_size=batch_size,
+                    h=[input_dim, *hidden_layers, output_dim]).to(device)
-                    shuffle=True)
+            for seed in seed_list:
-            yield test_dataloader
+                saved_file = os.path.join('saved_networks',
+                            f'noneiv_{short_dataname}'\
+                                    f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
+                                    f'_p_{p:.2f}_seed_{seed}.pkl')
+                # load network parameters
-eiv_numerical_coverage, eiv_theoretical_coverage = get_coverage_distribution(
+                train_and_store.open_stored_training(saved_file=saved_file,
-        net_iterator=net_iterator(eiv=True),
+                        net=net, device=device)
-        dataloader_iterator=dataloader_iterator(),
+                yield net
-        device=device,
-        number_of_draws=[100,5])
+    # iterator for dataloaders
-mean_eiv_theoretical_coverage = np.mean(eiv_theoretical_coverage, axis=1)
+    def dataloader_iterator(seed_list=seed_list,use_ground_truth=use_ground_truth,
-std_eiv_theoretical_coverage = np.std(eiv_theoretical_coverage, axis=1)
+            batch_size = 100):
-mean_eiv_numerical_coverage = np.mean(eiv_numerical_coverage, axis=1)
+        """
-std_eiv_numerical_coverage = np.std(eiv_numerical_coverage, axis=1)
+        Yields dataloaders for `data`, according to the seeds in `seed_list`.
-noneiv_numerical_coverage, noneiv_theoretical_coverage = get_coverage_distribution(
+        If `use_ground_truth` the data is cut to contain noisy x and unnoisy y.
-        net_iterator=net_iterator(eiv=False),
+        """
+        for seed in seed_list:
+            if not use_ground_truth:
+                _, test_data = load_data(seed=seed)
+                test_dataloader = DataLoader(test_data, 
+                        batch_size=batch_size,
+                        shuffle=True)
+                yield test_dataloader
+            else:
+                _, _, _, true_test =\
+                        load_data(seed=seed, return_ground_truth=True)
+                # take noisy x but unnoisy y
+                cut_true_test = VerticalCut(true_test,
+                        components_to_pick=[2,1])
+                test_dataloader = DataLoader(cut_true_test, 
+                        batch_size=batch_size,
+                        shuffle=True)
+                yield test_dataloader
+    # Compute coverages
+    numerical_coverage, theoretical_coverage = get_coverage_distribution(
+        net_iterator=net_iterator(eiv=eiv),
        dataloader_iterator=dataloader_iterator(),
        device=device,
-        number_of_draws=100)
+        number_of_draws=number_of_draws)
-mean_noneiv_theoretical_coverage = np.mean(noneiv_theoretical_coverage, axis=1)
+    return numerical_coverage, theoretical_coverage
-std_noneiv_theoretical_coverage = np.std(noneiv_theoretical_coverage, axis=1)
-mean_noneiv_numerical_coverage = np.mean(noneiv_numerical_coverage, axis=1)
-std_noneiv_numerical_coverage = np.std(noneiv_numerical_coverage, axis=1)
-plt.plot(mean_eiv_theoretical_coverage, mean_eiv_numerical_coverage, color='r', label='EiV')
-plt.fill_between(mean_eiv_theoretical_coverage, mean_eiv_numerical_coverage
-        - std_eiv_numerical_coverage,
-        mean_eiv_numerical_coverage + std_eiv_numerical_coverage, color='r', alpha=0.5)
-plt.plot(mean_noneiv_theoretical_coverage, mean_noneiv_numerical_coverage, color='b', label='nonEiV')
-plt.fill_between(mean_noneiv_theoretical_coverage, mean_noneiv_numerical_coverage
-        - std_noneiv_numerical_coverage,
-        mean_noneiv_numerical_coverage + std_noneiv_numerical_coverage, color='b', alpha=0.5)
-diag_x = np.linspace(0, np.max(mean_eiv_numerical_coverage))
-plt.plot(diag_x, diag_x, 'k--')
-plt.show()
+#####
+# numerical_coverage, theoretical_coverage =\
+#     get_coverages(data='quadratic', eiv=True, number_of_draws=[100,5],
+#             use_ground_truth=True)
+# print(numerical_coverage)
+# print(theoretical_coverage)