renamed plot_coverage.py to plot_coverage_vs_q.py

The old file was stored as plot_variety_of_coverage_plots.py

renamed plot_coverage.py to plot_coverage_vs_q.py
c2a8a3c4 · Jörg Martin · d88c989f · c2a8a3c4 · c2a8a3c4 · c2a8a3c4
Commit c2a8a3c4 authored 3 years ago by Jörg Martin
--- a/Experiments/plot_coverage_vs_q.py
+++ b/Experiments/plot_coverage_vs_q.py
+"""
+Compute the coverage for various coverage factors and compare them
+with the corresponding q or theoretical coverage. Results will be stored
+in various plots in the results/figures folder.
+"""
+import importlib
+import os
+import json
+
+import numpy as np
+import torch
+import torch.backends.cudnn
+from torch.utils.data import DataLoader
+from matplotlib.pyplot import cm
+import matplotlib.pyplot as plt
+
+from EIVArchitectures import Networks
+from EIVTrainingRoutines import train_and_store
+from EIVGeneral.coverage_collect import get_coverage_distribution
+from EIVGeneral.manipulate_datasets import VerticalCut
+
+# coverages (q values) to consider: an equally spaced grid from 0.1 to 0.95
+# with np.linspace's default number of points (50). Used as the x-axis in
+# `coverage_diagonal_plot` and handed to `get_coverage_distribution`.
+q_range = np.linspace(0.1, 0.95)
+
+def _plot_coverage_band(numerical_coverage, theoretical_coverage, color,
+        against_theoretical, mean_error, linestyle, alpha, label=None):
+    """
+    Plot the mean of `numerical_coverage` (taken over its last axis) as a
+    line and shade the region of one standard deviation around it.
+    :param numerical_coverage: Two-dimensional array, the last axis is
+    averaged out.
+    :param theoretical_coverage: Array, its mean over the last axis is used
+    as x-axis if `against_theoretical` is True.
+    :param color: String, denoting the color of line and band.
+    :param against_theoretical: Boolean. If False, the global `q_range` is
+    used as x-axis.
+    :param mean_error: Boolean. If True the standard deviation is divided
+    by the square root of the number of elements along the averaged axis.
+    :param linestyle: String, linestyle of the mean curve.
+    :param alpha: Float, opacity of the shaded band.
+    :param label: String or None. Label of the mean curve.
+    """
+    # take mean/std over seed dimension
+    mean_coverage = np.mean(numerical_coverage, axis=-1)
+    std_coverage = np.std(numerical_coverage, axis=-1)
+    if mean_error:
+        # display the error of the mean (and not the dispersion)
+        std_coverage /= np.sqrt(numerical_coverage.shape[1])
+    if against_theoretical:
+        # show theoretical coverage on x-axis
+        x_values = np.mean(theoretical_coverage, axis=-1)
+    else:
+        # show q-range on x-axis
+        x_values = np.array(q_range)
+    # plot mean
+    plt.plot(x_values, mean_coverage,
+            color=color, linestyle=linestyle, label=label)
+    # plot std
+    plt.fill_between(x_values,
+            mean_coverage - std_coverage,
+            mean_coverage + std_coverage,
+            color=color, alpha=alpha)
+
+
+def coverage_diagonal_plot(eiv_coverages, noneiv_coverages, color,
+        against_theoretical = False, label = '', mean_error=True):
+    """
+    Plot numerical coverages against q (used coverage value), if
+    against_theoretical is False, or the theoretical coverage, if
+    `against_theoretical` is True. The EiV curve is drawn solid and carries
+    `label`, the non-EiV curve is drawn dashed, without label and with a
+    fainter band. Both are drawn into the current matplotlib figure.
+    :param eiv_coverages: The output of `compute_coverages` with `eiv=True`
+    :param noneiv_coverages: The output of `compute_coverages` with `eiv=False`
+    :param color: String, denoting the color.
+    :param against_theoretical: Boolean, see above.
+    :param label: String. Will be included as label in the plot.
+    :param mean_error: Boolean. If True the standard deviation is divided
+    by the square root of the number of elements, to display the error
+    of the mean (and not the dispersion).
+    """
+    eiv_numerical_coverage, eiv_theoretical_coverage = eiv_coverages
+    noneiv_numerical_coverage, noneiv_theoretical_coverage = noneiv_coverages
+    # both inputs are checked before anything is drawn
+    assert (len(eiv_numerical_coverage.shape)) == 2
+    assert (len(noneiv_numerical_coverage.shape)) == 2
+    # EiV
+    _plot_coverage_band(eiv_numerical_coverage, eiv_theoretical_coverage,
+            color=color, against_theoretical=against_theoretical,
+            mean_error=mean_error, linestyle='solid', alpha=0.5,
+            label=label)
+    # non-EiV
+    _plot_coverage_band(noneiv_numerical_coverage,
+            noneiv_theoretical_coverage,
+            color=color, against_theoretical=against_theoretical,
+            mean_error=mean_error, linestyle='dashed', alpha=0.3)
+
+
+
+# create figures, together with title and axis labels
+# (figure 1 is selected and cleared before anything is drawn into it)
+plt.figure(1)
+plt.clf()
+plt.title('Coverage for datasets with ground truth')
+plt.xlabel('q')
+plt.ylabel('coverage')
+# datasets to plot and their coloring; both lists are zipped in the loop
+# at the end of this script, so colors[i] is used for datasets[i]
+datasets = ['linear', 'quadratic','cubic','sine']
+
+colors = ['cyan', 'magenta', 'yellow', 'green']
+
+def compute_coverages(data, eiv, number_of_draws):
+    """
+    Create network and dataloader iterators for `data` (short dataname) and
+    feed them into `get_coverage_distribution`.
+    The configuration is read from `configurations/eiv_{data}.json` or
+    `configurations/noneiv_{data}.json`, the trained networks are read from
+    the `saved_networks` folder.
+    :param data: String, short dataname
+    :param eiv: Boolean. If True an EiV model is used, else a non-EiV model.
+    :param number_of_draws: Number of draws to use for prediction. Take an
+    int for non-EiV models and a two-element list for EiV models.
+    :returns: numerical_coverage, theoretical_coverage
+    """
+    # load configuration file, EiV and non-EiV only differ in the prefix
+    conf_prefix = 'eiv' if eiv else 'noneiv'
+    with open(os.path.join('configurations', f'{conf_prefix}_{data}.json'),
+            'r') as conf_file:
+        conf_dict = json.load(conf_file)
+
+    long_dataname = conf_dict["long_dataname"]
+    short_dataname = conf_dict["short_dataname"]
+    # normalize by default
+    normalize = conf_dict.get('normalize', True)
+
+    load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
+
+    # switch to gpu, if possible; without a "gpu_number" entry in the
+    # configuration the CPU is used
+    if "gpu_number" in conf_dict:
+        gpu_number = conf_dict["gpu_number"]
+        device = torch.device(f'cuda:{gpu_number}')
+        try:
+            # probe whether the requested device is actually usable
+            torch.tensor([0.0]).to(device)
+        except RuntimeError:
+            if torch.cuda.is_available():
+                print('Switched to GPU 0')
+                device = torch.device('cuda:0')
+            else:
+                print('No cuda available, using CPU')
+                device = torch.device('cpu')
+    else:
+        device = torch.device('cpu')
+
+    train_data, _, _, _ = load_data(seed=0, return_ground_truth=True,
+            normalize=normalize)
+
+    print(f"Computing {'EiV' if eiv else 'non-EiV'} coverage for {long_dataname}")
+
+    # train_data only used for finding dimensions
+    input_dim = train_data[0][0].numel()
+    output_dim = train_data[0][1].numel()
+
+    ## Create iterators for get_coverage_distribution
+    seed_list = range(conf_dict["seed_range"][0],
+            conf_dict["seed_range"][1])
+
+    # iterator for networks
+    def net_iterator(eiv=eiv, seed_list=seed_list):
+        """
+        Yields EiV models (if `eiv`) or
+        non-EiV models (if not `eiv`) for the seeds in
+        `seed_list` and `data`.
+        A single network object is created and yielded repeatedly, its
+        stored training is loaded anew for each seed before the yield.
+        """
+        # load parameters shared by both model types
+        init_std_y = conf_dict["init_std_y_list"][0]
+        unscaled_reg = conf_dict["unscaled_reg"]
+        p = conf_dict["p"]
+        hidden_layers = conf_dict["hidden_layers"]
+        if eiv:
+            fixed_std_x = conf_dict["fixed_std_x"]
+            net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
+                    h=[input_dim, *hidden_layers, output_dim],
+                    fixed_std_x=fixed_std_x).to(device)
+            for seed in seed_list:
+                # load network parameters
+                saved_file = os.path.join('saved_networks',
+                        f'eiv_{short_dataname}'\
+                                f'_init_std_y_{init_std_y:.3f}'\
+                                f'_ureg_{unscaled_reg:.1f}'\
+                                f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}'\
+                                f'_seed_{seed}.pkl')
+                train_and_store.open_stored_training(saved_file=saved_file,
+                        net=net, device=device)
+                yield net
+        else:
+            net = Networks.FNNBer(p=p, init_std_y=init_std_y,
+                    h=[input_dim, *hidden_layers, output_dim]).to(device)
+            for seed in seed_list:
+                # load network parameters
+                saved_file = os.path.join('saved_networks',
+                        f'noneiv_{short_dataname}'\
+                                f'_init_std_y_{init_std_y:.3f}'\
+                                f'_ureg_{unscaled_reg:.1f}'\
+                                f'_p_{p:.2f}_seed_{seed}.pkl')
+                train_and_store.open_stored_training(saved_file=saved_file,
+                        net=net, device=device)
+                yield net
+
+    # iterator for dataloaders
+    def dataloader_iterator(seed_list=seed_list, batch_size = 100):
+        """
+        Yields dataloaders for `data`, according to the seeds in `seed_list`.
+        Only the fourth return value of `load_data` is used.
+        """
+        for seed in seed_list:
+            _, _, _, true_test = load_data(seed=seed,
+                    return_ground_truth=True, normalize=normalize)
+            # take noisy x but unnoisy y
+            cut_true_test = VerticalCut(true_test,
+                    components_to_pick=[2,1])
+            # NOTE(review): the test data is shuffled; presumably the
+            # coverage does not depend on the order - confirm
+            test_dataloader = DataLoader(cut_true_test,
+                    batch_size=batch_size,
+                    shuffle=True)
+            yield test_dataloader
+
+
+    # Compute coverages
+    numerical_coverage, theoretical_coverage = get_coverage_distribution(
+        net_iterator=net_iterator(eiv=eiv),
+        dataloader_iterator=dataloader_iterator(),
+        device=device,
+        number_of_draws=number_of_draws,
+        q_range=q_range,
+        noisy_y = False)
+    return numerical_coverage, theoretical_coverage
+
+# loop through data: for each dataset the EiV and the non-EiV coverages are
+# computed and drawn in the color assigned to that dataset
+for data, color in zip(datasets, colors):
+    # compute coverages
+    # (EiV models take a two-element list of draws, non-EiV models an int)
+    eiv_coverages = compute_coverages(data=data, eiv=True,
+            number_of_draws=[100,5])
+    noneiv_coverages = compute_coverages(data=data, eiv=False,
+            number_of_draws=100)
+    # create plots
+    plt.figure(1)
+    coverage_diagonal_plot(eiv_coverages, noneiv_coverages,
+            color=color, against_theoretical=False, label=data)
+
+# add diagonal (numerical coverage equal to q) as reference
+x_diag = np.linspace(0.0, 1.0)
+plt.plot(x_diag, x_diag, color='k', linestyle='dotted' )
+
+# add legend
+plt.legend()
+
+# save and show
+# make sure the target folder exists, savefig does not create it
+os.makedirs('results/figures', exist_ok=True)
+plt.savefig('results/figures/true_coverage_vs_q.pdf')
+plt.show()
--- a/Experiments/plot_prediction.py
+++ b/Experiments/plot_prediction.py
@@ -255,6 +255,7 @@ for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list,
        plt.fill_between(x_values.flatten(), noneiv_pred-k * noneiv_unc,
                noneiv_pred + k * noneiv_unc,
                color=color[1], alpha=0.5)
+        plt.savefig(f'results/figures/prediction_{data}.pdf')
    else:
        # multidimensional handling not included yet
        pass

--- a/Experiments/plot_coverage.py
+++ b/Experiments/plot_coverage.py