Skip to content
Snippets Groups Projects
Commit c2a8a3c4 authored by Jörg Martin's avatar Jörg Martin
Browse files

renamed plot_coverage.py to plot_coverage_vs_q.py

The old file was stored as plot_variety_of_coverage_plots.py
parent d88c989f
No related branches found
No related tags found
No related merge requests found
"""
Compute the coverage for various coverage factors and compare them
with the corresponding q or theoretical coverage. Results will be stored
in various plots in the results/figures folder.
"""
import importlib
import os
import json
import numpy as np
import torch
import torch.backends.cudnn
from torch.utils.data import DataLoader
from matplotlib.pyplot import cm
import matplotlib.pyplot as plt
from EIVArchitectures import Networks
from EIVTrainingRoutines import train_and_store
from EIVGeneral.coverage_collect import get_coverage_distribution
from EIVGeneral.manipulate_datasets import VerticalCut
# coverage values q to consider: 50 evenly spaced points in [0.1, 0.95]
# (50 is the default number of samples of np.linspace, made explicit here)
q_range = np.linspace(start=0.1, stop=0.95, num=50)
def _plot_coverage_band(numerical_coverage, theoretical_coverage, color,
                        against_theoretical, mean_error, linestyle, alpha,
                        **plot_kwargs):
    """
    Plot the mean of `numerical_coverage` (taken over its last axis) as a
    line and shade a band of one standard deviation around it.

    :param numerical_coverage: Two-dimensional array of numerical coverages.
    :param theoretical_coverage: Array of theoretical coverages. Only used
    if `against_theoretical` is True; its mean over the last axis then
    gives the x-values.
    :param color: Color used for both the line and the shaded band.
    :param against_theoretical: Boolean. If False, the global `q_range` is
    used for the x-values.
    :param mean_error: Boolean. If True the standard deviation is divided
    by the square root of `numerical_coverage.shape[1]`.
    :param linestyle: Line style of the mean curve.
    :param alpha: Opacity of the shaded band.
    :param plot_kwargs: Further keyword arguments that are handed to
    `plt.plot` for the mean curve (e.g. `label`).
    """
    # take mean/std over seed dimension
    mean_coverage = np.mean(numerical_coverage, axis=-1)
    std_coverage = np.std(numerical_coverage, axis=-1)
    if mean_error:
        # error of the mean instead of the dispersion
        std_coverage /= np.sqrt(numerical_coverage.shape[1])
    if against_theoretical:
        # show theoretical coverage on x-axis
        x_values = np.mean(theoretical_coverage, axis=-1)
    else:
        # show q-range on x-axis
        x_values = np.array(q_range)
    # plot mean
    plt.plot(x_values, mean_coverage,
             color=color, linestyle=linestyle, **plot_kwargs)
    # plot std
    plt.fill_between(x_values,
                     mean_coverage - std_coverage,
                     mean_coverage + std_coverage,
                     color=color, alpha=alpha)


def coverage_diagonal_plot(eiv_coverages, noneiv_coverages, color,
                           against_theoretical=False, label='',
                           mean_error=True):
    """
    Plot numerical coverages against q (used coverage value), if
    against_theoretical is False, or the theoretical coverage, if
    `against_theoretical` is True.

    The EiV curve is drawn solid (band opacity 0.5) and carries `label`;
    the non-EiV curve is drawn dashed (band opacity 0.3) in the same color
    and gets no label. Everything is drawn into the current
    matplotlib figure.

    :param eiv_coverages: The output of `compute_coverages` with `eiv=True`
    :param noneiv_coverages: The output of `compute_coverages` with `eiv=False`
    :param color: String, denoting the color.
    :param against_theoretical: Boolean, see above.
    :param label: String. Will be included as label in the plot.
    :param mean_error: Boolean. If True the standard deviation is divided
    by the square root of the number of elements, to display the error
    of the mean (and not the dispersion).
    """
    eiv_numerical_coverage, eiv_theoretical_coverage = eiv_coverages
    noneiv_numerical_coverage, noneiv_theoretical_coverage = noneiv_coverages
    # both numerical coverages must be two-dimensional; the last axis is
    # the one that gets averaged
    assert len(eiv_numerical_coverage.shape) == 2
    assert len(noneiv_numerical_coverage.shape) == 2
    # EiV: solid line, labelled
    _plot_coverage_band(eiv_numerical_coverage, eiv_theoretical_coverage,
                        color=color,
                        against_theoretical=against_theoretical,
                        mean_error=mean_error,
                        linestyle='solid', alpha=0.5, label=label)
    # non-EiV: dashed line, deliberately without a label so that each
    # dataset shows up only once in the legend
    _plot_coverage_band(noneiv_numerical_coverage,
                        noneiv_theoretical_coverage,
                        color=color,
                        against_theoretical=against_theoretical,
                        mean_error=mean_error,
                        linestyle='dashed', alpha=0.3)
# datasets to plot, each paired with the color used for its curves
dataset_colors = [
    ('linear', 'cyan'),
    ('quadratic', 'magenta'),
    ('cubic', 'yellow'),
    ('sine', 'green'),
]
datasets = [dataset_name for dataset_name, _ in dataset_colors]
colors = [dataset_color for _, dataset_color in dataset_colors]
# prepare an empty figure 1 and set its axis labels and title
plt.figure(1)
plt.clf()
plt.xlabel('q')
plt.ylabel('coverage')
plt.title('Coverage for datasets with ground truth')
def compute_coverages(data, eiv, number_of_draws):
    """
    Create network and dataloader iterators for `data` (short dataname) and
    feed them into `get_coverage_distribution`.

    The configuration is read from `configurations/eiv_{data}.json` or
    `configurations/noneiv_{data}.json`, the trained networks from the
    `saved_networks` folder. The coverage values used are taken from the
    global `q_range`.

    :param data: String, short dataname
    :param eiv: Boolean. If True an EiV model is used, else an non-EiV model.
    :param number_of_draws: Number of draws to use for prediction. Take an
    int for non-EiV models and a two-element list for EiV models.
    :returns: numerical_coverage, theoretical_coverage
    """
    # load configuration file; EiV and non-EiV only differ in the prefix
    model_prefix = 'eiv' if eiv else 'noneiv'
    conf_path = os.path.join('configurations', f'{model_prefix}_{data}.json')
    with open(conf_path, 'r') as conf_file:
        conf_dict = json.load(conf_file)

    long_dataname = conf_dict["long_dataname"]
    short_dataname = conf_dict["short_dataname"]
    # normalize by default
    normalize = conf_dict.get('normalize', True)

    load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data

    # switch to gpu, if possible
    try:
        gpu_number = conf_dict["gpu_number"]
        device = torch.device(f'cuda:{gpu_number}')
        try:
            # probe whether the requested device is actually usable
            torch.tensor([0.0]).to(device)
        except (RuntimeError, AssertionError):
            # NOTE(review): CPU-only PyTorch builds appear to report missing
            # CUDA support with an AssertionError instead of a RuntimeError;
            # both are caught so that the CPU fallback is reached.
            if torch.cuda.is_available():
                print('Switched to GPU 0')
                device = torch.device('cuda:0')
            else:
                print('No cuda available, using CPU')
                device = torch.device('cpu')
    except KeyError:
        # no gpu requested in the configuration
        device = torch.device('cpu')

    train_data, _, _, _ = load_data(seed=0, return_ground_truth=True,
                                    normalize=normalize)

    print(f"Computing {'EiV' if eiv else 'non-EiV'} coverage for {long_dataname}")

    # train_data only used for finding dimensions
    input_dim = train_data[0][0].numel()
    output_dim = train_data[0][1].numel()

    ## Create iterators for get_coverage_distribution
    seed_list = range(conf_dict["seed_range"][0],
                      conf_dict["seed_range"][1])

    # iterator for networks
    def net_iterator(eiv=eiv, seed_list=seed_list):
        """
        Yields EiV models (if `eiv`) or
        non-EiV models (if not `eiv`) for the seeds in
        `seed_list` and `data`.

        The same network object is yielded for every seed; only its
        stored parameters are reloaded in between, so each yielded model
        has to be used before the next one is requested.
        """
        # load parameters shared by both model types
        init_std_y = conf_dict["init_std_y_list"][0]
        unscaled_reg = conf_dict["unscaled_reg"]
        p = conf_dict["p"]
        hidden_layers = conf_dict["hidden_layers"]
        layer_dims = [input_dim, *hidden_layers, output_dim]
        if eiv:
            fixed_std_x = conf_dict["fixed_std_x"]
            net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
                                  h=layer_dims,
                                  fixed_std_x=fixed_std_x).to(device)
            file_stem = (f'eiv_{short_dataname}'
                         f'_init_std_y_{init_std_y:.3f}'
                         f'_ureg_{unscaled_reg:.1f}'
                         f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}')
        else:
            net = Networks.FNNBer(p=p, init_std_y=init_std_y,
                                  h=layer_dims).to(device)
            file_stem = (f'noneiv_{short_dataname}'
                         f'_init_std_y_{init_std_y:.3f}'
                         f'_ureg_{unscaled_reg:.1f}'
                         f'_p_{p:.2f}')
        for seed in seed_list:
            # load network parameters
            saved_file = os.path.join('saved_networks',
                                      f'{file_stem}_seed_{seed}.pkl')
            train_and_store.open_stored_training(saved_file=saved_file,
                                                 net=net, device=device)
            yield net

    # iterator for dataloaders
    def dataloader_iterator(seed_list=seed_list, batch_size=100):
        """
        Yields dataloaders for `data`, according to the seeds in `seed_list`.
        """
        for seed in seed_list:
            _, _, _, true_test = load_data(seed=seed,
                                           return_ground_truth=True,
                                           normalize=normalize)
            # take noisy x but unnoisy y
            cut_true_test = VerticalCut(true_test,
                                        components_to_pick=[2, 1])
            test_dataloader = DataLoader(cut_true_test,
                                         batch_size=batch_size,
                                         shuffle=True)
            yield test_dataloader

    # Compute coverages
    numerical_coverage, theoretical_coverage = get_coverage_distribution(
        net_iterator=net_iterator(eiv=eiv),
        dataloader_iterator=dataloader_iterator(),
        device=device,
        number_of_draws=number_of_draws,
        q_range=q_range,
        noisy_y=False)
    return numerical_coverage, theoretical_coverage
# loop through data: one EiV (solid) and one non-EiV (dashed) curve per
# dataset, all drawn into figure 1
for data, color in zip(datasets, colors):
    # compute coverages
    eiv_coverages = compute_coverages(data=data, eiv=True,
                                      number_of_draws=[100, 5])
    noneiv_coverages = compute_coverages(data=data, eiv=False,
                                         number_of_draws=100)
    # create plots
    plt.figure(1)
    coverage_diagonal_plot(eiv_coverages, noneiv_coverages,
                           color=color, against_theoretical=False,
                           label=data)
# add diagonal
x_diag = np.linspace(0.0, 1.0)
plt.plot(x_diag, x_diag, color='k', linestyle='dotted')
# add legend
plt.legend()
# save and show; make sure the target folder exists, so that the results
# of the computations above are not lost to a missing directory
figure_dir = os.path.join('results', 'figures')
os.makedirs(figure_dir, exist_ok=True)
plt.savefig(os.path.join(figure_dir, 'true_coverage_vs_q.pdf'))
plt.show()
......@@ -255,6 +255,7 @@ for i, (data, x_range, color, number_of_draws) in enumerate(zip(data_list,
plt.fill_between(x_values.flatten(), noneiv_pred-k * noneiv_unc,
noneiv_pred + k * noneiv_unc,
color=color[1], alpha=0.5)
plt.savefig(f'results/figures/prediction_{data}.pdf')
else:
# multidimensional handling not included yet
pass
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment