Skip to content
Snippets Groups Projects
Commit 0d072014 authored by Jörg Martin's avatar Jörg Martin
Browse files

plot_coverage implemented via a data-dependent function

parent 1feac39b
No related branches found
No related tags found
No related merge requests found
...@@ -4,6 +4,7 @@ Collect coverages for various coverage factors, networks and dataloaders. ...@@ -4,6 +4,7 @@ Collect coverages for various coverage factors, networks and dataloaders.
import numpy as np import numpy as np
import torch import torch
import torch.backends.cudnn import torch.backends.cudnn
from EIVGeneral.coverage_metrics import epistemic_coverage
def get_coverages(not_averaged_predictions, y,\ def get_coverages(not_averaged_predictions, y,\
q_range=np.linspace(0.1,0.9,num=30)): q_range=np.linspace(0.1,0.9,num=30)):
...@@ -60,6 +61,8 @@ def get_coverage_distribution(net_iterator, dataloader_iterator, ...@@ -60,6 +61,8 @@ def get_coverage_distribution(net_iterator, dataloader_iterator,
not_av_pred_collection_out, not_av_pred_collection_sigma,\ not_av_pred_collection_out, not_av_pred_collection_sigma,\
y_collection = [], [], [] y_collection = [], [], []
for i, (x,y) in enumerate(dataloader): for i, (x,y) in enumerate(dataloader):
if i>= number_of_test_samples:
break
x, y = x.to(device), y.to(device) x, y = x.to(device), y.to(device)
not_averaged_predictions = net.predict(x, not_averaged_predictions = net.predict(x,
take_average_of_prediction=False, take_average_of_prediction=False,
......
...@@ -6,7 +6,6 @@ import importlib ...@@ -6,7 +6,6 @@ import importlib
import os import os
import json import json
import numpy as np
import torch import torch
import torch.backends.cudnn import torch.backends.cudnn
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
...@@ -18,144 +17,158 @@ from EIVGeneral.coverage_collect import get_coverage_distribution ...@@ -18,144 +17,158 @@ from EIVGeneral.coverage_collect import get_coverage_distribution
from EIVGeneral.manipulate_datasets import VerticalCut from EIVGeneral.manipulate_datasets import VerticalCut
# read in data via --data option
data = 'linear'
# load hyperparameters from JSON file # load hyperparameters from JSON file
with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as conf_file: def get_coverages(data, eiv, number_of_draws, use_ground_truth=False):
eiv_conf_dict = json.load(conf_file) """
with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as conf_file: Create network and dataloader iterators for `data` (short dataname) and
noneiv_conf_dict = json.load(conf_file) feed them into `get_coverage_distribution`.
:data: String, short dataname
:eiv: Boolean. If True an EiV model is used, else a non-EiV model.
:number_of_draws: Number of draws to use for prediction. Take an int for
non-EiV models and a two-element list for EiV models.
:use_ground_truth: If True, unnoisy `y` are considered when computing the
coverage. If there is no ground truth, `None, None` is returned.
:returns: numerical_coverage, theoretical_coverage
"""
# load configuration file
if eiv:
with open(os.path.join('configurations',f'eiv_{data}.json'),'r') as\
conf_file:
conf_dict = json.load(conf_file)
else:
with open(os.path.join('configurations',f'noneiv_{data}.json'),'r') as\
conf_file:
conf_dict = json.load(conf_file)
long_dataname = eiv_conf_dict["long_dataname"] long_dataname = conf_dict["long_dataname"]
short_dataname = eiv_conf_dict["short_dataname"] short_dataname = conf_dict["short_dataname"]
print(f"Plotting coverage for {long_dataname}") print(f"Plotting coverage for {long_dataname}")
scale_outputs = False load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
load_data = importlib.import_module(f'EIVData.{long_dataname}').load_data
try: # switch to gpu, if possible
gpu_number = eiv_conf_dict["gpu_number"]
device = torch.device(f'cuda:{gpu_number}')
try: try:
torch.tensor([0.0]).to(device) gpu_number = conf_dict["gpu_number"]
except RuntimeError: device = torch.device(f'cuda:{gpu_number}')
if torch.cuda.is_available(): try:
print('Switched to GPU 0') torch.tensor([0.0]).to(device)
device = torch.device('cuda:0') except RuntimeError:
else: if torch.cuda.is_available():
print('No cuda available, using CPU') print('Switched to GPU 0')
device = torch.device('cpu') device = torch.device('cuda:0')
except KeyError: else:
device = torch.device('cpu') print('No cuda available, using CPU')
device = torch.device('cpu')
except KeyError:
# test whether there is a ground truth device = torch.device('cpu')
try:
train_data, test_data, true_train_data, true_test_data \
= load_data(seed=0, return_ground_truth=True) if use_ground_truth:
ground_truth_exists = True # test whether there is a ground truth
except TypeError: try:
train_data, test_data = load_data(seed=0) train_data, _, _,_ \
true_train_data, true_test_data = None, None = load_data(seed=0, return_ground_truth=True)
ground_truth_exists = False except TypeError:
# if not, end function
train_data, test_data = load_data() return None,None
input_dim = train_data[0][0].numel()
output_dim = train_data[0][1].numel()
## Create iterators
seed_list = range(noneiv_conf_dict["seed_range"][0],
noneiv_conf_dict["seed_range"][1])
# networks
def net_iterator(eiv=True, seed_list=seed_list):
if eiv:
init_std_y = eiv_conf_dict["init_std_y_list"][0]
unscaled_reg = eiv_conf_dict["unscaled_reg"]
p = eiv_conf_dict["p"]
hidden_layers = eiv_conf_dict["hidden_layers"]
fixed_std_x = eiv_conf_dict["fixed_std_x"]
net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
h=[input_dim, *hidden_layers, output_dim],
fixed_std_x=fixed_std_x).to(device)
for seed in seed_list:
saved_file = os.path.join('saved_networks',
f'eiv_{short_dataname}'\
f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}'\
f'_seed_{seed}.pkl')
train_and_store.open_stored_training(saved_file=saved_file,
net=net, device=device)
yield net
else: else:
init_std_y = noneiv_conf_dict["init_std_y_list"][0] train_data, _ = load_data()
unscaled_reg = noneiv_conf_dict["unscaled_reg"]
p = noneiv_conf_dict["p"]
hidden_layers = noneiv_conf_dict["hidden_layers"] # train_data only used for finding dimensions
net = Networks.FNNBer(p=p, init_std_y=init_std_y, input_dim = train_data[0][0].numel()
h=[input_dim, *hidden_layers, output_dim]).to(device) output_dim = train_data[0][1].numel()
for seed in seed_list:
saved_file = os.path.join('saved_networks', ## Create iterators for get_coverage_distribution
f'noneiv_{short_dataname}'\ seed_list = range(conf_dict["seed_range"][0],
f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\ conf_dict["seed_range"][1])
f'_p_{p:.2f}_seed_{seed}.pkl')
train_and_store.open_stored_training(saved_file=saved_file, # iterator for networks
net=net, device=device) def net_iterator(eiv=eiv, seed_list=seed_list):
yield net """
Yields EiV models (if `eiv`) or
# dataloaders non-EiV models (if not `eiv`) for the seeds in
def dataloader_iterator(seed_list=seed_list, use_ground_truth=False, `seed_list` and `data`.
batch_size = 100): """
for seed in seed_list: if eiv:
if not use_ground_truth: # load parameters
train_data, test_data = load_data(seed=seed) init_std_y = conf_dict["init_std_y_list"][0]
test_dataloader = DataLoader(test_data, unscaled_reg = conf_dict["unscaled_reg"]
batch_size=batch_size, p = conf_dict["p"]
shuffle=True) hidden_layers = conf_dict["hidden_layers"]
yield test_dataloader fixed_std_x = conf_dict["fixed_std_x"]
net = Networks.FNNEIV(p=p, init_std_y=init_std_y,
h=[input_dim, *hidden_layers, output_dim],
fixed_std_x=fixed_std_x).to(device)
for seed in seed_list:
# load network parameters
saved_file = os.path.join('saved_networks',
f'eiv_{short_dataname}'\
f'_init_std_y_{init_std_y:.3f}'\
f'_ureg_{unscaled_reg:.1f}'\
f'_p_{p:.2f}_fixed_std_x_{fixed_std_x:.3f}'\
f'_seed_{seed}.pkl')
train_and_store.open_stored_training(saved_file=saved_file,
net=net, device=device)
yield net
else: else:
assert ground_truth_exists # load parameters
_, _, _, true_test =\ init_std_y = conf_dict["init_std_y_list"][0]
load_data(seed=seed, return_ground_truth=True) unscaled_reg = conf_dict["unscaled_reg"]
# take noisy x but unnoisy y p = conf_dict["p"]
cut_true_test = VerticalCut(true_test, components_to_pick=[2,1]) hidden_layers = conf_dict["hidden_layers"]
test_dataloader = DataLoader(cut_true_test, net = Networks.FNNBer(p=p, init_std_y=init_std_y,
batch_size=batch_size, h=[input_dim, *hidden_layers, output_dim]).to(device)
shuffle=True) for seed in seed_list:
yield test_dataloader saved_file = os.path.join('saved_networks',
f'noneiv_{short_dataname}'\
f'_init_std_y_{init_std_y:.3f}_ureg_{unscaled_reg:.1f}'\
f'_p_{p:.2f}_seed_{seed}.pkl')
# load network parameters
eiv_numerical_coverage, eiv_theoretical_coverage = get_coverage_distribution( train_and_store.open_stored_training(saved_file=saved_file,
net_iterator=net_iterator(eiv=True), net=net, device=device)
dataloader_iterator=dataloader_iterator(), yield net
device=device,
number_of_draws=[100,5]) # iterator for dataloaders
mean_eiv_theoretical_coverage = np.mean(eiv_theoretical_coverage, axis=1) def dataloader_iterator(seed_list=seed_list,use_ground_truth=use_ground_truth,
std_eiv_theoretical_coverage = np.std(eiv_theoretical_coverage, axis=1) batch_size = 100):
mean_eiv_numerical_coverage = np.mean(eiv_numerical_coverage, axis=1) """
std_eiv_numerical_coverage = np.std(eiv_numerical_coverage, axis=1) Yields dataloaders for `data`, according to the seeds in `seed_list`.
noneiv_numerical_coverage, noneiv_theoretical_coverage = get_coverage_distribution( If `use_ground_truth` the data is cut to contain noisy x and unnoisy y.
net_iterator=net_iterator(eiv=False), """
for seed in seed_list:
if not use_ground_truth:
_, test_data = load_data(seed=seed)
test_dataloader = DataLoader(test_data,
batch_size=batch_size,
shuffle=True)
yield test_dataloader
else:
_, _, _, true_test =\
load_data(seed=seed, return_ground_truth=True)
# take noisy x but unnoisy y
cut_true_test = VerticalCut(true_test,
components_to_pick=[2,1])
test_dataloader = DataLoader(cut_true_test,
batch_size=batch_size,
shuffle=True)
yield test_dataloader
# Compute coverages
numerical_coverage, theoretical_coverage = get_coverage_distribution(
net_iterator=net_iterator(eiv=eiv),
dataloader_iterator=dataloader_iterator(), dataloader_iterator=dataloader_iterator(),
device=device, device=device,
number_of_draws=100) number_of_draws=number_of_draws)
mean_noneiv_theoretical_coverage = np.mean(noneiv_theoretical_coverage, axis=1) return numerical_coverage, theoretical_coverage
std_noneiv_theoretical_coverage = np.std(noneiv_theoretical_coverage, axis=1)
mean_noneiv_numerical_coverage = np.mean(noneiv_numerical_coverage, axis=1)
std_noneiv_numerical_coverage = np.std(noneiv_numerical_coverage, axis=1)
plt.plot(mean_eiv_theoretical_coverage, mean_eiv_numerical_coverage, color='r', label='EiV')
plt.fill_between(mean_eiv_theoretical_coverage, mean_eiv_numerical_coverage
- std_eiv_numerical_coverage,
mean_eiv_numerical_coverage + std_eiv_numerical_coverage, color='r', alpha=0.5)
plt.plot(mean_noneiv_theoretical_coverage, mean_noneiv_numerical_coverage, color='b', label='nonEiV')
plt.fill_between(mean_noneiv_theoretical_coverage, mean_noneiv_numerical_coverage
- std_noneiv_numerical_coverage,
mean_noneiv_numerical_coverage + std_noneiv_numerical_coverage, color='b', alpha=0.5)
diag_x = np.linspace(0, np.max(mean_eiv_numerical_coverage))
plt.plot(diag_x, diag_x, 'k--')
plt.show()
#####
# numerical_coverage, theoretical_coverage =\
# get_coverages(data='quadratic', eiv=True, number_of_draws=100,
# use_ground_truth=True)
# print(numerical_coverage)
# print(theoretical_coverage)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment