Skip to content
Snippets Groups Projects
Commit bb7bb595 authored by Manuel Marschall's avatar Manuel Marschall
Browse files

update rejection sampling to take desired number of samples as input

parent 9af8e2bb
Branches
No related tags found
No related merge requests found
Pipeline #3796 passed with warnings
......@@ -71,7 +71,7 @@ def main():
A simple generic example with a linear measurement model.
This function is used to generate the case distinction plot in the manuscript.
"""
n_samples = int(1e7)
n_samples = int(1e6)
do_mcmc = False
experiment = "data/generic/"
options = {
......@@ -112,9 +112,7 @@ def main():
# y_prior = model["variables"]["Y"]["samples"]
y_gum_mean, y_gum_std = gum_reference(1, model=model)
rejection_result = gum_rejection(model, measurement_model, forward_model)
print(f"actual acceptance rate: {len(rejection_result['Y_accept'])/n_samples*100:.4g}%")
rejection_result = gum_rejection(model, measurement_model, forward_model, n_samples)
fig = plot_x(model["xbar"], model["s"], rejection_result["X_samples"], xlim=(0.7, 1.5), ylim=(0, 10),
fontsize=22)
......
......@@ -28,7 +28,7 @@ import os
import json
import numpy as np
from utility import (gum_s1_reference, gum_reference, gum_rejection)
from plot_utility import (plot_x, plot_result_pdfs)
from plot_utility import plot_result_pdfs
from models import independent_model
......@@ -146,11 +146,15 @@ def generate(experiment: str, n_samples: int, bootstrap: int):
# Here starts the computation
options = get_options(nu0, k0_guess)
model = independent_model(options, measurement_model, forward_model, n_samples, measurements=measurements)
model = independent_model(options, measurement_model, forward_model, int(1e8), measurements=measurements)
res_dict = {
"mean": [],
"GUMS1_mean": [],
"GUM_mean": [],
"unc": [],
"GUMS1_unc": [],
"GUM_unc": [],
"q025": [],
"q975": [],
"acc_rate": [],
......@@ -161,13 +165,12 @@ def generate(experiment: str, n_samples: int, bootstrap: int):
for run in range(bootstrap):
print(f"Run experiment: {experiment_run} bootstrap:{run+1}/{bootstrap}")
rejection_result = gum_rejection(model, measurement_model, forward_model)
rejection_result = gum_rejection(model, measurement_model, forward_model, n_samples, proposals=1e8)
print(f"actual acceptance rate: {len(rejection_result['Y_accept'])/n_samples*100:.4g}%")
res_dict["acc_rate"].append(len(rejection_result["Y_accept"])/n_samples*100) # type: ignore
res_dict["acc_rate"].append(rejection_result["acc_rate"]) # type: ignore
fig = plot_x(model["xbar"], model["s"], rejection_result["X_samples"])
fig.savefig(experiment_run + "x_samples.pdf")
# fig = plot_x(model["xbar"], model["s"], rejection_result["X_samples"])
# fig.savefig(experiment_run + "x_samples.pdf")
result_dict = {
"PRIOR": model["variables"]["Y"]["samples"],
"REJECTION": rejection_result["Y_accept"],
......@@ -175,22 +178,16 @@ def generate(experiment: str, n_samples: int, bootstrap: int):
"GUMS1": gum_s1_reference(model)
}
# if False: # no MCMC run needed here for computation time reasons.
# sampler = mcmc_reference(model, nwalker=32, nsamples=10000)
# fig = plot_mcmc_chains(sampler.get_chain(), labels=[key for key, value in model["variables"].items()])
# fig.savefig(experiment_run + "mcmc.png")
# # Adjust the MCMC sampler chains to discard ~ 2xtau and thin by tau/2
# tau = sampler.get_autocorr_time()
# print(tau)
# result_dict["MCMC"] = sampler.get_chain(discard=100, flat=True)[:, 0]
fig = plot_result_pdfs(result_dict)
fig.savefig(experiment_run + "result.pdf")
print(f"posterior mean: {np.mean(rejection_result['Y_accept']):.4e}")
print(f"posterior uncertainty: {np.std(rejection_result['Y_accept'], ddof=1):.4e}")
res_dict["mean"].append(np.mean(rejection_result["Y_accept"])) # type: ignore
res_dict["GUM_mean"].append(result_dict["GUM"]["mean"]) # type: ignore
res_dict["GUMS1_mean"].append(np.mean(result_dict["GUMS1"])) # type: ignore
res_dict["GUM_unc"].append(result_dict["GUM"]["uncertainty"]) # type: ignore
res_dict["GUMS1_unc"].append(np.std(result_dict["GUMS1"])) # type: ignore
res_dict["unc"].append(np.std(rejection_result["Y_accept"], ddof=1)) # type: ignore
res_dict["q025"].append(np.percentile(rejection_result["Y_accept"], 2.5)) # type: ignore
res_dict["q975"].append(np.percentile(rejection_result["Y_accept"], 97.5)) # type: ignore
......@@ -206,21 +203,21 @@ def validate(experiment: str):
Arguments:
experiment {str} -- location of json files containing run results
"""
print("k0 n0 mean unc acc rate")
print("k0 n0 mean unc acc rate GUMS1 mean"
" GUMS1 unc")
for nu0, k0_guess in zip([3, 10, 3, 10, 10, 30], [3, 3, 10, 10, 1, 1]):
experiment_run = experiment + f"nu_{nu0}_k0_{k0_guess}/"
if os.path.exists(experiment_run + "result.dat"):
with open(experiment_run + "result.dat", "r") as file_p:
res = json.load(file_p)
mean = np.mean(res["mean"])
mean_std = np.std(res['mean'], ddof=1)
unc = np.mean(res["unc"])
unc_std = np.std(res['unc'], ddof=1)
acc_rate = np.mean(res["acc_rate"])
print(f"{res['k0']} {res['nu0']} {mean:.5g} +- {mean_std:.2e} {unc:.5g} +- {unc_std:.2e}"
f" {acc_rate:.4e}")
print(f"{res['k0']} {res['nu0']}"
f" {np.mean(res['mean']):.5g} +- {np.std(res['mean'], ddof=1):.2e}"
f" {np.mean(res['unc']):.5g} +- {np.std(res['unc'], ddof=1):.2e}"
f" {np.mean(res['acc_rate']):.4e}"
f" {np.mean(res['GUMS1_mean']):.5g} +- {np.std(res['GUMS1_mean'], ddof=1):.2e}"
f" {np.mean(res['GUMS1_unc']):.5g} +- {np.std(res['GUMS1_unc'], ddof=1):.2e}")
if __name__ == "__main__":
generate(experiment="data/mass_dem/", n_samples=int(1e8), bootstrap=10)
generate(experiment="data/mass_dem/", n_samples=int(5e4), bootstrap=10)
validate(experiment="data/mass_dem/")
......@@ -133,7 +133,7 @@ def main():
"""
Script to evaluate the mass calibration example from the manuscript Section 4.2
"""
n_samples = int(1e8)
n_samples = int(1e6)
experiment = "data/mass_sb/"
if not os.path.exists(experiment):
os.makedirs(experiment)
......@@ -146,9 +146,7 @@ def main():
y_gum_mean, y_gum_std = gum_reference(1, model=model)
rejection_result = gum_rejection(model, measurement_model, forward_model)
print(f"actual acceptance rate: {len(rejection_result['Y_accept'])/n_samples*100:.4g}%")
rejection_result = gum_rejection(model, measurement_model, forward_model, n_samples)
fig = plot_x(model["xbar"], model["s"], rejection_result["X_samples"])
# plt.title(f"Y mean offset: {m_off:.4g}")
......
......@@ -148,9 +148,9 @@ def main():
"""
    A small script that implements the resistor example from Section 4.1 of the manuscript.
"""
n_samples = int(1e8)
n_samples = int(1e6)
experiment = "data/resistor/"
do_mcmc = True
do_mcmc = False
measurement = [
1.0000104,
1.0000107,
......@@ -175,9 +175,7 @@ def main():
y_gum_mean, y_gum_std = gum_reference(10000, model=model)
rejection_result = gum_rejection(model, measurement_model, forward_model)
print(f"actual acceptance rate: {len(rejection_result['Y_accept'])/n_samples*100:.4g}%")
rejection_result = gum_rejection(model, measurement_model, forward_model, n_samples)
fig = plot_x(model["xbar"], model["s"], rejection_result["X_samples"], xlim=((8e-6)+1, (13e-6)+1))
# plt.title(f"Y mean offset: {m_off:.4g}")
......
......@@ -49,21 +49,25 @@ def get_rv(opt: dict, n_samples: int) -> dict:
retval["pdf"] = lambda _x: norm.pdf(_x, loc=opt["mean"], scale=opt["uncertainty"])
retval["logpdf"] = lambda _x: norm.logpdf(_x, loc=opt["mean"], scale=opt["uncertainty"])
retval["bounds"] = (-np.inf, np.inf)
retval["rv"] = norm(loc=opt["mean"], scale=opt["uncertainty"])
elif opt["class"] == "rectangular":
retval["samples"] = uniform.rvs(loc=opt["a"], scale=opt["b"]-opt["a"], size=n_samples)
retval["pdf"] = lambda _x: uniform.pdf(_x, loc=opt["a"], scale=opt["b"]-opt["a"])
retval["logpdf"] = lambda _x: uniform.logpdf(_x, loc=opt["a"], scale=opt["b"]-opt["a"])
retval["bounds"] = (opt["a"], opt["b"])
retval["rv"] = uniform(loc=opt["a"], scale=opt["b"]-opt["a"])
elif opt["class"] == "triangular":
retval["samples"] = triang.rvs(opt["c"], loc=opt["a"], scale=opt["scale"], size=n_samples)
retval["pdf"] = lambda _x: triang.pdf(_x, opt["c"], loc=opt["a"], scale=opt["scal"])
retval["logpdf"] = lambda _x: triang.logpdf(_x, opt["c"], loc=opt["a"], scale=opt["scale"])
retval["bounds"] = (opt["a"], opt["a"] + opt["scale"])
retval["rv"] = triang(opt["c"], loc=opt["a"], scale=opt["scale"])
elif opt["class"] == "tscale":
retval["samples"] = t.rvs(loc=opt["mean"], scale=opt["scale"], df=opt["df"], size=n_samples)
retval["pdf"] = lambda _x: t.pdf(_x, loc=opt["mean"], scale=opt["scale"], df=opt["df"])
retval["logpdf"] = lambda _x: t.logpdf(_x, loc=opt["mean"], scale=opt["scale"], df=opt["df"])
retval["bounds"] = (-np.inf, np.inf)
retval["rv"] = t(loc=opt["mean"], scale=opt["scale"], df=opt["df"])
elif opt["class"] == "non-informative":
retval["samples"] = np.zeros(n_samples)
retval["pdf"] = lambda _x: 1
......@@ -118,12 +122,12 @@ def _class_inversegamma(options: dict,
assert "k0" in y_options
# We disable pylint here to be more consistent with the paper notation
k0 = y_options["k0"] # pylint: disable=invalid-name
mu0 = y_options["mean"] # pylint: disable=invalid-name
mu0 = y_options["mean"]
n = options["N"] # pylint: disable=invalid-name
a0 = sigma["a0"] # pylint: disable=invalid-name
b0 = sigma["b0"] # pylint: disable=invalid-name
mu_n = (k0*mu0 + n*options["xbar"])/(k0 + n) # pylint: disable=invalid-name
mu_n = (k0*mu0 + n*options["xbar"])/(k0 + n)
kn = k0 + n # pylint: disable=invalid-name
an = a0 + 0.5*n # pylint: disable=invalid-name
bn = b0 + 0.5*(n-1)*options["s"]**2 + \
......@@ -150,7 +154,7 @@ def _class_inversechisq(options: dict,
Returns:
tuple -- likelihood and log-likelihood
"""
nu0 = sigma["nu0"] # pylint: disable=invalid-name
nu0 = sigma["nu0"]
s0 = sigma["s0"] # pylint: disable=invalid-name
n = options["N"] # pylint: disable=invalid-name
sigmahat = np.sqrt(((n-1)*options["s"]**2 + nu0*s0**2)/(n+nu0+1))
......@@ -219,7 +223,7 @@ def _class_normalinversechisq(options: dict,
assert "nu0" in sigma
assert "s0" in sigma
n = options["N"] # pylint: disable=invalid-name
nu0 = sigma["nu0"] # pylint: disable=invalid-name
nu0 = sigma["nu0"]
s0 = sigma["s0"] # pylint: disable=invalid-name
llambda = y_options["lambda"]
y0 = y_options["mean"] # pylint: disable=invalid-name
......@@ -266,7 +270,7 @@ def independent_model(options: dict,
retval["variables"] = {}
retval["variables"]["Y"] = get_rv(y_options, n_samples)
retval["n_samples"] = n_samples
# We ignore the typing here since dicts with different data types are not trivial to check
retval["f"] = measurement_model
retval["g"] = forward_model
# add all other items of Y to its new dictionary
......
......@@ -25,6 +25,8 @@
"""
from typing import Any, Union, Optional
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt # type: ignore
import numpy as np
from scipy.stats import norm # type: ignore
......
......@@ -24,12 +24,13 @@
# for any direct, indirect or consequential damage arising in connection
"""
from typing import Union, Callable
from typing import Union, Callable, Any
import numpy as np
from scipy.stats import t as student_t # type: ignore
from scipy.stats import norm # type: ignore
from scipy.integrate import quad # type: ignore
import emcee # type: ignore
import gc
def get_pdf_range(samples: Union[list, np.ndarray],
......@@ -145,7 +146,6 @@ def gum_reference(x_sensitivity: float,
def get_samplesize(samples: Union[np.ndarray, list, dict]) -> int:
"""
Prepare a given set of samples to return its shape
TODO: rename function - it just counts number of samples
Arguments:
samples {Union[np.ndarray, list, dict]} -- Given samples in various formats
......@@ -214,66 +214,91 @@ def gum_s1_reference(model: dict) -> Union[list, np.ndarray]:
def gum_rejection(model: dict,
                  measurement_model: Callable,
                  forward_model: Callable,
                  desired_samples: Union[int, float],
                  alpha: float = 0.95,
                  proposals: Union[int, float] = 10000000) -> dict:
    """
    Rejection sampler approach. Takes the whole model information and the
    forward model $g$, as well as the measurement model $f$ to generate samples
    according to the posterior distribution.

    Proposals are drawn in batches of size `proposals` until at least
    `desired_samples` draws have been accepted; the result is truncated to
    exactly `desired_samples` accepted samples.

    For an a priori check if the approach is applicable, we check whether the
    [alpha/2, 1-alpha/2] coverage interval of the prior pi(x) falls into the
    same interval of the data distribution.

    Arguments:
        model {dict} -- model as created in `model.independent_model`
        measurement_model {Callable} -- measurement model
        forward_model {Callable} -- forward model
        desired_samples {Union[int, float]} -- number of desired accepted samples
        alpha {float} -- alpha level for coverage interval {default:0.95}
        proposals {Union[int, float]} -- number of samples to draw per iteration {default:1e7}

    Returns:
        dict -- Result dictionary containing accepted samples:
            "X_accept"    -- accepted prior samples of X (length desired_samples)
            "Y_accept"    -- measurement model evaluated on the accepted samples
            "Z_accept"    -- dict of accepted samples per non-Y variable
            "M"           -- running supremum of the likelihood values
            "X_samples"   -- prior samples of X kept for diagnostics (capped at 1e8)
            "like_values" -- likelihood values of all proposals
            "acc_rate"    -- mean per-batch acceptance rate in percent
    """
    # pylint: disable=too-many-arguments
    n_samples = int(desired_samples)
    assert n_samples >= 1
    n_proposals = int(proposals)
    # Accumulators: list entries are concatenated once enough samples exist.
    retval: dict = {
        "X_accept": [],
        "Y_accept": [],
        "Z_accept": {},
        "M": -1.0,
        "X_samples": [],
        "like_values": [],
        "acc_rate": [],
    }
    n_accepted = 0
    while n_accepted < n_samples:
        # Draw a fresh batch of proposals from every prior random variable.
        sample_dict = {key: value["rv"].rvs(n_proposals)
                       for key, value in model["variables"].items()}
        # generate samples from the prior for X
        x_samples = forward_model(**sample_dict)
        # Keep at most 1e8 prior samples of X for diagnostics/plotting.
        if len(retval["X_samples"]) < 1e8:
            retval["X_samples"] = np.concatenate([retval["X_samples"], x_samples])
        # Applicability check: the [alpha/2, 1-alpha/2] interval of the prior
        # for X must overlap the same interval of the data distribution.
        data_rv = norm(model["xbar"], model["s"])
        lo_q = (1 - alpha) / 2
        hi_q = 1 - (1 - alpha) / 2
        if np.quantile(x_samples, hi_q) < data_rv.ppf(lo_q) or \
           np.quantile(x_samples, lo_q) > data_rv.ppf(hi_q):
            print("WARNING! Prior 95% CI does not contain data --> the approach is not applicable")
            # raise ValueError("Prior 95% CI does not contain data --> the approach is not applicable")
        # Statistics of measurements
        if "likeYZ" in model:
            like_values = model["likeYZ"](**sample_dict) / model["variables"]["Y"]["pdf"](sample_dict["Y"])
        else:
            like_values = model["like"](x_samples)
        # NOTE(review): M may grow between batches; batches accepted under a
        # smaller M are slightly over-accepted -- confirm this bias is tolerable.
        retval["M"] = max(float(np.max(like_values)), retval["M"])
        assert np.all(like_values <= retval["M"])
        # rejection sampling according to likelihood `like(y, z)`
        accepted = np.random.uniform(size=n_proposals) < (like_values / retval["M"])
        for key in model["variables"]:
            if key == "Y":
                continue
            retval["Z_accept"].setdefault(key, []).append(sample_dict[key][accepted])
        n_new = int(np.count_nonzero(accepted))
        n_accepted = n_accepted + n_new
        retval["X_accept"].append(x_samples[accepted])
        retval["acc_rate"].append(n_new / n_proposals * 100)
        print(f"Accepted samples: {n_accepted}/{n_samples}, "
              f"Acceptance rate: {n_new/n_proposals*100:.6f}%")
        retval["like_values"].append(like_values)
        # Proposal batches can be very large; reclaim memory eagerly.
        gc.collect()
    # Merge the per-batch results and truncate to the requested sample size.
    for key, chunks in retval["Z_accept"].items():
        retval["Z_accept"][key] = np.concatenate(chunks)[:n_samples]
    retval["X_accept"] = np.concatenate(retval["X_accept"])[:n_samples]
    retval["like_values"] = np.concatenate(retval["like_values"])
    retval["Y_accept"] = measurement_model(X=retval["X_accept"], **retval["Z_accept"])[:n_samples]
    retval["X_samples"] = retval["X_samples"][:int(1e8)]
    retval["acc_rate"] = np.mean(retval["acc_rate"])
    return retval
......
......@@ -41,43 +41,56 @@ def main(n_samples: int):
"""
if not os.path.exists("data/sampler_visualization"):
os.makedirs("data/sampler_visualization")
p_den = norm(loc=1, scale=1)
q_den = norm(loc=1.2, scale=4)
prior_den = norm(loc=1.5, scale=0.5)
x_grid = np.linspace(-10, 11, num=100)
x_prior = prior_den.rvs(n_samples + int(1e7))
x_grid = np.linspace(0, 3, num=1000)
supremum = np.max(p_den.pdf(x_grid)/q_den.pdf(x_grid))
def like(_x: np.ndarray):
"""
Small helper function for the likelihood from a noninformative prior on the variance
Arguments:
_x {float} -- type a realization
Returns:
float -- likelihood value at realization
"""
return (1 + (((_x - 1.15)/(0.3/np.sqrt(5)))**2)/(5-1))**(-5/2)
q_samples = q_den.rvs(size=n_samples)
acceptedx = []
acceptedy = []
rejectedx = []
rejectedy = []
like_val = []
for lia in range(n_samples):
uniform_sample = np.random.uniform(0, supremum*q_den.pdf(q_samples[lia]))
like = p_den.pdf(q_samples[lia]) # /q.pdf(qy[lia])
if uniform_sample < like:
acceptedx.append(q_samples[lia])
uniform_sample = np.random.uniform(0, 1)
loc_like = like(x_prior[lia])
like_val.append(loc_like)
if uniform_sample < loc_like:
acceptedx.append(x_prior[lia])
acceptedy.append(uniform_sample)
else:
rejectedx.append(q_samples[lia])
rejectedx.append(x_prior[lia])
rejectedy.append(uniform_sample)
fig, ax1 = plt.subplots()
ax1.plot(x_grid, q_den.pdf(x_grid), label="prior")
ax1.plot(x_grid, p_den.pdf(x_grid), label="posterior")
ax1.plot(x_grid, supremum*q_den.pdf(x_grid), linestyle="dashed", color="black", label="envelope")
ax1.scatter(acceptedx, acceptedy, marker="o", color="green", s=2, alpha=0.5, edgecolors="green")
ax1.scatter(rejectedx, rejectedy, marker="o", color="red", s=2, alpha=0.5, edgecolors="red")
ax1.set_ylabel("Probability density function")
ax1.set_xlabel("measurand Y [a.u.]")
ax1.set_xlim(-10, 11)
fig, axes = plt.subplots()
axes.hist(x_prior, label="prior", density=True, bins=100, alpha=0.5)
# ax1.plot(x_grid, p_den.pdf(x_grid), label="posterior")
axes.plot(x_grid, like(x_grid), linestyle="dashed", color="black", label="likelihood")
axes.scatter(acceptedx, acceptedy, marker="o", color="green", s=2, alpha=0.5, edgecolors="green")
axes.scatter(rejectedx, rejectedy, marker="o", color="red", s=2, alpha=0.5, edgecolors="red")
axes.set_ylabel("PDF / Uniform samples")
axes.set_xlabel("X [units of type A quantity]")
axes.set_xlim(0, 3)
axes.set_ylim(bottom=0, top=1)
# ax.set_title(f"Number of samples: N={n_samples}")
ax1.legend()
axes.legend(loc="upper right")
print(f"Acceptance rate: {len(acceptedx)/n_samples*100}%")
plt.tight_layout()
fig.savefig(f"data/sampler_visualization/N_{n_samples}.pdf")
fig.savefig(f"data/sampler_visualization/N_{n_samples}.pdf", dpi=100)
if __name__ == "__main__":
main(100)
main(1000)
main(10000)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment