%% Cell type:markdown id:a3cf80d8-ef6d-4dec-8234-22967c45231d tags:
# Scatterometry
%% Cell type:markdown id:b75bfa2e-067f-48b8-93e5-e84bc7da3ff3 tags:
![Scatterometry measurement setup](../../img/notebooks/scat_setup.png)
%% Cell type:code id:3f27e08c-8fab-4917-9d4a-caed6af9ad88 tags:
``` python
import sys
import numpy as np
import pythia as pt # see doc: www.pythia-uq.com
import emcee
import matplotlib.pyplot as plt
import matplotlib.cm as cmap
sys.path.append("../../src")
from utils import Gaussian # local import
```
%% Cell type:markdown id:c810ea31-b8c9-46c7-a4b4-e058a9ae5030 tags:
## Load scatterometry data
%% Cell type:code id:504e11ab-01f5-455a-8a72-7097278f8023 tags:
``` python
path = "../../data/scat/"
```
%% Cell type:code id:c547374f-6999-4923-beaa-3da43fbbab79 tags:
``` python
angles = np.load(path+"angles.npy") # (90, 2)
# reshape
angles = np.concatenate([angles, angles], axis=0) # (180, 2)
angle_idxs = [range(45), range(45,90), range(90,135), range(135,180)]
angle_labels = ["$\phi=0^\circ$, s pol","$\phi=0^\circ$, p pol",
"$\phi=90^\circ$, s pol","$\phi=90^\circ$, p pol"]
print(f"azimuth phi: {np.unique(angles[:,0])}")
print(f"incidence theta: {np.unique(angles[:,1])}")
```
%% Cell type:code id:f9b2d87e-2fcb-463b-b39c-d87844772c99 tags:
``` python
x_data = np.load(path+"x_train.npy") # (10_000, 6)
y_data = np.load(path+"y_train.npy") # (90, 10_000, 2)
w_data = np.load(path+"w_train.npy") # (10_000, )
# reshape y_data into four blocks of 45 incidence angles, matching angle_idxs and angle_labels
y_data = np.concatenate([y_data[:45,:,0], y_data[:45,:,1],
y_data[45:,:,0], y_data[45:,:,1]], axis=0).T # (10_000, 180)
```
%% Cell type:code id:700eec32-db8b-4443-a05d-fbce6dedc62c tags:
``` python
fig = plt.figure()
for idxs, label in zip(angle_idxs, angle_labels):
    plt.plot(angles[idxs,1], y_data[375,idxs], label=label)
plt.legend()
plt.show()
```
%% Cell type:code id:a16775a5-b12f-4792-957a-76d39be52aa7 tags:
``` python
# load parameter information
params = []
for dct in np.load(path+"parameter.npy", allow_pickle=True):
    params.append(pt.parameter.Parameter(
        index=dct["_idx"], name=dct["_name"], domain=dct["_dom"], distribution=dct["_dist"],
        alpha=dct["_alpha"], beta=dct["_beta"]))
```
%% Cell type:code id:ecdb40d0-5cb5-488e-b757-2d9be9112bff tags:
``` python
print("parameter dist domain")
print("-"*30)
for param in params:
print(f"{param.name:<9} {param.distribution:<5} {np.array(param.domain, dtype=int)}")
```
%% Cell type:code id:79a57610-24f5-46b3-9ae0-0c1182c3bd61 tags:
``` python
fig, ax = plt.subplot_mosaic([["h", "cd", "swa"],["t", "r_top", "r_bot"]], constrained_layout=True)
for j, param in enumerate(params):
    ax[param.name].hist(x_data[:, j], bins=50)
    ax[param.name].set_title(param.name)
    ax[param.name].set_yticks([])
plt.show()
```
%% Cell type:markdown id:5c22c2ef-b248-4b5b-8065-5c325aa25d85 tags:
## Approximate Forward Problem
%% Cell type:markdown id:18e99871-d6f8-4f7b-9382-f6503c92d93b tags:
#### Split training and test data
%% Cell type:code id:84c2cbfd-9ce8-437a-8d1a-385e479ee59b tags:
``` python
split = int(0.8*x_data.shape[0])
# training data
x_train = x_data[:split]
y_train = y_data[:split]
w_train = w_data[:split]
# test data
x_test = x_data[split:]
y_test = y_data[split:]
```
%% Cell type:markdown id:02490c03-13e6-48c2-9552-16ce43d6d939 tags:
#### Define index set
%% Cell type:code id:e2e8204e-c516-48fc-85cd-bcabf3b38938 tags:
``` python
# define multiindices
index_set = ... # TODO: define simplex index set of polynomials of degree less than 5
print(f"Multiindex information:")
print(f" number of indices: {index_set.shape[0]}")
print(f" dimension: {index_set.shape[1]}")
print(f" maximum dimension: {index_set.max}")
print(f" number of sobol indices: {len(index_set.sobol_tuples)}")
```
%% Cell type:markdown id:cbcc9c33-79e6-4ffb-b51a-9615e3de4907 tags:
#### Train PC approximation
%% Cell type:code id:4884e6b5-1fb4-4d57-800a-774a6fb3e9a2 tags:
``` python
# run pc approximation
surrogate = ... # TODO: use PyThia to create a polynomial chaos surrogate
```
%% Cell type:markdown id:b096635a-830a-4039-a07c-5145dd0a55c4 tags:
#### Compute approximation error
%% Cell type:code id:c311f090-b48e-4ffa-b963-a97827583247 tags:
``` python
# test approximation error
y_approx = ... # TODO: evaluate the PC surrogate on the test set
# L2-L2 error
err_abs = np.sum(np.linalg.norm(y_test - y_approx, axis=1))/y_test.shape[0]
err_rel = np.sum(np.linalg.norm((y_test - y_approx)/y_test, axis=1))/y_test.shape[0]
print(f"L2-L2 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)")
# C0-L2 error
err_abs = np.sum(np.max(np.abs(y_test - y_approx), axis=1))/y_test.shape[0]
err_rel = np.sum(np.max(np.abs(y_test - y_approx)/np.abs(y_test), axis=1))/y_test.shape[0]
print(f"L2-C0 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)")
```
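%% Cell type:markdown tags:
For orientation: with $N$ test samples $y_j$ and surrogate evaluations $\tilde y_j$ (each a vector of 180 intensities), the cell above reports the sample-averaged errors
$$\frac{1}{N}\sum_{j=1}^{N}\lVert y_j-\tilde y_j\rVert_2 \quad\text{and}\quad \frac{1}{N}\sum_{j=1}^{N}\max_i\,\lvert y_{j,i}-\tilde y_{j,i}\rvert,$$
each in an absolute and a relative variant (difference divided componentwise by $y_j$).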
%% Cell type:code id:cf030598-1d06-400e-a7f7-e3ee3ffcea9b tags:
``` python
fig, axes = plt.subplot_mosaic([["1", "2", "3"], ["4", "5", "6"]], constrained_layout=True, figsize=(9,6))
for ax in axes.values():
    j = np.random.randint(0, x_test.shape[0])
    for c, (idxs, label) in enumerate(zip(angle_idxs, angle_labels)):
        ax.plot(angles[idxs,1], y_test[j,idxs], label=label, color=cmap.tab20(2*c))
        ax.plot(angles[idxs,1], y_approx[j,idxs], "--", label=label, color=cmap.tab20(2*c+1))
    ax.set_title(f"test data index: {j}")
plt.show()
```
%% Cell type:markdown id:187e51ae-047f-42a4-8671-3cb0eaa11472 tags:
## Global Sensitivity Analysis
%% Cell type:code id:8e990349-158f-4aba-90fe-2e56d6b248e7 tags:
``` python
max_vals = ... # TODO: compute the maximum of each Sobol index over all output components
l2_vals = ... # TODO: compute the L2 norm (averaged magnitude) of each Sobol index over all output components
```
%% Cell type:code id:ccbcf0d9-d125-401c-a127-5d86b4aec94c tags:
``` python
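# rank the Sobol tuples by their maximal / L2 value over all output components (largest first)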
sobol_max = [list(reversed([x for _,x in sorted(zip(max_vals, index_set.sobol_tuples))]))]
sobol_L2 = [list(reversed([x for _,x in sorted(zip(l2_vals, index_set.sobol_tuples))]))]
print(f"{'#':>2} {'max':<10} {'L2':<10}")
print("-"*25)
for j in range(10):
print(f"{j+1:>2} {str(sobol_max[0][j]):<10} {str(sobol_L2[0][j]):<10}")
```
%% Cell type:code id:50af6adc-ac11-458a-91c3-598240960cf9 tags:
``` python
from math import factorial  # np.math.factorial was removed in NumPy 2.0

def binom(n, k):
    return factorial(n) / factorial(k) / factorial(n-k)

# boundaries between the blocks of first-, second-, third-, ... order Sobol tuples
bdry = [0]*7
for jj in range(1, len(bdry)):
    bdry[jj] = int(bdry[jj-1] + binom(len(bdry)-1, jj))
fig, axes = plt.subplot_mosaic([["max"], ["L2"]])
axes["max"].axvspan(bdry[0], bdry[1], facecolor="k", alpha=0.2, label="SOB = (i,)")
axes["max"].axvspan(bdry[1], bdry[2], facecolor="k", alpha=0.15, label="SOB = (i,j)")
axes["max"].axvspan(bdry[2], bdry[3], facecolor="k", alpha=0.1, label="SOB = (i,j,k)")
axes["max"].semilogy(range(surrogate.sobol.shape[0]), max_vals,
"-o", linewidth=1, label="max")
axes["max"].legend()
axes["max"].grid()
axes["L2"].axvspan(bdry[0], bdry[1], facecolor="k", alpha=0.2, label="SOB = (i,)")
axes["L2"].axvspan(bdry[1], bdry[2], facecolor="k", alpha=0.15, label="SOB = (i,j)")
axes["L2"].axvspan(bdry[2], bdry[3], facecolor="k", alpha=0.1, label="SOB = (i,j,k)")
axes["L2"].semilogy(range(surrogate.sobol.shape[0]), l2_vals,
"-o", linewidth=1, label="$L^2$")
axes["L2"].legend()
axes["L2"].grid()
plt.show()
```
%% Cell type:markdown id:7252d885-84b5-4a47-9cc2-b3f6374351b9 tags:
## Inverse Problem & Parameter Reconstruction
%% Cell type:markdown id:b0d4d74c-682c-4823-93d1-121fd3eb164f tags:
#### Load measurements
%% Cell type:code id:ecc36513-8627-4f07-886d-9dcbfc6f9e88 tags:
``` python
# measurement
x_true = x_data[-1].reshape(1, -1)
y_true = y_data[-1].reshape(1, -1)
b = 0.015
noise = np.random.normal(0, b, y_true.shape)
y_meas = y_true + noise
```
%% Cell type:code id:d0fa7413-97cf-4dec-9403-b0f64a7818c7 tags:
``` python
fig = plt.figure()
for idxs, label in zip(angle_idxs, angle_labels):
    plt.plot(angles[idxs,1], y_true[0,idxs], label=label)
    plt.scatter(angles[idxs,1], y_meas[0, idxs], s=2)
plt.legend()
plt.title("Ground truth (solid) and noisy measurement (dots)")
plt.show()
```
%% Cell type:markdown id:40495846-7f57-4ee6-9b06-26d91882addc tags:
#### Define Bayesian posterior
%% Cell type:code id:1186d066-6018-4a18-a805-d5e15d1eaad7 tags:
``` python
# define forward and error model
hyper_params = [pt.parameter.Parameter(index=len(params)+1, name="b", domain=[0, 0.1], distribution="uniform")]
dim = len(params+hyper_params)
prior = ... # TODO: define the extended prior
forward_model = lambda x: ... # TODO: define the extended forward model
sigma = lambda x: np.abs(x[:, -1]).reshape(-1, 1)*np.ones((1, y_meas.shape[1])) # error model
# define Bayesian log-posterior
lh = Gaussian(forward_model, sigma, dim)
def log_posterior(x, meas):
    return lh.log_likelihood(x, meas) + prior.log_pdf(x)
```
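%% Cell type:markdown tags:
The noise level $b$ is treated as an additional hyper-parameter and inferred jointly with the six geometry parameters: the error model assigns every data channel the same standard deviation $\lvert b\rvert$, and the log-posterior is, up to an additive constant, $\log\pi(x\mid\delta) = \log\mathcal{L}(\delta\mid x) + \log\pi_0(x)$ with the Gaussian likelihood $\mathcal{L}$ from `utils.py` and the extended prior $\pi_0$.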
%% Cell type:markdown id:ba006e83-5173-419b-9d11-176c14e60242 tags:
#### Setup MCMC
%% Cell type:code id:f3a47c73-b2ae-427f-b56a-e194e5c84dbe tags:
``` python
# setup MCMC
n_walkers = 15
chain_length = 1000
seed = prior.sample(n_walkers)
```
%% Cell type:code id:68accd23-f187-4f69-9c37-dac3ada699d7 tags:
``` python
# define runner
runner = ... # TODO: initialize the MCMC runner
```
%% Cell type:markdown id:02c2b899-95df-4be0-a384-269f2cf17c21 tags:
#### Run MCMC
%% Cell type:code id:2f0a03eb-1bde-4fea-9682-fd123b6b0ba5 tags:
``` python
# discard burn-in
state = runner.run_mcmc(seed, 500, progress=True)
runner.reset()
```
%% Cell type:code id:30451bb1-e0bd-4435-8db5-2dd24488d010 tags:
``` python
# run MCMC
... # TODO: run MCMC chains
```
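%% Cell type:markdown tags:
Hint (one possible completion, mirroring the solution notebook further down): continue sampling from the state returned by the burn-in run, e.g. `runner.run_mcmc(state, chain_length, progress=True)`.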
%% Cell type:markdown id:a3571a01-e1bd-4a85-924f-043ef1d574b1 tags:
#### Data Analysis
%% Cell type:code id:1653cf3c-3f1d-40bf-84fd-28ce76676364 tags:
``` python
# simple evaluation
samples = ... # TODO: get samples from runner
mean = np.mean(samples, axis=0)
std = np.std(samples, axis=0)
```
%% Cell type:code id:2214221a-a2be-40f7-88c4-5c0d7091210e tags:
``` python
# plot posterior distribution
samples = runner.get_chain(flat=True)
layout = [[j] for j in range(7)]
layout = [[0, 1], [2, 3], [4, 5], [6, 6]]
fig, axes = plt.subplot_mosaic(layout, figsize=(12, 7.5))
for j, (param, ax) in enumerate(zip(params+hyper_params, axes.values())):
    bins = 50 if param.name != "b" else 150
    ax.hist(samples[:, j], bins=bins, density=True, range=param.domain, alpha=0.5)
    ax.axvline(x=mean[j], color=cmap.tab10(0), label="mean")
    ax.axvline(x=mean[j]-std[j], ls="--", color=cmap.tab10(0), label="std")
    ax.axvline(x=mean[j]+std[j], ls="--", color=cmap.tab10(0))
    if param.name == "b":
        ax.axvline(x=b, color="k", label="true value")
    else:
        ax.axvline(x=x_true[0, j], color="k", label="true value")
    ax.set_ylabel(f"{j+1} - {param.name}")
    ax.set_yticks([])
    ax.legend()
plt.show()
```
%% Cell type:code id:e9b15e08-8248-46cd-99c6-e5f8cd64c6e0 tags:
``` python
fig, axes = plt.subplot_mosaic([["approx", "error"]], figsize=(15, 5))
vals = surrogate.eval(np.mean(samples, axis=0)[:-1])
errs = y_true - vals
for j, (idxs, label) in enumerate(zip(angle_idxs, angle_labels)):
    color = cmap.tab10(j)
    axes["approx"].plot(angles[idxs,1], vals[0, idxs], color=color, label=label)
    axes["approx"].plot(angles[idxs,1], y_true[0, idxs], "--", color=color)
    axes["approx"].scatter(angles[idxs,1], y_meas[0, idxs], s=2, color=color)
    axes["approx"].legend()
    axes["approx"].set_title("Posterior-mean prediction (solid), ground truth (dashed), measurement (dots)")
    axes["error"].plot(angles[idxs, 1], errs[0, idxs], color=color, label=label)
axes["error"].legend()
axes["error"].grid()
axes["error"].set_title("Pointwise deviation from ground truth")
plt.show()
```
%% Cell type:code id:36afafba-3de1-4dde-b114-b6b278a62473 tags:
``` python
```
%% Cell type:markdown id:a3cf80d8-ef6d-4dec-8234-22967c45231d tags:
# XRR
%% Cell type:code id:3f27e08c-8fab-4917-9d4a-caed6af9ad88 tags:
``` python
import sys
import numpy as np
import emcee
import pythia as pt # see doc: www.pythia-uq.com
import matplotlib.pyplot as plt
import matplotlib.cm as cmap
sys.path.append("../../src")
from utils import Gaussian # local import
```
%% Cell type:markdown id:c810ea31-b8c9-46c7-a4b4-e058a9ae5030 tags:
## Load XRR data
%% Cell type:code id:504e11ab-01f5-455a-8a72-7097278f8023 tags:
``` python
path = "../../data/xrr/"
```
%% Cell type:code id:c547374f-6999-4923-beaa-3da43fbbab79 tags:
``` python
omega = np.load(path+"omega.npy")
print(f"shape omega: {omega.shape}")
```
%% Cell type:code id:f9b2d87e-2fcb-463b-b39c-d87844772c99 tags:
``` python
x_data = np.load(path+"x_train.npy") # (10_000, 5)
y_data = np.log10(np.load(path+"y_train.npy")) # (10_000, 585)
w_data = np.load(path+"w_train.npy") # (10_000, )
```
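%% Cell type:markdown tags:
Since the reflectivity typically spans several orders of magnitude, the surrogate is trained on the $\log_{10}$ of the signal (note the `np.log10` in the cell above).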
%% Cell type:code id:700eec32-db8b-4443-a05d-fbce6dedc62c tags:
``` python
fig = plt.figure()
for j, y in enumerate(y_data[:6]):
    plt.plot(omega, y, label=f"sample {j+1}", color=cmap.tab20(j))
plt.legend()
plt.show()
```
%% Cell type:code id:a16775a5-b12f-4792-957a-76d39be52aa7 tags:
``` python
# set parameter information
param1 = pt.parameter.Parameter(index=1, name="ρRu", domain=[12590-25, 12590+25], distribution="uniform")
param2 = pt.parameter.Parameter(index=2, name="wRu", domain=[291-1, 291+2], distribution="uniform")
param3 = pt.parameter.Parameter(index=3, name="σRu", domain=[4-1, 4+1], distribution="uniform")
param4 = pt.parameter.Parameter(index=4, name="wSiO2", domain=[18-3, 18+3], distribution="uniform")
param5 = pt.parameter.Parameter(index=5, name="wRuO2", domain=[18-1, 18+1], distribution="uniform")
params = [param1, param2, param3, param4, param5]
param_names = [p.name for p in params]
```
%% Cell type:code id:ecdb40d0-5cb5-488e-b757-2d9be9112bff tags:
``` python
print("parameter dist domain")
print("-"*40)
for param in params:
print(f"{param.name:<9} {param.distribution:<5} {np.array(param.domain, dtype=int)}")
```
%% Cell type:code id:79a57610-24f5-46b3-9ae0-0c1182c3bd61 tags:
``` python
fig, ax = plt.subplot_mosaic([param_names], constrained_layout=True, figsize=(10, 3))
for j, param in enumerate(params):
    ax[param.name].hist(x_data[:, j], bins=50)
    ax[param.name].set_title(param.name)
    ax[param.name].set_yticks([])
plt.show()
```
%% Cell type:markdown id:5c22c2ef-b248-4b5b-8065-5c325aa25d85 tags:
## Approximate Forward Problem
%% Cell type:markdown id:18e99871-d6f8-4f7b-9382-f6503c92d93b tags:
#### Split training and test data
%% Cell type:code id:84c2cbfd-9ce8-437a-8d1a-385e479ee59b tags:
``` python
split = int(0.8*x_data.shape[0])
# training data
x_train = x_data[:split]
y_train = y_data[:split]
w_train = w_data[:split]
# test data
x_test = x_data[split:]
y_test = y_data[split:]
```
%% Cell type:markdown id:02490c03-13e6-48c2-9552-16ce43d6d939 tags:
#### Define index set
%% Cell type:code id:e2e8204e-c516-48fc-85cd-bcabf3b38938 tags:
``` python
# define multiindices
index_set = ... # TODO: define simplex index set of polynomials of degree less than 5
print(f"Multiindex information:")
print(f" number of indices: {index_set.shape[0]}")
print(f" dimension: {index_set.shape[1]}")
print(f" maximum dimension: {index_set.max}")
print(f" number of sobol indices: {len(index_set.sobol_tuples)}")
```
%% Cell type:markdown id:cbcc9c33-79e6-4ffb-b51a-9615e3de4907 tags:
#### Train PC approximation
%% Cell type:code id:4884e6b5-1fb4-4d57-800a-774a6fb3e9a2 tags:
``` python
# run pc approximation
surrogate = ... # TODO: use PyThia to create a polynomial chaos surrogate
```
%% Cell type:markdown id:b096635a-830a-4039-a07c-5145dd0a55c4 tags:
#### Compute approximation error
%% Cell type:code id:c311f090-b48e-4ffa-b963-a97827583247 tags:
``` python
# test approximation error
y_approx = ... # TODO: evaluate the PC surrogate on the test set
# L2-L2 error
err_abs = np.sum(np.linalg.norm(y_test - y_approx, axis=1))/y_test.shape[0]
err_rel = np.sum(np.linalg.norm((y_test - y_approx)/y_test, axis=1))/y_test.shape[0]
print(f"L2-L2 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)")
# C0-L2 error
err_abs = np.sum(np.max(np.abs(y_test - y_approx), axis=1))/y_test.shape[0]
err_rel = np.sum(np.max(np.abs(y_test - y_approx)/np.abs(y_test), axis=1))/y_test.shape[0]
print(f"L2-C0 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)")
```
%% Cell type:code id:cf030598-1d06-400e-a7f7-e3ee3ffcea9b tags:
``` python
fig, axes = plt.subplot_mosaic([["1", "2", "3"], ["4", "5", "6"]], constrained_layout=True, figsize=(9,6))
for ax in axes.values():
    j = np.random.randint(0, x_test.shape[0])
    ax.plot(omega, y_test[j], label=f"ground truth", lw=0.5)
    ax.plot(omega, y_approx[j], label="approx", lw=0.5)
    ax.set_title(f"test data index: {j}")
plt.show()
```
%% Cell type:markdown id:187e51ae-047f-42a4-8671-3cb0eaa11472 tags:
## Global Sensitivity Analysis
%% Cell type:code id:8e990349-158f-4aba-90fe-2e56d6b248e7 tags:
``` python
max_vals = ... # TODO: compute the maximum of each Sobol index over all output components
l2_vals = ... # TODO: compute the L2 norm (averaged magnitude) of each Sobol index over all output components
```
%% Cell type:code id:ccbcf0d9-d125-401c-a127-5d86b4aec94c tags:
``` python
sobol_max = [list(reversed([x for _,x in sorted(zip(max_vals, index_set.sobol_tuples))]))]
sobol_L2 = [list(reversed([x for _,x in sorted(zip(l2_vals, index_set.sobol_tuples))]))]
print(f"{'#':>2} {'max':<10} {'L2':<10}")
print("-"*25)
for j in range(10):
print(f"{j+1:>2} {str(sobol_max[0][j]):<10} {str(sobol_L2[0][j]):<10}")
```
%% Cell type:code id:50af6adc-ac11-458a-91c3-598240960cf9 tags:
``` python
from math import factorial  # np.math.factorial was removed in NumPy 2.0

def binom(n, k):
    return factorial(n) / factorial(k) / factorial(n-k)

# boundaries between the blocks of first-, second-, third-, ... order Sobol tuples
bdry = [0]*5
for jj in range(1, len(bdry)):
    bdry[jj] = int(bdry[jj-1] + binom(len(bdry)-1, jj))
fig, axes = plt.subplot_mosaic([["max"], ["L2"]])
axes["max"].axvspan(bdry[0], bdry[1], facecolor="k", alpha=0.2, label="SOB = (i,)")
axes["max"].axvspan(bdry[1], bdry[2], facecolor="k", alpha=0.15, label="SOB = (i,j)")
axes["max"].axvspan(bdry[2], bdry[3], facecolor="k", alpha=0.1, label="SOB = (i,j,k)")
axes["max"].semilogy(range(surrogate.sobol.shape[0]), max_vals,
"-o", linewidth=1, label="max")
axes["max"].legend()
axes["max"].grid()
axes["L2"].axvspan(bdry[0], bdry[1], facecolor="k", alpha=0.2, label="SOB = (i,)")
axes["L2"].axvspan(bdry[1], bdry[2], facecolor="k", alpha=0.15, label="SOB = (i,j)")
axes["L2"].axvspan(bdry[2], bdry[3], facecolor="k", alpha=0.1, label="SOB = (i,j,k)")
axes["L2"].semilogy(range(surrogate.sobol.shape[0]), l2_vals,
"-o", linewidth=1, label="$L^2$")
axes["L2"].legend()
axes["L2"].grid()
plt.show()
```
%% Cell type:markdown id:7252d885-84b5-4a47-9cc2-b3f6374351b9 tags:
## Inverse Problem & Parameter Reconstruction
%% Cell type:markdown id:b0d4d74c-682c-4823-93d1-121fd3eb164f tags:
#### Load measurements
%% Cell type:code id:ecc36513-8627-4f07-886d-9dcbfc6f9e88 tags:
``` python
# measurement
use_real_measurement = True
if use_real_measurement is True:
    y_meas = np.load(path+"measurement.npy").reshape(1, -1)
    y_meas[0, np.where(y_meas[0]<1e-3)[0]] = 1e-3 # fix numerical issues
    y_meas = np.log10(y_meas)
else:
    x_true = x_data[-1].reshape(1, -1)
    y_true = y_data[-1].reshape(1, -1)
    b = 0.05
    noise = np.random.normal(0, b, y_true.shape)
    y_meas = y_true + noise
```
%% Cell type:code id:d0fa7413-97cf-4dec-9403-b0f64a7818c7 tags:
``` python
fig = plt.figure()
if use_real_measurement is False:
    plt.plot(omega, y_true[0], label="y_true")
plt.scatter(omega, y_meas[0], s=2, label="measurement", color=cmap.tab10(1))
plt.legend()
plt.title("Ground truth (solid) and noisy measurement (dots)")
plt.show()
```
%% Cell type:markdown id:40495846-7f57-4ee6-9b06-26d91882addc tags:
#### Define Bayesian posterior
%% Cell type:code id:1186d066-6018-4a18-a805-d5e15d1eaad7 tags:
``` python
# define forward and error model
hyper_params = [pt.parameter.Parameter(index=len(params)+1, name="b", domain=[0, 0.5], distribution="uniform")]
dim = len(params+hyper_params)
prior = ... # TODO: define the extended prior
forward_model = lambda x: ... # TODO: define the extended forward model
sigma = lambda x: np.abs(x[:, -1]).reshape(-1, 1)*np.ones((1, y_meas.shape[1])) # error model
# define Bayesian posterior
lh = Gaussian(forward_model, sigma, dim)
def log_posterior(x, meas):
    return lh.log_likelihood(x, meas) + prior.log_pdf(x)
```
%% Cell type:markdown id:ba006e83-5173-419b-9d11-176c14e60242 tags:
#### Setup MCMC
%% Cell type:code id:f3a47c73-b2ae-427f-b56a-e194e5c84dbe tags:
``` python
# setup MCMC
n_walkers = 15
chain_length = 1000
seed = prior.sample(n_walkers)
```
%% Cell type:code id:68accd23-f187-4f69-9c37-dac3ada699d7 tags:
``` python
# define runner
runner = ... # TODO: initialize the MCMC runner
```
%% Cell type:markdown id:02c2b899-95df-4be0-a384-269f2cf17c21 tags:
#### Run MCMC
%% Cell type:code id:2f0a03eb-1bde-4fea-9682-fd123b6b0ba5 tags:
``` python
# discard burn-in
state = runner.run_mcmc(seed, 500, progress=True)
runner.reset()
```
%% Cell type:code id:30451bb1-e0bd-4435-8db5-2dd24488d010 tags:
``` python
# run MCMC
... # TODO: run MCMC chains
```
%% Cell type:markdown id:a3571a01-e1bd-4a85-924f-043ef1d574b1 tags:
#### Data Analysis
%% Cell type:code id:1653cf3c-3f1d-40bf-84fd-28ce76676364 tags:
``` python
# simple evaluation
samples = runner.get_chain(flat=True)
mean = np.mean(samples, axis=0)
std = np.std(samples, axis=0)
```
%% Cell type:code id:2214221a-a2be-40f7-88c4-5c0d7091210e tags:
``` python
# plot posterior distribution
samples = runner.get_chain(flat=True)
layout = [[j] for j in range(7)]
layout = [[0, 1], [2, 3], [4, 5]]
fig, axes = plt.subplot_mosaic(layout, figsize=(12, 7.5))
for j, (param, ax) in enumerate(zip(params+hyper_params, axes.values())):
    bins = 50 if param.name != "b" else 150
    ax.hist(samples[:, j], bins=bins, density=True, range=param.domain, alpha=0.5)
    ax.axvline(x=mean[j], color=cmap.tab10(0), label="mean")
    ax.axvline(x=mean[j]-std[j], ls="--", color=cmap.tab10(0), label="std")
    ax.axvline(x=mean[j]+std[j], ls="--", color=cmap.tab10(0))
    if use_real_measurement is False:
        if param.name == "b":
            ax.axvline(x=b, color="k", label="true value")
        else:
            ax.axvline(x=x_true[0, j], color="k", label="true value")
    ax.set_ylabel(f"{j+1} - {param.name}")
    ax.set_yticks([])
    ax.legend()
plt.show()
```
%% Cell type:code id:e9b15e08-8248-46cd-99c6-e5f8cd64c6e0 tags:
``` python
vals = surrogate.eval(np.mean(samples, axis=0)[:-1])
if use_real_measurement is True:
    errs = np.abs((y_meas - vals))
else:
    errs = np.abs((y_true - vals))
fig, axes = plt.subplot_mosaic([["approx", "error"]], figsize=(15, 5))
axes["approx"].plot(omega, vals[0], label="approx", color=cmap.tab10(0))
if use_real_measurement is False:
    axes["approx"].plot(omega, y_true[0], "--", label="ground truth", color=cmap.tab10(1))
axes["approx"].scatter(omega, y_meas[0], s=2, label="measurement", color=cmap.tab10(1))
axes["approx"].legend()
axes["approx"].set_title("Posterior-mean prediction (solid) and measurement (dots)")
axes["error"].plot(omega, errs[0])
axes["error"].grid()
axes["error"].set_title("Pointwise deviation from ground truth")
plt.show()
```
%% Cell type:code id:36afafba-3de1-4dde-b114-b6b278a62473 tags:
``` python
```
%% Cell type:markdown id:a3cf80d8-ef6d-4dec-8234-22967c45231d tags:
# Scatterometry
%% Cell type:markdown id:b75bfa2e-067f-48b8-93e5-e84bc7da3ff3 tags:
![Scatterometry measurement setup](../../img/notebooks/scat_setup.png)
%% Cell type:code id:3f27e08c-8fab-4917-9d4a-caed6af9ad88 tags:
``` python
import sys
import numpy as np
import pythia as pt # see doc: www.pythia-uq.com
import emcee
import matplotlib.pyplot as plt
import matplotlib.cm as cmap
sys.path.append("../../src")
from utils import Gaussian # local import
```
%% Cell type:markdown id:c810ea31-b8c9-46c7-a4b4-e058a9ae5030 tags:
## Load scatterometry data
%% Cell type:code id:504e11ab-01f5-455a-8a72-7097278f8023 tags:
``` python
path = "../../data/scat/"
```
%% Cell type:code id:c547374f-6999-4923-beaa-3da43fbbab79 tags:
``` python
angles = np.load(path+"angles.npy") # (90, 2)
# reshape
angles = np.concatenate([angles, angles], axis=0) # (180, 2)
angle_idxs = [range(45), range(45,90), range(90,135), range(135,180)]
angle_labels = ["$\phi=0^\circ$, s pol","$\phi=0^\circ$, p pol",
"$\phi=90^\circ$, s pol","$\phi=90^\circ$, p pol"]
print(f"azimuth phi: {np.unique(angles[:,0])}")
print(f"incidence theta: {np.unique(angles[:,1])}")
```
%% Cell type:code id:f9b2d87e-2fcb-463b-b39c-d87844772c99 tags:
``` python
x_data = np.load(path+"x_train.npy") # (10_000, 6)
y_data = np.load(path+"y_train.npy") # (90, 10_000, 2)
w_data = np.load(path+"w_train.npy") # (10_000, )
# reshape y_data into four blocks of 45 incidence angles, matching angle_idxs and angle_labels
y_data = np.concatenate([y_data[:45,:,0], y_data[:45,:,1],
y_data[45:,:,0], y_data[45:,:,1]], axis=0).T # (10_000, 180)
```
%% Cell type:code id:700eec32-db8b-4443-a05d-fbce6dedc62c tags:
``` python
fig = plt.figure()
for idxs, label in zip(angle_idxs, angle_labels):
    plt.plot(angles[idxs,1], y_data[375,idxs], label=label)
plt.legend()
plt.show()
```
%% Cell type:code id:a16775a5-b12f-4792-957a-76d39be52aa7 tags:
``` python
# load parameter information
params = []
for dct in np.load(path+"parameter.npy", allow_pickle=True):
    params.append(pt.parameter.Parameter(
        index=dct["_idx"], name=dct["_name"], domain=dct["_dom"], distribution=dct["_dist"],
        alpha=dct["_alpha"], beta=dct["_beta"]))
```
%% Cell type:code id:ecdb40d0-5cb5-488e-b757-2d9be9112bff tags:
``` python
print("parameter dist domain")
print("-"*30)
for param in params:
print(f"{param.name:<9} {param.distribution:<5} {np.array(param.domain, dtype=int)}")
```
%% Cell type:code id:79a57610-24f5-46b3-9ae0-0c1182c3bd61 tags:
``` python
fig, ax = plt.subplot_mosaic([["h", "cd", "swa"],["t", "r_top", "r_bot"]], constrained_layout=True)
for j, param in enumerate(params):
    ax[param.name].hist(x_data[:, j], bins=50)
    ax[param.name].set_title(param.name)
    ax[param.name].set_yticks([])
plt.show()
```
%% Cell type:markdown id:5c22c2ef-b248-4b5b-8065-5c325aa25d85 tags:
## Approximate Forward Problem
%% Cell type:markdown id:18e99871-d6f8-4f7b-9382-f6503c92d93b tags:
#### Split training and test data
%% Cell type:code id:84c2cbfd-9ce8-437a-8d1a-385e479ee59b tags:
``` python
split = int(0.8*x_data.shape[0])
# training data
x_train = x_data[:split]
y_train = y_data[:split]
w_train = w_data[:split]
# test data
x_test = x_data[split:]
y_test = y_data[split:]
```
%% Cell type:markdown id:02490c03-13e6-48c2-9552-16ce43d6d939 tags:
#### Define index set
%% Cell type:code id:e2e8204e-c516-48fc-85cd-bcabf3b38938 tags:
``` python
# define multiindices
indices = pt.index.simplex(dimension=len(params), maximum=4)
index_set = pt.index.IndexSet(indices)
print(f"Multiindex information:")
print(f" number of indices: {index_set.shape[0]}")
print(f" dimension: {index_set.shape[1]}")
print(f" maximum dimension: {index_set.max}")
print(f" number of sobol indices: {len(index_set.sobol_tuples)}")
```
%% Cell type:markdown id:cbcc9c33-79e6-4ffb-b51a-9615e3de4907 tags:
#### Train PC approximation
%% Cell type:code id:4884e6b5-1fb4-4d57-800a-774a6fb3e9a2 tags:
``` python
# run pc approximation
surrogate = pt.chaos.PolynomialChaos(params, index_set, x_train, w_train, y_train)
```
%% Cell type:markdown id:b096635a-830a-4039-a07c-5145dd0a55c4 tags:
#### Compute approximation error
%% Cell type:code id:c311f090-b48e-4ffa-b963-a97827583247 tags:
``` python
# test approximation error
y_approx = surrogate.eval(x_test)
# L2-L2 error
err_abs = np.sum(np.linalg.norm(y_test - y_approx, axis=1))/y_test.shape[0]
err_rel = np.sum(np.linalg.norm((y_test - y_approx)/y_test, axis=1))/y_test.shape[0]
print(f"L2-L2 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)")
# C0-L2 error
err_abs = np.sum(np.max(np.abs(y_test - y_approx), axis=1))/y_test.shape[0]
err_rel = np.sum(np.max(np.abs(y_test - y_approx)/np.abs(y_test), axis=1))/y_test.shape[0]
print(f"L2-C0 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)")
```
%% Cell type:code id:cf030598-1d06-400e-a7f7-e3ee3ffcea9b tags:
``` python
fig, axes = plt.subplot_mosaic([["1", "2", "3"], ["4", "5", "6"]], constrained_layout=True, figsize=(9,6))
for ax in axes.values():
    j = np.random.randint(0, x_test.shape[0])
    for c, (idxs, label) in enumerate(zip(angle_idxs, angle_labels)):
        ax.plot(angles[idxs,1], y_test[j,idxs], label=label, color=cmap.tab20(2*c))
        ax.plot(angles[idxs,1], y_approx[j,idxs], "--", label=label, color=cmap.tab20(2*c+1))
    ax.set_title(f"test data index: {j}")
plt.show()
```
%% Cell type:markdown id:187e51ae-047f-42a4-8671-3cb0eaa11472 tags:
## Global Sensitivity Analysis
%% Cell type:code id:8e990349-158f-4aba-90fe-2e56d6b248e7 tags:
``` python
max_vals = np.max(surrogate.sobol, axis=1)
l2_vals = np.linalg.norm(surrogate.sobol, axis=1)
```
%% Cell type:code id:ccbcf0d9-d125-401c-a127-5d86b4aec94c tags:
``` python
sobol_max = [list(reversed([x for _,x in sorted(zip(max_vals, index_set.sobol_tuples))]))]
sobol_L2 = [list(reversed([x for _,x in sorted(zip(l2_vals, index_set.sobol_tuples))]))]
print(f"{'#':>2} {'max':<10} {'L2':<10}")
print("-"*25)
for j in range(10):
print(f"{j+1:>2} {str(sobol_max[0][j]):<10} {str(sobol_L2[0][j]):<10}")
```
%% Cell type:code id:50af6adc-ac11-458a-91c3-598240960cf9 tags:
``` python
from math import factorial  # np.math.factorial was removed in NumPy 2.0

def binom(n, k):
    return factorial(n) / factorial(k) / factorial(n-k)

# boundaries between the blocks of first-, second-, third-, ... order Sobol tuples
bdry = [0]*7
for jj in range(1, len(bdry)):
    bdry[jj] = int(bdry[jj-1] + binom(len(bdry)-1, jj))
fig, axes = plt.subplot_mosaic([["max"], ["L2"]])
axes["max"].axvspan(bdry[0], bdry[1], facecolor="k", alpha=0.2, label="SOB = (i,)")
axes["max"].axvspan(bdry[1], bdry[2], facecolor="k", alpha=0.15, label="SOB = (i,j)")
axes["max"].axvspan(bdry[2], bdry[3], facecolor="k", alpha=0.1, label="SOB = (i,j,k)")
axes["max"].semilogy(range(surrogate.sobol.shape[0]), max_vals,
"-o", linewidth=1, label="max")
axes["max"].legend()
axes["max"].grid()
axes["L2"].axvspan(bdry[0], bdry[1], facecolor="k", alpha=0.2, label="SOB = (i,)")
axes["L2"].axvspan(bdry[1], bdry[2], facecolor="k", alpha=0.15, label="SOB = (i,j)")
axes["L2"].axvspan(bdry[2], bdry[3], facecolor="k", alpha=0.1, label="SOB = (i,j,k)")
axes["L2"].semilogy(range(surrogate.sobol.shape[0]), l2_vals,
"-o", linewidth=1, label="$L^2$")
axes["L2"].legend()
axes["L2"].grid()
plt.show()
```
%% Cell type:markdown id:7252d885-84b5-4a47-9cc2-b3f6374351b9 tags:
## Inverse Problem & Parameter Reconstruction
%% Cell type:markdown id:b0d4d74c-682c-4823-93d1-121fd3eb164f tags:
#### Load measurements
%% Cell type:code id:ecc36513-8627-4f07-886d-9dcbfc6f9e88 tags:
``` python
# measurement
x_true = x_data[-1].reshape(1, -1)
y_true = y_data[-1].reshape(1, -1)
b = 0.015
noise = np.random.normal(0, b, y_true.shape)
y_meas = y_true + noise
```
%% Cell type:code id:d0fa7413-97cf-4dec-9403-b0f64a7818c7 tags:
``` python
fig = plt.figure()
for idxs, label in zip(angle_idxs, angle_labels):
    plt.plot(angles[idxs,1], y_true[0,idxs], label=label)
    plt.scatter(angles[idxs,1], y_meas[0, idxs], s=2)
plt.legend()
plt.title("Ground truth (solid) and noisy measurement (dots)")
plt.show()
```
%% Cell type:markdown id:40495846-7f57-4ee6-9b06-26d91882addc tags:
#### Define Bayesian posterior
%% Cell type:code id:1186d066-6018-4a18-a805-d5e15d1eaad7 tags:
``` python
# define forward and error model
hyper_params = [pt.parameter.Parameter(index=len(params)+1, name="b", domain=[0, 0.1], distribution="uniform")]
dim = len(params+hyper_params)
prior = pt.sampler.ParameterSampler(params+hyper_params) # extended prior
forward_model = lambda x: surrogate.eval(x[:,:-1]) # extended forward model
sigma = lambda x: np.abs(x[:, -1]).reshape(-1, 1)*np.ones((1, y_meas.shape[1])) # error model
# define Bayesian posterior
lh = Gaussian(forward_model, sigma, dim)
def log_posterior(x, meas):
    return lh.log_likelihood(x, meas) + prior.log_pdf(x)
```
%% Cell type:markdown id:ba006e83-5173-419b-9d11-176c14e60242 tags:
#### Setup MCMC
%% Cell type:code id:f3a47c73-b2ae-427f-b56a-e194e5c84dbe tags:
``` python
# setup MCMC
n_walkers = 15
chain_length = 1000
seed = prior.sample(n_walkers)
```
%% Cell type:code id:68accd23-f187-4f69-9c37-dac3ada699d7 tags:
``` python
# define runner
runner = emcee.EnsembleSampler(n_walkers, dim, log_posterior, args=[y_meas])
```
%% Cell type:markdown id:02c2b899-95df-4be0-a384-269f2cf17c21 tags:
#### Run MCMC
%% Cell type:code id:2f0a03eb-1bde-4fea-9682-fd123b6b0ba5 tags:
``` python
# discard burn-in
state = runner.run_mcmc(seed, 500, progress=True)
runner.reset()
```
%% Cell type:code id:30451bb1-e0bd-4435-8db5-2dd24488d010 tags:
``` python
# run MCMC
runner.run_mcmc(state, chain_length, progress=True);  # continue from the post-burn-in state
```
%% Cell type:markdown id:a3571a01-e1bd-4a85-924f-043ef1d574b1 tags:
#### Data Analysis
%% Cell type:code id:1653cf3c-3f1d-40bf-84fd-28ce76676364 tags:
``` python
# simple evaluation
samples = runner.get_chain(flat=True)
mean = np.mean(samples, axis=0)
std = np.std(samples, axis=0)
```
%% Cell type:code id:2214221a-a2be-40f7-88c4-5c0d7091210e tags:
``` python
# plot posterior distribution
samples = runner.get_chain(flat=True)
layout = [[j] for j in range(7)]
layout = [[0, 1], [2, 3], [4, 5], [6, 6]]
fig, axes = plt.subplot_mosaic(layout, figsize=(12, 7.5))
for j, (param, ax) in enumerate(zip(params+hyper_params, axes.values())):
    bins = 50 if param.name != "b" else 150
    ax.hist(samples[:, j], bins=bins, density=True, range=param.domain, alpha=0.5)
    ax.axvline(x=mean[j], color=cmap.tab10(0), label="mean")
    ax.axvline(x=mean[j]-std[j], ls="--", color=cmap.tab10(0), label="std")
    ax.axvline(x=mean[j]+std[j], ls="--", color=cmap.tab10(0))
    if param.name == "b":
        ax.axvline(x=b, color="k", label="true value")
    else:
        ax.axvline(x=x_true[0, j], color="k", label="true value")
    ax.set_ylabel(f"{j+1} - {param.name}")
    ax.set_yticks([])
    ax.legend()
plt.show()
```
%% Cell type:code id:e9b15e08-8248-46cd-99c6-e5f8cd64c6e0 tags:
``` python
fig, axes = plt.subplot_mosaic([["approx", "error"]], figsize=(15, 5))
vals = surrogate.eval(np.mean(samples, axis=0)[:-1])
errs = y_true - vals
for j, (idxs, label) in enumerate(zip(angle_idxs, angle_labels)):
    color = cmap.tab10(j)
    axes["approx"].plot(angles[idxs,1], vals[0, idxs], color=color, label=label)
    axes["approx"].plot(angles[idxs,1], y_true[0, idxs], "--", color=color)
    axes["approx"].scatter(angles[idxs,1], y_meas[0, idxs], s=2, color=color)
    axes["approx"].legend()
    axes["approx"].set_title("Posterior-mean prediction (solid), ground truth (dashed), measurement (dots)")
    axes["error"].plot(angles[idxs, 1], errs[0, idxs], color=color, label=label)
axes["error"].legend()
axes["error"].grid()
axes["error"].set_title("Pointwise deviation from ground truth")
plt.show()
```
%% Cell type:code id:36afafba-3de1-4dde-b114-b6b278a62473 tags:
``` python
```
%% Cell type:markdown id:a3cf80d8-ef6d-4dec-8234-22967c45231d tags:
# XRR
%% Cell type:code id:3f27e08c-8fab-4917-9d4a-caed6af9ad88 tags:
``` python
import sys
import numpy as np
import emcee
import pythia as pt # see doc: www.pythia-uq.com
import matplotlib.pyplot as plt
import matplotlib.cm as cmap
sys.path.append("../../src")
from utils import Gaussian # local import
```
%% Cell type:markdown id:c810ea31-b8c9-46c7-a4b4-e058a9ae5030 tags:
## Load XRR data
%% Cell type:code id:504e11ab-01f5-455a-8a72-7097278f8023 tags:
``` python
path = "../../data/xrr/"
```
%% Cell type:code id:c547374f-6999-4923-beaa-3da43fbbab79 tags:
``` python
omega = np.load(path+"omega.npy")
print(f"shape omega: {omega.shape}")
```
%% Cell type:code id:f9b2d87e-2fcb-463b-b39c-d87844772c99 tags:
``` python
x_data = np.load(path+"x_train.npy") # (10_000, 5)
y_data = np.log10(np.load(path+"y_train.npy")) # (10_000, 585)
w_data = np.load(path+"w_train.npy") # (10_000, )
```
%% Cell type:code id:700eec32-db8b-4443-a05d-fbce6dedc62c tags:
``` python
fig = plt.figure()
for j, y in enumerate(y_data[:6]):
    plt.plot(omega, y, label=f"sample {j+1}", color=cmap.tab20(j))
plt.legend()
plt.show()
```
%% Cell type:code id:a16775a5-b12f-4792-957a-76d39be52aa7 tags:
``` python
# set parameter information
param1 = pt.parameter.Parameter(index=1, name="ρRu", domain=[12590-25, 12590+25], distribution="uniform")
param2 = pt.parameter.Parameter(index=2, name="wRu", domain=[291-1, 291+2], distribution="uniform")
param3 = pt.parameter.Parameter(index=3, name="σRu", domain=[4-1, 4+1], distribution="uniform")
param4 = pt.parameter.Parameter(index=4, name="wSiO2", domain=[18-3, 18+3], distribution="uniform")
param5 = pt.parameter.Parameter(index=5, name="wRuO2", domain=[18-1, 18+1], distribution="uniform")
params = [param1, param2, param3, param4, param5]
param_names = [p.name for p in params]
```
%% Cell type:code id:ecdb40d0-5cb5-488e-b757-2d9be9112bff tags:
``` python
print("parameter dist domain")
print("-"*40)
for param in params:
print(f"{param.name:<9} {param.distribution:<5} {np.array(param.domain, dtype=int)}")
```
%% Cell type:code id:79a57610-24f5-46b3-9ae0-0c1182c3bd61 tags:
``` python
fig, ax = plt.subplot_mosaic([param_names], constrained_layout=True, figsize=(10, 3))
for j, param in enumerate(params):
    ax[param.name].hist(x_data[:, j], bins=50)
    ax[param.name].set_title(param.name)
    ax[param.name].set_yticks([])
plt.show()
```
%% Cell type:markdown id:5c22c2ef-b248-4b5b-8065-5c325aa25d85 tags:
## Approximate Forward Problem
%% Cell type:markdown id:18e99871-d6f8-4f7b-9382-f6503c92d93b tags:
#### Split training and test data
%% Cell type:code id:84c2cbfd-9ce8-437a-8d1a-385e479ee59b tags:
``` python
split = int(0.8*x_data.shape[0])
# training data
x_train = x_data[:split]
y_train = y_data[:split]
w_train = w_data[:split]
# test data
x_test = x_data[split:]
y_test = y_data[split:]
```
%% Cell type:markdown id:02490c03-13e6-48c2-9552-16ce43d6d939 tags:
#### Define index set
%% Cell type:code id:e2e8204e-c516-48fc-85cd-bcabf3b38938 tags:
``` python
# define multiindices
indices = pt.index.simplex(dimension=len(params), maximum=5)
index_set = pt.index.IndexSet(indices)
print(f"Multiindex information:")
print(f" number of indices: {index_set.shape[0]}")
print(f" dimension: {index_set.shape[1]}")
print(f" maximum dimension: {index_set.max}")
print(f" number of sobol indices: {len(index_set.sobol_tuples)}")
```
%% Cell type:markdown id:cbcc9c33-79e6-4ffb-b51a-9615e3de4907 tags:
#### Train PC approximation
%% Cell type:code id:4884e6b5-1fb4-4d57-800a-774a6fb3e9a2 tags:
``` python
# run pc approximation
surrogate = pt.chaos.PolynomialChaos(params, index_set, x_train, w_train, y_train)
```
%% Cell type:markdown id:b096635a-830a-4039-a07c-5145dd0a55c4 tags:
#### Compute approximation error
%% Cell type:code id:c311f090-b48e-4ffa-b963-a97827583247 tags:
``` python
# test approximation error
y_approx = surrogate.eval(x_test)
# L2-L2 error
err_abs = np.sum(np.linalg.norm(y_test - y_approx, axis=1))/y_test.shape[0]
err_rel = np.sum(np.linalg.norm((y_test - y_approx)/y_test, axis=1))/y_test.shape[0]
print(f"L2-L2 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)")
# C0-L2 error
err_abs = np.sum(np.max(np.abs(y_test - y_approx), axis=1))/y_test.shape[0]
err_rel = np.sum(np.max(np.abs(y_test - y_approx)/np.abs(y_test), axis=1))/y_test.shape[0]
print(f"L2-C0 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)")
```
%% Cell type:code id:cf030598-1d06-400e-a7f7-e3ee3ffcea9b tags:
``` python
fig, axes = plt.subplot_mosaic([["1", "2", "3"], ["4", "5", "6"]], constrained_layout=True, figsize=(9,6))
for ax in axes.values():
    j = np.random.randint(0, x_test.shape[0])
    ax.plot(omega, y_test[j], label=f"ground truth", lw=0.5)
    ax.plot(omega, y_approx[j], label="approx", lw=0.5)
    ax.set_title(f"test data index: {j}")
plt.show()
```
%% Cell type:markdown id:187e51ae-047f-42a4-8671-3cb0eaa11472 tags:
## Global Sensitivity Analysis
%% Cell type:code id:8e990349-158f-4aba-90fe-2e56d6b248e7 tags:
``` python
max_vals = np.max(surrogate.sobol, axis=1)
l2_vals = np.linalg.norm(surrogate.sobol, axis=1)
```
%% Cell type:code id:ccbcf0d9-d125-401c-a127-5d86b4aec94c tags:
``` python
sobol_max = [list(reversed([x for _,x in sorted(zip(max_vals, index_set.sobol_tuples))]))]
sobol_L2 = [list(reversed([x for _,x in sorted(zip(l2_vals, index_set.sobol_tuples))]))]
print(f"{'#':>2} {'max':<10} {'L2':<10}")
print("-"*25)
for j in range(10):
print(f"{j+1:>2} {str(sobol_max[0][j]):<10} {str(sobol_L2[0][j]):<10}")
```
%% Cell type:code id:50af6adc-ac11-458a-91c3-598240960cf9 tags:
``` python
from math import factorial  # np.math.factorial was removed in NumPy 2.0

def binom(n, k):
    return factorial(n) / factorial(k) / factorial(n-k)

# boundaries between the blocks of first-, second-, third-, ... order Sobol tuples
bdry = [0]*7
for jj in range(1, len(bdry)):
    bdry[jj] = int(bdry[jj-1] + binom(len(bdry)-1, jj))
fig, axes = plt.subplot_mosaic([["max"], ["L2"]])
axes["max"].axvspan(bdry[0], bdry[1], facecolor="k", alpha=0.2, label="SOB = (i,)")
axes["max"].axvspan(bdry[1], bdry[2], facecolor="k", alpha=0.15, label="SOB = (i,j)")
axes["max"].axvspan(bdry[2], bdry[3], facecolor="k", alpha=0.1, label="SOB = (i,j,k)")
axes["max"].semilogy(range(surrogate.sobol.shape[0]), max_vals,
"-o", linewidth=1, label="max")
axes["max"].legend()
axes["max"].grid()
axes["L2"].axvspan(bdry[0], bdry[1], facecolor="k", alpha=0.2, label="SOB = (i,)")
axes["L2"].axvspan(bdry[1], bdry[2], facecolor="k", alpha=0.15, label="SOB = (i,j)")
axes["L2"].axvspan(bdry[2], bdry[3], facecolor="k", alpha=0.1, label="SOB = (i,j,k)")
axes["L2"].semilogy(range(surrogate.sobol.shape[0]), l2_vals,
"-o", linewidth=1, label="$L^2$")
axes["L2"].legend()
axes["L2"].grid()
plt.show()
```
%% Cell type:markdown id:7252d885-84b5-4a47-9cc2-b3f6374351b9 tags:
## Inverse Problem & Parameter Reconstruction
%% Cell type:markdown id:fb10b512-2d01-445a-8711-3b57524e950b tags:
$f(y_1,y_2) = y_1 $
%% Cell type:markdown id:b0d4d74c-682c-4823-93d1-121fd3eb164f tags:
#### Load measurements
%% Cell type:code id:ecc36513-8627-4f07-886d-9dcbfc6f9e88 tags:
``` python
# measurement
use_real_measurement = True
if use_real_measurement is True:
    y_meas = np.load(path+"measurement.npy").reshape(1, -1)
    y_meas[0, np.where(y_meas[0]<1e-3)[0]] = 1e-3 # fix numerical issues
    y_meas = np.log10(y_meas)
else:
    x_true = x_data[-1].reshape(1, -1)
    y_true = y_data[-1].reshape(1, -1)
    b = 0.05
    noise = np.random.normal(0, b, y_true.shape)
    y_meas = y_true + noise
```
%% Cell type:code id:d0fa7413-97cf-4dec-9403-b0f64a7818c7 tags:
``` python
fig = plt.figure()
if use_real_measurement is False:
    plt.plot(omega, y_true[0], label="y_true")
plt.scatter(omega, y_meas[0], s=2, label="measurement", color=cmap.tab10(1))
plt.legend()
plt.title("Ground truth (solid) and noisy measurement (dots)")
plt.show()
```
%% Cell type:markdown id:40495846-7f57-4ee6-9b06-26d91882addc tags:
#### Define Bayesian posterior
%% Cell type:code id:1186d066-6018-4a18-a805-d5e15d1eaad7 tags:
``` python
# define forward and error model
hyper_params = [pt.parameter.Parameter(index=len(params)+1, name="b", domain=[0, 0.5], distribution="uniform")]
dim = len(params+hyper_params)
prior = pt.sampler.ParameterSampler(params+hyper_params) # extended prior
forward_model = lambda x: surrogate.eval(x[:,:-1]) # extended forward model
sigma = lambda x: np.abs(x[:, -1]).reshape(-1, 1)*np.ones((1, y_meas.shape[1])) # error model
# define Bayesian posterior
lh = Gaussian(forward_model, sigma, dim)
def log_posterior(x, meas):
    return lh.log_likelihood(x, meas) + prior.log_pdf(x)
```
%% Cell type:markdown id:ba006e83-5173-419b-9d11-176c14e60242 tags:
#### Setup MCMC
%% Cell type:code id:f3a47c73-b2ae-427f-b56a-e194e5c84dbe tags:
``` python
# setup MCMC
n_walkers = 15
chain_length = 1000
seed = prior.sample(n_walkers)
```
%% Cell type:code id:68accd23-f187-4f69-9c37-dac3ada699d7 tags:
``` python
# define runner
runner = emcee.EnsembleSampler(n_walkers, dim, log_posterior, args=[y_meas])
```
%% Cell type:markdown id:02c2b899-95df-4be0-a384-269f2cf17c21 tags:
#### Run MCMC
%% Cell type:code id:2f0a03eb-1bde-4fea-9682-fd123b6b0ba5 tags:
``` python
# discard burn-in
state = runner.run_mcmc(seed, 500, progress=True)
runner.reset()
```
%% Cell type:code id:30451bb1-e0bd-4435-8db5-2dd24488d010 tags:
``` python
# run MCMC
runner.run_mcmc(state, chain_length, progress=True);  # continue from the post-burn-in state
```
%% Cell type:markdown id:a3571a01-e1bd-4a85-924f-043ef1d574b1 tags:
#### Data Analysis
%% Cell type:code id:1653cf3c-3f1d-40bf-84fd-28ce76676364 tags:
``` python
# simple evaluation
samples = runner.get_chain(flat=True)
mean = np.mean(samples, axis=0)
std = np.std(samples, axis=0)
```
%% Cell type:code id:2214221a-a2be-40f7-88c4-5c0d7091210e tags:
``` python
# plot posterior distribution
samples = runner.get_chain(flat=True)
layout = [[j] for j in range(7)]
layout = [[0, 1], [2, 3], [4, 5]]
fig, axes = plt.subplot_mosaic(layout, figsize=(12, 7.5))
for j, (param, ax) in enumerate(zip(params+hyper_params, axes.values())):
    bins = 50 if param.name != "b" else 150
    ax.hist(samples[:, j], bins=bins, density=True, range=param.domain, alpha=0.5)
    ax.axvline(x=mean[j], color=cmap.tab10(0), label="mean")
    ax.axvline(x=mean[j]-std[j], ls="--", color=cmap.tab10(0), label="std")
    ax.axvline(x=mean[j]+std[j], ls="--", color=cmap.tab10(0))
    if use_real_measurement is False:
        if param.name == "b":
            ax.axvline(x=b, color="k", label="true value")
        else:
            ax.axvline(x=x_true[0, j], color="k", label="true value")
    ax.set_ylabel(f"{j+1} - {param.name}")
    ax.set_yticks([])
    ax.legend()
plt.show()
```
%% Cell type:code id:e9b15e08-8248-46cd-99c6-e5f8cd64c6e0 tags:
``` python
vals = surrogate.eval(np.mean(samples, axis=0)[:-1])
if use_real_measurement is True:
    errs = np.abs((y_meas - vals))
else:
    errs = np.abs((y_true - vals))
fig, axes = plt.subplot_mosaic([["approx", "error"]], figsize=(15, 5))
axes["approx"].plot(omega, vals[0], label="approx", color=cmap.tab10(0))
if use_real_measurement is False:
    axes["approx"].plot(omega, y_true[0], "--", label="ground truth", color=cmap.tab10(1))
axes["approx"].scatter(omega, y_meas[0], s=2, label="measurement", color=cmap.tab10(1))
axes["approx"].legend()
axes["approx"].set_title("Posterior-mean prediction (solid) and measurement (dots)")
axes["error"].plot(omega, errs[0])
axes["error"].grid()
axes["error"].set_title("Pointwise deviation from ground truth")
plt.show()
```
%% Cell type:code id:36afafba-3de1-4dde-b114-b6b278a62473 tags:
``` python
```
img/notebooks/scat_setup.png (100 KiB)

src/utils.py:
from typing import Union, List, Tuple
import numpy as np
import pythia as pt


class Gaussian:
    """Gaussian likelihood function for differentiable forward problem.

    Assemble the Gaussian likelihood

    .. math::
        \\mathcal{L}(x) = \\frac{1}{(2\\pi)^{M/2}\\sqrt{\\det \\Sigma}} \\exp\\Bigl( -\\frac{1}{2} \\Vert \\Sigma^{-1/2}(f(x)-\\delta) \\Vert^2 \\Bigr)

    for covariance matrix
    :math:`\\Sigma = \\operatorname{diag}(\\sigma_1(y_1),\\dots,\\sigma_M(y_M))`
    and measurement/observation :math:`\\delta`.

    Parameters
    ----------
    f : function
        Forward model.
    sigma : function
        Error model for standard deviation describing :math:`\\Sigma`.
    xdim : int
        Number of stochastic parameters.
    """

    def __init__(self, f, sigma, xdim):
        """ Initialize Gaussian likelihood object. """
        self._f = f
        self._std = sigma
        self._xdim = xdim

    def likelihood(self, x, y_meas):
        """ Evaluate the Gaussian likelihood with specified measurement.

        Parameters
        ----------
        x : array_like
            Realizations of stochastic parameters.
        y_meas : array_like
            Measurement :math:`\\delta`.
        """
        # check dimensionality and reshape if necessary.
        y, f_val, std, mdim, fdim = self._reshape(x, y_meas)
        a = (2*np.pi)**(-mdim*fdim/2)  # float
        b = 1/np.prod(std, axis=(0, 2))**mdim  # shape: (#points,)
        c = np.prod(np.exp(-(f_val-y)**2/(2*std**2)),
                    axis=(0, 2))  # (#points,)
        return a*b*c

    def log_likelihood(self, x, y_meas):
        """ Evaluate the Gaussian log-likelihood with specified measurement.

        Parameters
        ----------
        x : array_like
            Realizations of stochastic parameters.
        y_meas : array_like
            Measurement :math:`\\delta`.
        """
        # check dimensionality and reshape if necessary.
        y, f_val, std, mdim, fdim = self._reshape(x, y_meas)
        a = -mdim*fdim/2 * np.log(2*np.pi)  # float
        b = -mdim * np.sum(np.log(std), axis=(0, 2))  # (#points,)
        c = np.sum(-(f_val-y)**2/(2*std**2), axis=(0, 2))  # (#points,)
        return a+b+c

    def _reshape(self, x, y_meas):
        """ Check shape compatibility of input and reshape if necessary.

        Parameters
        ----------
        x : array_like
            Realizations of stochastic parameters.
        y_meas : array_like
            Measurement :math:`\\delta`.
        """
        x_val = np.array(x)
        y_val = np.array(y_meas)
        # x shape: (#points, xdim)
        if x_val.ndim < 2:
            x_val.shape = 1, -1
        assert x_val.shape[1] == self._xdim
        # y_meas shape: (mdim, fdim)
        if y_val.ndim < 2:
            y_val.shape = 1, -1
        assert x_val.ndim == 2 and y_val.ndim == 2
        # get number of measurements and image dim of f
        mdim, fdim = y_val.shape
        f_val = self._f(x_val)
        # f_val shape: (#points, fdim)
        if f_val.ndim < 2:
            f_val.shape = 1, -1
        assert f_val.shape[1] == fdim
        std = self._std(x_val)
        # std shape: (#points, fdim)
        if std.ndim < 2:
            std.shape = 1, -1
        if std.shape[1] == 1:
            std = std*np.ones(fdim)
        assert std.shape[1] == fdim
        assert np.min(std) >= 0
        # Reshape for fast multiplication
        y = np.expand_dims(y_meas, axis=1)  # (mdim, 1, fdim)
        std = np.expand_dims(self._std(x_val), axis=0)  # (1, #points, fdim)
        f_val = np.expand_dims(f_val, axis=0)  # (1, #points, fdim)
        return y, f_val, std, mdim, fdim
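

# Illustrative usage sketch (not part of the original utilities): the names and
# values below are made up for demonstration only. The tutorials combine this
# class with a polynomial chaos surrogate as forward model and a constant
# error model, analogous to the notebooks above.
def _demo_gaussian_likelihood() -> np.ndarray:
    """Evaluate the Gaussian log-likelihood for a toy linear forward model."""
    A = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])  # maps 2 parameters to 3 outputs

    def forward(x: np.ndarray) -> np.ndarray:
        return x @ A.T  # shape (#points, 3)

    def sigma(x: np.ndarray) -> np.ndarray:
        return 0.1 * np.ones((x.shape[0], 3))  # constant standard deviation per output

    lh = Gaussian(forward, sigma, xdim=2)
    x = np.array([[0.5, -0.2], [0.0, 0.0]])  # two parameter realizations
    y_meas = np.array([[0.4, -0.1, 0.3]])  # one measurement of the 3 outputs
    return lh.log_likelihood(x, y_meas)  # shape: (2,)

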
class Batman:
    """Target function for tutorial 1."""

    def __init__(self) -> None:
        """Target function for tutorial 1."""

    def _f(self, y: np.ndarray) -> np.ndarray:
        """Helper function."""
        val = (np.abs(y/2)
               - (3*np.sqrt(33)-7)/122 * y**2
               - 3
               + np.sqrt(1-(np.abs(np.abs(y)-2)-1)**2)
               )
        return val

    def _g(self, y: np.ndarray) -> np.ndarray:
        """Helper function."""
        a = -(np.abs(y)-1)*(np.abs(y)-0.75)
        b = np.zeros_like(a)
        idx = np.where(np.abs(a) <= 1e-14)[0]
        b[idx] = 1
        idx = np.where(np.abs(a) >= 1e-14)[0]
        b[idx] = np.abs(a[idx])/a[idx]
        val = 9*np.sqrt(b) - 8*np.abs(y)
        return val

    def _h(self, y: np.ndarray) -> np.ndarray:
        """Helper function."""
        a = -(np.abs(y)-0.5)*(np.abs(y)-0.75)
        val = 3*np.abs(y) + 0.75*np.sqrt(np.abs(a)/a)
        return val

    def _p(self, y: np.ndarray) -> np.ndarray:
        """Helper function."""
        a = -(y-0.5)*(y+0.5)
        val = 2.25*np.sqrt(np.abs(a)/a)
        return val

    def _q(self, y: np.ndarray) -> np.ndarray:
        """Helper function."""
        a = 6*np.sqrt(10)/7
        b = (1.5 - 0.5*np.abs(y)) * np.sqrt(np.abs(np.abs(y)-1)/(np.abs(y)-1))
        c = 6*np.sqrt(10)/14 * np.sqrt(4-(np.abs(y)-1)**2)
        return a + b - c

    def _r(self, y: np.ndarray) -> np.ndarray:
        """Helper function."""
        a = np.sqrt(1-(y/7)**2)
        b = np.sqrt(np.abs(np.abs(y)-3)/(np.abs(y)-3))
        return 3 * a * b

    def _s(self, y: np.ndarray) -> np.ndarray:
        """Helper function."""
        a = np.sqrt(1-(y/7)**2)
        b = np.sqrt(np.abs(np.abs(y)-4)/(np.abs(y)-4))
        return -3 * a * b

    def _t(self, y: np.ndarray) -> np.ndarray:
        """Helper function."""
        return np.sqrt(np.abs(y)/y)

    def upper(self, y: np.ndarray) -> np.ndarray:
        """Upper part of the batman function."""
        val = np.zeros(y.size)
        # set r parts
        idx = np.where(np.logical_and(y < -3, y >= -7))
        val[idx] = self._r(y[idx])
        idx = np.where(np.logical_and(y <= 7, y > 3))
        val[idx] = self._r(y[idx])
        # set q parts
        idx = np.where(np.logical_and(y < -1, y >= -3))
        val[idx] = self._q(y[idx])
        idx = np.where(np.logical_and(y <= 3, y > 1))
        val[idx] = self._q(y[idx])
        # set g parts
        idx = np.where(np.logical_and(y <= -0.75, y >= -1))
        val[idx] = self._g(y[idx])
        idx = np.where(np.logical_and(y <= 1, y >= 0.75))
        val[idx] = self._g(y[idx])
        # set h parts
        idx = np.where(np.logical_and(y < -0.5, y > -0.75))
        val[idx] = self._h(y[idx])
        idx = np.where(np.logical_and(y < 0.75, y > 0.5))
        val[idx] = self._h(y[idx])
        # set p parts
        idx = np.where(np.logical_and(y >= -0.5, y <= 0.5))
        val[idx] = self._p(y[idx])
        return val + 4

    def lower(self, y: np.ndarray) -> np.ndarray:
        """Lower part of the batman function."""
        val = np.zeros(y.size)
        # set s parts
        idx = np.where(np.logical_and(y <= -4, y >= -7))
        val[idx] = self._s(y[idx])
        idx = np.where(np.logical_and(y <= 7, y >= 4))
        val[idx] = self._s(y[idx])
        # set f parts
        idx = np.where(np.logical_and(y <= 4, y >= -4))
        val[idx] = self._f(y[idx])
        return val + 4

    def eval(self, y: np.ndarray) -> np.ndarray:
        """Evaluate target function.

        Parameters
        ----------
        y : np.ndarray
            Parameter values.

        Returns
        -------
        :
            Forward model evaluated in parameter values.
        """
        if y.ndim > 1:
            y = y.flatten()
        return np.array([self.upper(y), self.lower(y)]).T


class Sobol:
    """Target function for tutorial 2."""

    def __init__(self, a: Union[List, Tuple, np.ndarray]) -> None:
        """Target function for tutorial 2.

        Parameters
        ----------
        a : array_like
            Parameters for Sobol functions.
        """
        self.a = np.array(a)
        assert self.a.ndim == 1

    def eval(self, y: np.ndarray) -> np.ndarray:
        """Evaluate target function.

        Parameters
        ----------
        y : np.ndarray
            Parameter values.

        Returns
        -------
        :
            Forward model evaluated in parameter values.
        """
        if y.ndim == 1:
            y.shape = 1, -1
        assert y.ndim == 2
        assert y.shape[1] == self.a.size
        ret = (np.abs(4.0*y - 2.0) + self.a) / (1.0 + self.a)
        return np.prod(ret, axis=1).reshape(-1, 1)

    def indices(self) -> Tuple[List, np.ndarray]:
        """Compute analytical Sobol indices for target function.

        Returns
        -------
        sobol_tuples : list of tuples
            List of Sobol tuples.
        sobol_indices : np.ndarray
            Values of Sobol indices ordered the same as `sobol_tuples`.
        """
        sobol = {}
        beta = (1.0+self.a)**(-2) / 3
        var = np.prod(1.0 + beta) - 1.0
        sobol_tuples = pt.index.IndexSet(
            pt.index.tensor([1]*self.a.size)).sobol_tuples
        for sdx in sobol_tuples:
            sobol[sdx] = 1.0 / var
            for k in sdx:
                sobol[sdx] *= beta[k-1]
        sobol_indices = np.array(list(sobol.values()))
        return sobol_tuples, sobol_indices


class Sine:
    """Target function for tutorial 3."""

    def __init__(self, scales: Union[list, tuple, np.ndarray]) -> None:
        """Target function for tutorial 3.

        Parameters
        ----------
        scales : array_like
            Scales for each summand.
        """
        self.scales = np.array(scales)
        self.points = np.linspace(0, 2*np.pi, 200)
        assert self.scales.ndim == 1

    def eval(self, x: np.ndarray) -> np.ndarray:
        """Evaluate target function.

        Parameters
        ----------
        x : np.ndarray
            Parameter values.

        Returns
        -------
        :
            Forward model evaluated in parameter values.
        """
        ret = np.zeros((x.shape[0], self.points.size))
        for j, (s, xj) in enumerate(zip(self.scales, x.T)):
            ret += (s * np.reshape(xj, (-1, 1))
                    * np.sin((j+1)*self.points).reshape(1, -1))
        return ret


if __name__ == "__main__":
    pass
pass