diff --git a/app/exercises/Exercise 1 - Scat.ipynb b/app/exercises/Exercise 1 - Scat.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8a0ae64666b5c333bd78cebbbc1d8b4452d3d5ec --- /dev/null +++ b/app/exercises/Exercise 1 - Scat.ipynb @@ -0,0 +1,591 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a3cf80d8-ef6d-4dec-8234-22967c45231d", + "metadata": {}, + "source": [ + "# Scatterometry" + ] + }, + { + "cell_type": "markdown", + "id": "b75bfa2e-067f-48b8-93e5-e84bc7da3ff3", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f27e08c-8fab-4917-9d4a-caed6af9ad88", + "metadata": {}, + "outputs": [], + "source": [ + "# from contextlib import contextmanager\n", + "import time\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.cm as cmap\n", + "import emcee\n", + "import pythia as pt # see doc: www.pythia-uq.com\n", + "from utils import Gaussian # local import" + ] + }, + { + "cell_type": "markdown", + "id": "c810ea31-b8c9-46c7-a4b4-e058a9ae5030", + "metadata": { + "tags": [] + }, + "source": [ + "## Load scatterometry data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "504e11ab-01f5-455a-8a72-7097278f8023", + "metadata": {}, + "outputs": [], + "source": [ + "path = \"../data/scat/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c547374f-6999-4923-beaa-3da43fbbab79", + "metadata": {}, + "outputs": [], + "source": [ + "angles = np.load(path+\"angles.npy\") # (90, 2)\n", + "\n", + "# reshape\n", + "angles = np.concatenate([angles, angles], axis=0) # (180, 2)\n", + "angle_idxs = [range(45), range(45,90), range(90,135), range(135,180)]\n", + "angle_labels = [\"$\\phi=0^\\circ$, s pol\",\"$\\phi=0^\\circ$, p pol\",\n", + " \"$\\phi=90^\\circ$, s pol\",\"$\\phi=90^\\circ$, p pol\"]\n", + "print(f\"azimuth phi: {np.unique(angles[:,0])}\")\n", + "print(f\"incidence theta: {np.unique(angles[:,1])}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9b2d87e-2fcb-463b-b39c-d87844772c99", + "metadata": {}, + "outputs": [], + "source": [ + "x_data = np.load(path+\"x_train.npy\") # (10_000, 6)\n", + "y_data = np.load(path+\"y_train.npy\") # (90, 10_000, 2)\n", + "w_data = np.load(path+\"w_train.npy\") # (10_000, )\n", + "\n", + "# reshape y_data\n", + "y_data = np.concatenate([y_data[:45,:,0], y_data[:45,:,1],\n", + " y_data[45:,:,0], y_data[45:,:,1]], axis=0).T # (10_000, 180)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "700eec32-db8b-4443-a05d-fbce6dedc62c", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure()\n", + "for idxs, label in zip(angle_idxs, angle_labels):\n", + " plt.plot(angles[idxs,1], y_data[375,idxs], label=label)\n", + " plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a16775a5-b12f-4792-957a-76d39be52aa7", + "metadata": {}, + "outputs": [], + "source": [ + "# load parameter information\n", + "params = []\n", + "for dct in np.load(path+\"parameter.npy\", allow_pickle=True):\n", + " params.append(pt.parameter.Parameter(\n", + " index=dct[\"_idx\"], name=dct[\"_name\"], domain=dct[\"_dom\"], distribution=dct[\"_dist\"],\n", + " alpha=dct[\"_alpha\"], beta=dct[\"_beta\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecdb40d0-5cb5-488e-b757-2d9be9112bff", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"parameter dist domain\")\n", + "print(\"-\"*30)\n", + "for param in params:\n", + " print(f\"{param.name:<9} {param.distribution:<5} {np.array(param.domain, dtype=int)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79a57610-24f5-46b3-9ae0-0c1182c3bd61", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplot_mosaic([[\"h\", \"cd\", \"swa\"],[\"t\", \"r_top\", \"r_bot\"]], constrained_layout=True)\n", + "for j, param in enumerate(params):\n", + " ax[param.name].hist(x_data[:, j], bins=50)\n", + " ax[param.name].set_title(param.name)\n", + " ax[param.name].set_yticks([])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "5c22c2ef-b248-4b5b-8065-5c325aa25d85", + "metadata": { + "tags": [] + }, + "source": [ + "## Approximate Forward Problem" + ] + }, + { + "cell_type": "markdown", + "id": "18e99871-d6f8-4f7b-9382-f6503c92d93b", + "metadata": { + "tags": [] + }, + "source": [ + "#### Split training and test data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84c2cbfd-9ce8-437a-8d1a-385e479ee59b", + "metadata": {}, + "outputs": [], + "source": [ + "split = int(0.8*x_data.shape[0])\n", + "\n", + "# training data\n", + "x_train = x_data[:split]\n", + "y_train = y_data[:split]\n", + "w_train = w_data[:split]\n", + "\n", + "# test data\n", + "x_test = x_data[split:]\n", + "y_test = y_data[split:]" + ] + }, + { + "cell_type": "markdown", + "id": "02490c03-13e6-48c2-9552-16ce43d6d939", + "metadata": { + "tags": [] + }, + "source": [ + "#### Define index set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2e8204e-c516-48fc-85cd-bcabf3b38938", + "metadata": {}, + "outputs": [], + "source": [ + "# define multiindices\n", + "index_set = ... # TODO: define simplex index set of polynomial of degree less then 5\n", + "\n", + "print(f\"Multiindex information:\")\n", + "print(f\" number of indices: {index_set.shape[0]}\")\n", + "print(f\" dimension: {index_set.shape[1]}\")\n", + "print(f\" maximum dimension: {index_set.max}\")\n", + "print(f\" number of sobol indices: {len(index_set.sobol_tuples)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cbcc9c33-79e6-4ffb-b51a-9615e3de4907", + "metadata": {}, + "source": [ + "#### Train PC approximation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4884e6b5-1fb4-4d57-800a-774a6fb3e9a2", + "metadata": {}, + "outputs": [], + "source": [ + "# run pc approximation\n", + "surrogate = ... # TODO: use PyThia to create a polynomial chaos surrogate" + ] + }, + { + "cell_type": "markdown", + "id": "b096635a-830a-4039-a07c-5145dd0a55c4", + "metadata": {}, + "source": [ + "#### Compute approximation error" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c311f090-b48e-4ffa-b963-a97827583247", + "metadata": {}, + "outputs": [], + "source": [ + "# test approximation error\n", + "y_approx = ... # TODO: evaluate the PC surrogate on the test set\n", + "\n", + "# L2-L2 error\n", + "err_abs = np.sum(np.linalg.norm(y_test - y_approx, axis=1))/y_test.shape[0]\n", + "err_rel = np.sum(np.linalg.norm((y_test - y_approx)/y_test, axis=1))/y_test.shape[0]\n", + "print(f\"L2-L2 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)\")\n", + "\n", + "# C0-L2 error\n", + "err_abs = np.sum(np.max(np.abs(y_test - y_approx), axis=1))/y_test.shape[0]\n", + "err_rel = np.sum(np.max(np.abs(y_test - y_approx)/np.abs(y_test), axis=1))/y_test.shape[0]\n", + "print(f\"L2-C0 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf030598-1d06-400e-a7f7-e3ee3ffcea9b", + "metadata": {}, + "outputs": [], + "source": [ + "fig, axes = plt.subplot_mosaic([[\"1\", \"2\", \"3\"], [\"4\", \"5\", \"6\"]], constrained_layout=True, figsize=(9,6))\n", + "for ax in axes.values():\n", + " j = np.random.randint(0, x_test.shape[0])\n", + " for c, (idxs, label) in enumerate(zip(angle_idxs, angle_labels)):\n", + " ax.plot(angles[idxs,1], y_test[j,idxs], label=label, color=cmap.tab20(2*c))\n", + " ax.plot(angles[idxs,1], y_approx[j,idxs], \"--\", label=label, color=cmap.tab20(2*c+1))\n", + " ax.set_title(f\"test data index: {j}\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "187e51ae-047f-42a4-8671-3cb0eaa11472", + "metadata": {}, + "source": [ + "## Global Sensitivity Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e990349-158f-4aba-90fe-2e56d6b248e7", + "metadata": {}, + "outputs": [], + "source": [ + "max_vals = ... # TODO: compute the maximum attained of each Sobol index\n", + "l2_vals = ... # TODO: compute the average attained of each Sobol index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ccbcf0d9-d125-401c-a127-5d86b4aec94c", + "metadata": {}, + "outputs": [], + "source": [ + "sobol_max = [list(reversed([x for _,x in sorted(zip(max_vals, index_set.sobol_tuples))]))]\n", + "sobol_L2 = [list(reversed([x for _,x in sorted(zip(l2_vals, index_set.sobol_tuples))]))]\n", + "\n", + "print(f\"{'#':>2} {'max':<10} {'L2':<10}\")\n", + "print(\"-\"*25)\n", + "for j in range(10):\n", + " print(f\"{j+1:>2} {str(sobol_max[0][j]):<10} {str(sobol_L2[0][j]):<10}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50af6adc-ac11-458a-91c3-598240960cf9", + "metadata": {}, + "outputs": [], + "source": [ + "def binom(n,k): \n", + " return np.math.factorial(n) / np.math.factorial(k) / np.math.factorial(n-k)\n", + "bdry = [0]*7\n", + "for jj in range(1,len(bdry)):\n", + " bdry[jj] = int(bdry[jj-1] + binom(len(bdry)-1,jj))\n", + " \n", + "fig, axes = plt.subplot_mosaic([[\"max\"], [\"L2\"]])\n", + "\n", + "axes[\"max\"].axvspan(bdry[0], bdry[1], facecolor=\"k\", alpha=0.2, label=\"SOB = (i,)\")\n", + "axes[\"max\"].axvspan(bdry[1], bdry[2], facecolor=\"k\", alpha=0.15, label=\"SOB = (i,j)\")\n", + "axes[\"max\"].axvspan(bdry[2], bdry[3], facecolor=\"k\", alpha=0.1, label=\"SOB = (i,j,k)\")\n", + "axes[\"max\"].semilogy(range(surrogate.sobol.shape[0]), max_vals,\n", + " \"-o\", linewidth=1, label=\"max\")\n", + "axes[\"max\"].legend()\n", + "axes[\"max\"].grid()\n", + "\n", + "axes[\"L2\"].axvspan(bdry[0], bdry[1], facecolor=\"k\", alpha=0.2, label=\"SOB = (i,)\")\n", + "axes[\"L2\"].axvspan(bdry[1], bdry[2], facecolor=\"k\", alpha=0.15, label=\"SOB = (i,j)\")\n", + "axes[\"L2\"].axvspan(bdry[2], bdry[3], facecolor=\"k\", alpha=0.1, label=\"SOB = (i,j,k)\")\n", + "axes[\"L2\"].semilogy(range(surrogate.sobol.shape[0]), l2_vals,\n", + " \"-o\", linewidth=1, label=\"$L^2$\")\n", + "axes[\"L2\"].legend()\n", + "axes[\"L2\"].grid()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "7252d885-84b5-4a47-9cc2-b3f6374351b9", + "metadata": {}, + "source": [ + "## Inverse Problem & Parameter Reconstruction" + ] + }, + { + "cell_type": "markdown", + "id": "b0d4d74c-682c-4823-93d1-121fd3eb164f", + "metadata": {}, + "source": [ + "#### Load measurements" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecc36513-8627-4f07-886d-9dcbfc6f9e88", + "metadata": {}, + "outputs": [], + "source": [ + "# measurement\n", + "x_true = x_data[-1].reshape(1, -1)\n", + "y_true = y_data[-1].reshape(1, -1)\n", + "b = 0.015\n", + "noise = np.random.normal(0, b, y_true.shape)\n", + "y_meas = y_true + noise" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0fa7413-97cf-4dec-9403-b0f64a7818c7", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure()\n", + "for idxs, label in zip(angle_idxs, angle_labels):\n", + " plt.plot(angles[idxs,1], y_true[0,idxs], label=label)\n", + " plt.scatter(angles[idxs,1], y_meas[0, idxs], s=2)\n", + " plt.legend()\n", + " plt.title(\"Ground truth (solid) and noisy measurement (dots)\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "40495846-7f57-4ee6-9b06-26d91882addc", + "metadata": {}, + "source": [ + "#### Define Bayesian posterior" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1186d066-6018-4a18-a805-d5e15d1eaad7", + "metadata": {}, + "outputs": [], + "source": [ + "# define forward and error model\n", + "hyper_params = [pt.parameter.Parameter(index=len(params)+1, name=\"b\", domain=[0, 0.1], distribution=\"uniform\")]\n", + "dim = len(params+hyper_params)\n", + "prior = ... # TODO: define the extended prior\n", + "forward_model = lambda x: ... # TODO: define the extended forward model\n", + "sigma = lambda x: np.abs(x[:, -1]).reshape(-1, 1)*np.ones((1, y_meas.shape[1])) # error model\n", + "\n", + "# define Bayesian log-posterior\n", + "lh = Gaussian(forward_model, sigma, dim)\n", + "def log_posterior(x, meas):\n", + " return lh.log_likelihood(x, meas) + prior.log_pdf(x)" + ] + }, + { + "cell_type": "markdown", + "id": "ba006e83-5173-419b-9d11-176c14e60242", + "metadata": {}, + "source": [ + "#### Setup MCMC" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3a47c73-b2ae-427f-b56a-e194e5c84dbe", + "metadata": {}, + "outputs": [], + "source": [ + "# setup MCMC\n", + "n_walkers = 15\n", + "chain_length = 1000\n", + "seed = prior.sample(n_walkers)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68accd23-f187-4f69-9c37-dac3ada699d7", + "metadata": {}, + "outputs": [], + "source": [ + "# define runner\n", + "runner = ... # TODO: initiate runner" + ] + }, + { + "cell_type": "markdown", + "id": "02c2b899-95df-4be0-a384-269f2cf17c21", + "metadata": {}, + "source": [ + "#### Run MCMC" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f0a03eb-1bde-4fea-9682-fd123b6b0ba5", + "metadata": {}, + "outputs": [], + "source": [ + "# discard burn-in\n", + "state = runner.run_mcmc(seed, 500, progress=True)\n", + "runner.reset()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30451bb1-e0bd-4435-8db5-2dd24488d010", + "metadata": {}, + "outputs": [], + "source": [ + "# run MCMC\n", + "... # TODO: run MCMC chains" + ] + }, + { + "cell_type": "markdown", + "id": "a3571a01-e1bd-4a85-924f-043ef1d574b1", + "metadata": { + "tags": [] + }, + "source": [ + "#### Data Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1653cf3c-3f1d-40bf-84fd-28ce76676364", + "metadata": {}, + "outputs": [], + "source": [ + "# simple evaluation\n", + "samples = ... # TODO: get samples from runner\n", + "mean = np.mean(samples, axis=0)\n", + "std = np.std(samples, axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2214221a-a2be-40f7-88c4-5c0d7091210e", + "metadata": {}, + "outputs": [], + "source": [ + "# plot posterior distribution\n", + "samples = runner.get_chain(flat=True)\n", + "layout = [[j] for j in range(7)]\n", + "layout = [[0, 1], [2, 3], [4, 5], [6, 6]]\n", + "fig, axes = plt.subplot_mosaic(layout, figsize=(12, 7.5))\n", + "for j, (param, ax) in enumerate(zip(params+hyper_params, axes.values())):\n", + " bins = 50 if param.name != \"b\" else 150\n", + " ax.hist(samples[:, j], bins=bins, density=True, range=param.domain, alpha=0.5)\n", + " ax.axvline(x=mean[j], color=cmap.tab10(0), label=\"mean\")\n", + " ax.axvline(x=mean[j]-std[j], ls=\"--\", color=cmap.tab10(0), label=\"std\")\n", + " ax.axvline(x=mean[j]+std[j], ls=\"--\", color=cmap.tab10(0))\n", + " if param.name == \"b\":\n", + " ax.axvline(x=b, color=\"k\", label=\"true value\")\n", + " else: \n", + " ax.axvline(x=x_true[0, j], color=\"k\", label=\"true value\")\n", + " ax.set_ylabel(f\"{j+1} - {param.name}\")\n", + " ax.set_yticks([])\n", + " ax.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9b15e08-8248-46cd-99c6-e5f8cd64c6e0", + "metadata": {}, + "outputs": [], + "source": [ + "fig, axes = plt.subplot_mosaic([[\"approx\", \"error\"]], figsize=(15, 5))\n", + "vals = surrogate.eval(np.mean(samples, axis=0)[:-1])\n", + "errs = y_true - vals\n", + "for j, (idxs, label) in enumerate(zip(angle_idxs, angle_labels)):\n", + " color = cmap.tab10(j)\n", + " axes[\"approx\"].plot(angles[idxs,1], vals[0, idxs], color=color, label=label)\n", + " axes[\"approx\"].plot(angles[idxs,1], y_true[0, idxs], \"--\", color=color)\n", + " axes[\"approx\"].scatter(angles[idxs,1], y_meas[0, idxs], s=2, color=color)\n", + " axes[\"approx\"].legend()\n", + " axes[\"approx\"].set_title(\"Ground truth (solid) and noisy measurement (dots)\")\n", + " \n", + " axes[\"error\"].plot(angles[idxs, 1], errs[0, idxs], color=color, label=label)\n", + " axes[\"error\"].legend()\n", + " axes[\"error\"].grid()\n", + " axes[\"error\"].set_title(\"Pointwise deviation from ground truth\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36afafba-3de1-4dde-b114-b6b278a62473", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/app/exercises/Exercise 2 - XRR.ipynb b/app/exercises/Exercise 2 - XRR.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..ddcf38de4d1e943bf7c354079d4b25fe926f31e8 --- /dev/null +++ b/app/exercises/Exercise 2 - XRR.ipynb @@ -0,0 +1,587 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a3cf80d8-ef6d-4dec-8234-22967c45231d", + "metadata": {}, + "source": [ + "# XRR" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f27e08c-8fab-4917-9d4a-caed6af9ad88", + "metadata": {}, + "outputs": [], + "source": [ + "# from contextlib import contextmanager\n", + "import time\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.cm as cmap\n", + "import emcee\n", + "import pythia as pt # see doc: www.pythia-uq.com\n", + "# import xrayutilities as xu\n", + "from utils import Gaussian # local import\n", + "# import hessian_back as hess # local import" + ] + }, + { + "cell_type": "markdown", + "id": "c810ea31-b8c9-46c7-a4b4-e058a9ae5030", + "metadata": { + "tags": [] + }, + "source": [ + "## Load scatterometry data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "504e11ab-01f5-455a-8a72-7097278f8023", + "metadata": {}, + "outputs": [], + "source": [ + "path = \"../data/xrr/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c547374f-6999-4923-beaa-3da43fbbab79", + "metadata": {}, + "outputs": [], + "source": [ + "omega = np.load(path+\"omega.npy\")\n", + "\n", + "print(f\"shape omega: {omega.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9b2d87e-2fcb-463b-b39c-d87844772c99", + "metadata": {}, + "outputs": [], + "source": [ + "x_data = np.load(path+\"x_train.npy\") # (10_000, 5)\n", + "y_data = np.log10(np.load(path+\"y_train.npy\")) # (10_000, 585)\n", + "w_data = np.load(path+\"w_train.npy\") # (10_000, )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "700eec32-db8b-4443-a05d-fbce6dedc62c", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure()\n", + "for j, y in enumerate(y_data[:5]):\n", + " plt.plot(omega, y, label=f\"sample {j+1}\")\n", + " plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a16775a5-b12f-4792-957a-76d39be52aa7", + "metadata": {}, + "outputs": [], + "source": [ + "# set parameter information\n", + "param1 = pt.parameter.Parameter(index=1, name=\"ÏRu\", domain=[12590-25, 12590+25], distribution=\"uniform\")\n", + "param2 = pt.parameter.Parameter(index=2, name=\"wRu\", domain=[291-1, 291+1], distribution=\"uniform\")\n", + "param3 = pt.parameter.Parameter(index=3, name=\"σRu\", domain=[4-1, 4+1], distribution=\"uniform\")\n", + "param4 = pt.parameter.Parameter(index=4, name=\"wSiO2\", domain=[18-3, 18+3], distribution=\"uniform\")\n", + "param5 = pt.parameter.Parameter(index=5, name=\"wRuO2\", domain=[18-1, 18+1], distribution=\"uniform\")\n", + "params = [param1, param2, param3, param4, param5]\n", + "param_names = [p.name for p in params]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecdb40d0-5cb5-488e-b757-2d9be9112bff", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"parameter dist domain\")\n", + "print(\"-\"*40)\n", + "for param in params:\n", + " print(f\"{param.name:<9} {param.distribution:<5} {np.array(param.domain, dtype=int)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79a57610-24f5-46b3-9ae0-0c1182c3bd61", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplot_mosaic([param_names], constrained_layout=True, figsize=(10, 3))\n", + "for j, param in enumerate(params):\n", + " ax[param.name].hist(x_data[:, j], bins=50)\n", + " ax[param.name].set_title(param.name)\n", + " ax[param.name].set_yticks([])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "5c22c2ef-b248-4b5b-8065-5c325aa25d85", + "metadata": { + "tags": [] + }, + "source": [ + "## Approximate Forward Problem" + ] + }, + { + "cell_type": "markdown", + "id": "18e99871-d6f8-4f7b-9382-f6503c92d93b", + "metadata": { + "tags": [] + }, + "source": [ + "#### Split training and test data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84c2cbfd-9ce8-437a-8d1a-385e479ee59b", + "metadata": {}, + "outputs": [], + "source": [ + "split = int(0.8*x_data.shape[0])\n", + "\n", + "# training data\n", + "x_train = x_data[:split]\n", + "y_train = y_data[:split]\n", + "w_train = w_data[:split]\n", + "\n", + "# test data\n", + "x_test = x_data[split:]\n", + "y_test = y_data[split:]" + ] + }, + { + "cell_type": "markdown", + "id": "02490c03-13e6-48c2-9552-16ce43d6d939", + "metadata": { + "tags": [] + }, + "source": [ + "#### Define index set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2e8204e-c516-48fc-85cd-bcabf3b38938", + "metadata": {}, + "outputs": [], + "source": [ + "# define multiindices\n", + "indices = ... # TODO: define simplex index set of polynomial of degree less then 5\n", + "\n", + "print(f\"Multiindex information:\")\n", + "print(f\" number of indices: {index_set.shape[0]}\")\n", + "print(f\" dimension: {index_set.shape[1]}\")\n", + "print(f\" maximum dimension: {index_set.max}\")\n", + "print(f\" number of sobol indices: {len(index_set.sobol_tuples)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cbcc9c33-79e6-4ffb-b51a-9615e3de4907", + "metadata": {}, + "source": [ + "#### Train PC approximation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4884e6b5-1fb4-4d57-800a-774a6fb3e9a2", + "metadata": {}, + "outputs": [], + "source": [ + "# run pc approximation\n", + "surrogate = ... # TODO: use PyThia to create a polynomial chaos surrogate" + ] + }, + { + "cell_type": "markdown", + "id": "b096635a-830a-4039-a07c-5145dd0a55c4", + "metadata": {}, + "source": [ + "#### Compute approximation error" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c311f090-b48e-4ffa-b963-a97827583247", + "metadata": {}, + "outputs": [], + "source": [ + "# test approximation error\n", + "y_approx = ... # TODO: evaluate the PC surrogate on the test set\n", + "\n", + "# L2-L2 error\n", + "err_abs = np.sum(np.linalg.norm(y_test - y_approx, axis=1))/y_test.shape[0]\n", + "err_rel = np.sum(np.linalg.norm((y_test - y_approx)/y_test, axis=1))/y_test.shape[0]\n", + "print(f\"L2-L2 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)\")\n", + "\n", + "# C0-L2 error\n", + "err_abs = np.sum(np.max(np.abs(y_test - y_approx), axis=1))/y_test.shape[0]\n", + "err_rel = np.sum(np.max(np.abs(y_test - y_approx)/np.abs(y_test), axis=1))/y_test.shape[0]\n", + "print(f\"L2-C0 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf030598-1d06-400e-a7f7-e3ee3ffcea9b", + "metadata": {}, + "outputs": [], + "source": [ + "fig, axes = plt.subplot_mosaic([[\"1\", \"2\", \"3\"], [\"4\", \"5\", \"6\"]], constrained_layout=True, figsize=(9,6))\n", + "for ax in axes.values():\n", + " j = np.random.randint(0, x_test.shape[0])\n", + " ax.plot(omega, y_test[j], label=f\"ground truth\", lw=0.5)\n", + " ax.plot(omega, y_approx[j], label=\"approx\", lw=0.5)\n", + " ax.set_title(f\"test data index: {j}\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "187e51ae-047f-42a4-8671-3cb0eaa11472", + "metadata": {}, + "source": [ + "## Global Sensitivity Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9754f22b-b261-4aba-9506-9487b6611b3f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e990349-158f-4aba-90fe-2e56d6b248e7", + "metadata": {}, + "outputs": [], + "source": [ + "max_vals = ... # TODO: compute the maximum attained of each Sobol index\n", + "l2_vals = ... # TODO: compute the average attained of each Sobol index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ccbcf0d9-d125-401c-a127-5d86b4aec94c", + "metadata": {}, + "outputs": [], + "source": [ + "sobol_max = [list(reversed([x for _,x in sorted(zip(max_vals, index_set.sobol_tuples))]))]\n", + "sobol_L2 = [list(reversed([x for _,x in sorted(zip(l2_vals, index_set.sobol_tuples))]))]\n", + "\n", + "print(f\"{'#':>2} {'max':<10} {'L2':<10}\")\n", + "print(\"-\"*25)\n", + "for j in range(10):\n", + " print(f\"{j+1:>2} {str(sobol_max[0][j]):<10} {str(sobol_L2[0][j]):<10}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50af6adc-ac11-458a-91c3-598240960cf9", + "metadata": {}, + "outputs": [], + "source": [ + "def binom(n,k): \n", + " return np.math.factorial(n) / np.math.factorial(k) / np.math.factorial(n-k)\n", + "bdry = [0]*7\n", + "for jj in range(1,len(bdry)):\n", + " bdry[jj] = int(bdry[jj-1] + binom(len(bdry)-1,jj))\n", + " \n", + "fig, axes = plt.subplot_mosaic([[\"max\"], [\"L2\"]])\n", + "\n", + "axes[\"max\"].axvspan(bdry[0], bdry[1], facecolor=\"k\", alpha=0.2, label=\"SOB = (i,)\")\n", + "axes[\"max\"].axvspan(bdry[1], bdry[2], facecolor=\"k\", alpha=0.15, label=\"SOB = (i,j)\")\n", + "axes[\"max\"].axvspan(bdry[2], bdry[3], facecolor=\"k\", alpha=0.1, label=\"SOB = (i,j,k)\")\n", + "axes[\"max\"].semilogy(range(surrogate.sobol.shape[0]), max_vals,\n", + " \"-o\", linewidth=1, label=\"max\")\n", + "axes[\"max\"].legend()\n", + "axes[\"max\"].grid()\n", + "\n", + "axes[\"L2\"].axvspan(bdry[0], bdry[1], facecolor=\"k\", alpha=0.2, label=\"SOB = (i,)\")\n", + "axes[\"L2\"].axvspan(bdry[1], bdry[2], facecolor=\"k\", alpha=0.15, label=\"SOB = (i,j)\")\n", + "axes[\"L2\"].axvspan(bdry[2], bdry[3], facecolor=\"k\", alpha=0.1, label=\"SOB = (i,j,k)\")\n", + "axes[\"L2\"].semilogy(range(surrogate.sobol.shape[0]), l2_vals,\n", + " \"-o\", linewidth=1, label=\"$L^2$\")\n", + "axes[\"L2\"].legend()\n", + "axes[\"L2\"].grid()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "7252d885-84b5-4a47-9cc2-b3f6374351b9", + "metadata": {}, + "source": [ + "## Inverse Problem & Parameter Reconstruction" + ] + }, + { + "cell_type": "markdown", + "id": "b0d4d74c-682c-4823-93d1-121fd3eb164f", + "metadata": {}, + "source": [ + "#### Load measurements" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecc36513-8627-4f07-886d-9dcbfc6f9e88", + "metadata": {}, + "outputs": [], + "source": [ + "# measurement\n", + "use_real_measurement = False\n", + "if use_real_measurement is True:\n", + " y_meas = np.log10(np.load(path+\"measurement.npy\").reshape(1, -1))\n", + "else:\n", + " x_true = x_data[-1].reshape(1, -1)\n", + " y_true = y_data[-1].reshape(1, -1)\n", + " b = 0.05\n", + " noise = np.random.normal(0, b, y_true.shape)\n", + " y_meas = y_true + noise" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0fa7413-97cf-4dec-9403-b0f64a7818c7", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure()\n", + "if use_real_measurement is False:\n", + " plt.plot(omega, y_true[0], label=\"y_true\")\n", + "plt.scatter(omega, y_meas[0], s=2, label=\"measurement\", color=cmap.tab10(1))\n", + "plt.legend()\n", + "plt.title(\"Ground truth (solid) and noisy measurement (dots)\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "40495846-7f57-4ee6-9b06-26d91882addc", + "metadata": {}, + "source": [ + "#### Define Bayesian posterior" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1186d066-6018-4a18-a805-d5e15d1eaad7", + "metadata": {}, + "outputs": [], + "source": [ + "# define forward and error model\n", + "hyper_params = [pt.parameter.Parameter(index=len(params)+1, name=\"b\", domain=[0, 0.1], distribution=\"uniform\")]\n", + "dim = len(params+hyper_params)\n", + "prior = ... # TODO: define the extended prior\n", + "forward_model = lambda x: ... # TODO: define the extended forward model\n", + "sigma = lambda x: np.abs(x[:, -1]).reshape(-1, 1)*np.ones((1, y_meas.shape[1])) # error model\n", + "\n", + "# define Bayesian posterior\n", + "lh = Gaussian(forward_model, sigma, dim)\n", + "def log_posterior(x, meas):\n", + " return lh.log_likelihood(x, meas) + prior.log_pdf(x)" + ] + }, + { + "cell_type": "markdown", + "id": "ba006e83-5173-419b-9d11-176c14e60242", + "metadata": {}, + "source": [ + "#### Setup MCMC" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3a47c73-b2ae-427f-b56a-e194e5c84dbe", + "metadata": {}, + "outputs": [], + "source": [ + "# setup MCMC\n", + "n_walkers = 15\n", + "chain_length = 1000\n", + "seed = prior.sample(n_walkers)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68accd23-f187-4f69-9c37-dac3ada699d7", + "metadata": {}, + "outputs": [], + "source": [ + "# define runner\n", + "runner = ... # TODO: initiate runner" + ] + }, + { + "cell_type": "markdown", + "id": "02c2b899-95df-4be0-a384-269f2cf17c21", + "metadata": {}, + "source": [ + "#### Run MCMC" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f0a03eb-1bde-4fea-9682-fd123b6b0ba5", + "metadata": {}, + "outputs": [], + "source": [ + "# discard burn-in\n", + "state = runner.run_mcmc(seed, 500, progress=True)\n", + "runner.reset()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30451bb1-e0bd-4435-8db5-2dd24488d010", + "metadata": {}, + "outputs": [], + "source": [ + "# run MCMC\n", + "... # TODO: run MCMC chains" + ] + }, + { + "cell_type": "markdown", + "id": "a3571a01-e1bd-4a85-924f-043ef1d574b1", + "metadata": { + "tags": [] + }, + "source": [ + "#### Data Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1653cf3c-3f1d-40bf-84fd-28ce76676364", + "metadata": {}, + "outputs": [], + "source": [ + "# simple evaluation\n", + "samples = runner.get_chain(flat=True)\n", + "mean = np.mean(samples, axis=0)\n", + "std = np.std(samples, axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2214221a-a2be-40f7-88c4-5c0d7091210e", + "metadata": {}, + "outputs": [], + "source": [ + "# plot posterior distribution\n", + "samples = runner.get_chain(flat=True)\n", + "layout = [[j] for j in range(7)]\n", + "layout = [[0, 1], [2, 3], [4, 5]]\n", + "fig, axes = plt.subplot_mosaic(layout, figsize=(12, 7.5))\n", + "for j, (param, ax) in enumerate(zip(params+hyper_params, axes.values())):\n", + " bins = 50 if param.name != \"b\" else 150\n", + " ax.hist(samples[:, j], bins=bins, density=True, range=param.domain, alpha=0.5)\n", + " ax.axvline(x=mean[j], color=cmap.tab10(0), label=\"mean\")\n", + " ax.axvline(x=mean[j]-std[j], ls=\"--\", color=cmap.tab10(0), label=\"std\")\n", + " ax.axvline(x=mean[j]+std[j], ls=\"--\", color=cmap.tab10(0))\n", + " if use_real_measurement is False:\n", + " if param.name == \"b\":\n", + " ax.axvline(x=b, color=\"k\", label=\"true value\")\n", + " else: \n", + " ax.axvline(x=x_true[0, j], color=\"k\", label=\"true value\")\n", + " ax.set_ylabel(f\"{j+1} - {param.name}\")\n", + " ax.set_yticks([])\n", + " ax.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9b15e08-8248-46cd-99c6-e5f8cd64c6e0", + "metadata": {}, + "outputs": [], + "source": [ + "vals = surrogate.eval(np.mean(samples, axis=0)[:-1])\n", + "errs = np.abs((y_true - vals)/y_true)\n", + "\n", + "fig, axes = plt.subplot_mosaic([[\"approx\", \"error\"]], figsize=(15, 5))\n", + "axes[\"approx\"].plot(omega, vals[0], label=\"approx\", color=cmap.tab10(0))\n", + "axes[\"approx\"].plot(omega, y_true[0], \"--\", label=\"ground truth\", color=cmap.tab10(1))\n", + "axes[\"approx\"].scatter(omega, y_meas[0], s=2, label=\"measurement\", color=cmap.tab10(1))\n", + "axes[\"approx\"].legend()\n", + "axes[\"approx\"].set_title(\"Ground truth (solid) and noisy measurement (dots)\")\n", + "\n", + "axes[\"error\"].plot(omega, errs[0])\n", + "axes[\"error\"].grid()\n", + "axes[\"error\"].set_title(\"Pointwise deviation from ground truth\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36afafba-3de1-4dde-b114-b6b278a62473", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/app/solutions/Solution 1 - Scat.ipynb b/app/solutions/Solution 1 - Scat.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..09bd4426536135034c78dbadf5d93f729546b343 --- /dev/null +++ b/app/solutions/Solution 1 - Scat.ipynb @@ -0,0 +1,592 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a3cf80d8-ef6d-4dec-8234-22967c45231d", + "metadata": {}, + "source": [ + "# Scatterometry" + ] + }, + { + "cell_type": "markdown", + "id": "b75bfa2e-067f-48b8-93e5-e84bc7da3ff3", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f27e08c-8fab-4917-9d4a-caed6af9ad88", + "metadata": {}, + "outputs": [], + "source": [ + "# from contextlib import contextmanager\n", + "import time\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.cm as cmap\n", + "import emcee\n", + "import pythia as pt # see doc: www.pythia-uq.com\n", + "from utils import Gaussian # local import" + ] + }, + { + "cell_type": "markdown", + "id": "c810ea31-b8c9-46c7-a4b4-e058a9ae5030", + "metadata": { + "tags": [] + }, + "source": [ + "## Load scatterometry data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "504e11ab-01f5-455a-8a72-7097278f8023", + "metadata": {}, + "outputs": [], + "source": [ + "path = \"../data/scat/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c547374f-6999-4923-beaa-3da43fbbab79", + "metadata": {}, + "outputs": [], + "source": [ + "angles = np.load(path+\"angles.npy\") # (90, 2)\n", + "\n", + "# reshape\n", + "angles = np.concatenate([angles, angles], axis=0) # (180, 2)\n", + "angle_idxs = [range(45), range(45,90), range(90,135), range(135,180)]\n", + "angle_labels = [\"$\\phi=0^\\circ$, s pol\",\"$\\phi=0^\\circ$, p pol\",\n", + " \"$\\phi=90^\\circ$, s pol\",\"$\\phi=90^\\circ$, p pol\"]\n", + "print(f\"azimuth phi: {np.unique(angles[:,0])}\")\n", + "print(f\"incidence theta: {np.unique(angles[:,1])}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9b2d87e-2fcb-463b-b39c-d87844772c99", + "metadata": {}, + "outputs": [], + "source": [ + "x_data = np.load(path+\"x_train.npy\") # (10_000, 6)\n", + "y_data = np.load(path+\"y_train.npy\") # (90, 10_000, 2)\n", + "w_data = np.load(path+\"w_train.npy\") # (10_000, )\n", + "\n", + "# reshape y_data\n", + "y_data = np.concatenate([y_data[:45,:,0], y_data[:45,:,1],\n", + " y_data[45:,:,0], y_data[45:,:,1]], axis=0).T # (10_000, 180)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "700eec32-db8b-4443-a05d-fbce6dedc62c", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure()\n", + "for idxs, label in zip(angle_idxs, angle_labels):\n", + " plt.plot(angles[idxs,1], y_data[375,idxs], label=label)\n", + " plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a16775a5-b12f-4792-957a-76d39be52aa7", + "metadata": {}, + "outputs": [], + "source": [ + "# load parameter information\n", + "params = []\n", + "for dct in np.load(path+\"parameter.npy\", allow_pickle=True):\n", + " params.append(pt.parameter.Parameter(\n", + " index=dct[\"_idx\"], name=dct[\"_name\"], domain=dct[\"_dom\"], distribution=dct[\"_dist\"],\n", + " alpha=dct[\"_alpha\"], beta=dct[\"_beta\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecdb40d0-5cb5-488e-b757-2d9be9112bff", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"parameter dist domain\")\n", + "print(\"-\"*30)\n", + "for param in params:\n", + " print(f\"{param.name:<9} {param.distribution:<5} {np.array(param.domain, dtype=int)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79a57610-24f5-46b3-9ae0-0c1182c3bd61", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplot_mosaic([[\"h\", \"cd\", \"swa\"],[\"t\", \"r_top\", \"r_bot\"]], constrained_layout=True)\n", + "for j, param in enumerate(params):\n", + " ax[param.name].hist(x_data[:, j], bins=50)\n", + " ax[param.name].set_title(param.name)\n", + " ax[param.name].set_yticks([])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "5c22c2ef-b248-4b5b-8065-5c325aa25d85", + "metadata": { + "tags": [] + }, + "source": [ + "## Approximate Forward Problem" + ] + }, + { + "cell_type": "markdown", + "id": "18e99871-d6f8-4f7b-9382-f6503c92d93b", + "metadata": { + "tags": [] + }, + "source": [ + "#### Split training and test data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84c2cbfd-9ce8-437a-8d1a-385e479ee59b", + "metadata": {}, + "outputs": [], + "source": [ + "split = int(0.8*x_data.shape[0])\n", + "\n", + "# training data\n", + "x_train = x_data[:split]\n", + "y_train = y_data[:split]\n", + "w_train = w_data[:split]\n", + "\n", + "# test data\n", + "x_test = x_data[split:]\n", + "y_test = y_data[split:]" + ] + }, + { + "cell_type": "markdown", + "id": "02490c03-13e6-48c2-9552-16ce43d6d939", + "metadata": { + "tags": [] + }, + "source": [ + "#### Define index set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2e8204e-c516-48fc-85cd-bcabf3b38938", + "metadata": {}, + "outputs": [], + "source": [ + "# define multiindices\n", + "indices = pt.index.simplex(dimension=len(params), maximum=4)\n", + "index_set = pt.index.IndexSet(indices)\n", + "\n", + "print(f\"Multiindex information:\")\n", + "print(f\" number of indices: {index_set.shape[0]}\")\n", + "print(f\" dimension: {index_set.shape[1]}\")\n", + "print(f\" maximum dimension: {index_set.max}\")\n", + "print(f\" number of sobol indices: {len(index_set.sobol_tuples)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cbcc9c33-79e6-4ffb-b51a-9615e3de4907", + "metadata": {}, + "source": [ + "#### Train PC approximation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4884e6b5-1fb4-4d57-800a-774a6fb3e9a2", + "metadata": {}, + "outputs": [], + "source": [ + "# run pc approximation\n", + "surrogate = pt.chaos.PolynomialChaos(params, index_set, x_train, w_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "id": "b096635a-830a-4039-a07c-5145dd0a55c4", + "metadata": {}, + "source": [ + "#### Compute approximation error" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c311f090-b48e-4ffa-b963-a97827583247", + "metadata": {}, + "outputs": [], + "source": [ + "# test approximation error\n", + "y_approx = surrogate.eval(x_test)\n", + "\n", + "# L2-L2 error\n", + "err_abs = np.sum(np.linalg.norm(y_test - y_approx, axis=1))/y_test.shape[0]\n", + "err_rel = np.sum(np.linalg.norm((y_test - y_approx)/y_test, axis=1))/y_test.shape[0]\n", + "print(f\"L2-L2 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)\")\n", + "\n", + "# C0-L2 error\n", + "err_abs = np.sum(np.max(np.abs(y_test - y_approx), axis=1))/y_test.shape[0]\n", + "err_rel = np.sum(np.max(np.abs(y_test - y_approx)/np.abs(y_test), axis=1))/y_test.shape[0]\n", + "print(f\"L2-C0 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf030598-1d06-400e-a7f7-e3ee3ffcea9b", + "metadata": {}, + "outputs": [], + "source": [ + "fig, axes = plt.subplot_mosaic([[\"1\", \"2\", \"3\"], [\"4\", \"5\", \"6\"]], constrained_layout=True, figsize=(9,6))\n", + "for ax in axes.values():\n", + " j = np.random.randint(0, x_test.shape[0])\n", + " for c, (idxs, label) in enumerate(zip(angle_idxs, angle_labels)):\n", + " ax.plot(angles[idxs,1], y_test[j,idxs], label=label, color=cmap.tab20(2*c))\n", + " ax.plot(angles[idxs,1], y_approx[j,idxs], \"--\", label=label, color=cmap.tab20(2*c+1))\n", + " ax.set_title(f\"test data index: {j}\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "187e51ae-047f-42a4-8671-3cb0eaa11472", + "metadata": {}, + "source": [ + "## Global Sensitivity Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e990349-158f-4aba-90fe-2e56d6b248e7", + "metadata": {}, + "outputs": [], + "source": [ + "max_vals = np.max(surrogate.sobol, axis=1)\n", + "l2_vals = np.linalg.norm(surrogate.sobol, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ccbcf0d9-d125-401c-a127-5d86b4aec94c", + "metadata": {}, + "outputs": [], + "source": [ + "sobol_max = [list(reversed([x for _,x in sorted(zip(max_vals, index_set.sobol_tuples))]))]\n", + "sobol_L2 = [list(reversed([x for _,x in sorted(zip(l2_vals, index_set.sobol_tuples))]))]\n", + "\n", + "print(f\"{'#':>2} {'max':<10} {'L2':<10}\")\n", + "print(\"-\"*25)\n", + "for j in range(10):\n", + " print(f\"{j+1:>2} {str(sobol_max[0][j]):<10} {str(sobol_L2[0][j]):<10}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50af6adc-ac11-458a-91c3-598240960cf9", + "metadata": {}, + "outputs": [], + "source": [ + "def binom(n,k): \n", + " return np.math.factorial(n) / np.math.factorial(k) / np.math.factorial(n-k)\n", + "bdry = [0]*7\n", + "for jj in range(1,len(bdry)):\n", + " bdry[jj] = int(bdry[jj-1] + binom(len(bdry)-1,jj))\n", + " \n", + "fig, axes = plt.subplot_mosaic([[\"max\"], [\"L2\"]])\n", + "\n", + "axes[\"max\"].axvspan(bdry[0], bdry[1], facecolor=\"k\", alpha=0.2, label=\"SOB = (i,)\")\n", + "axes[\"max\"].axvspan(bdry[1], bdry[2], facecolor=\"k\", alpha=0.15, label=\"SOB = (i,j)\")\n", + "axes[\"max\"].axvspan(bdry[2], bdry[3], facecolor=\"k\", alpha=0.1, label=\"SOB = (i,j,k)\")\n", + "axes[\"max\"].semilogy(range(surrogate.sobol.shape[0]), max_vals,\n", + " \"-o\", linewidth=1, label=\"max\")\n", + "axes[\"max\"].legend()\n", + "axes[\"max\"].grid()\n", + "\n", + "axes[\"L2\"].axvspan(bdry[0], bdry[1], facecolor=\"k\", alpha=0.2, label=\"SOB = (i,)\")\n", + "axes[\"L2\"].axvspan(bdry[1], bdry[2], facecolor=\"k\", alpha=0.15, label=\"SOB = (i,j)\")\n", + "axes[\"L2\"].axvspan(bdry[2], bdry[3], facecolor=\"k\", alpha=0.1, label=\"SOB = (i,j,k)\")\n", + "axes[\"L2\"].semilogy(range(surrogate.sobol.shape[0]), l2_vals,\n", + " \"-o\", linewidth=1, label=\"$L^2$\")\n", + "axes[\"L2\"].legend()\n", + "axes[\"L2\"].grid()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "7252d885-84b5-4a47-9cc2-b3f6374351b9", + "metadata": {}, + "source": [ + "## Inverse Problem & Parameter Reconstruction" + ] + }, + { + "cell_type": "markdown", + "id": "b0d4d74c-682c-4823-93d1-121fd3eb164f", + "metadata": {}, + "source": [ + "#### Load measurements" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecc36513-8627-4f07-886d-9dcbfc6f9e88", + "metadata": {}, + "outputs": [], + "source": [ + "# measurement\n", + "x_true = x_data[-1].reshape(1, -1)\n", + "y_true = y_data[-1].reshape(1, -1)\n", + "b = 0.015\n", + "noise = np.random.normal(0, b, y_true.shape)\n", + "y_meas = y_true + noise" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0fa7413-97cf-4dec-9403-b0f64a7818c7", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure()\n", + "for idxs, label in zip(angle_idxs, angle_labels):\n", + " plt.plot(angles[idxs,1], y_true[0,idxs], label=label)\n", + " plt.scatter(angles[idxs,1], y_meas[0, idxs], s=2)\n", + " plt.legend()\n", + " plt.title(\"Ground truth (solid) and noisy measurement (dots)\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "40495846-7f57-4ee6-9b06-26d91882addc", + "metadata": {}, + "source": [ + "#### Define Bayesian posterior" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1186d066-6018-4a18-a805-d5e15d1eaad7", + "metadata": {}, + "outputs": [], + "source": [ + "# define forward and error model\n", + "hyper_params = [pt.parameter.Parameter(index=len(params)+1, name=\"b\", domain=[0, 0.1], distribution=\"uniform\")]\n", + "dim = len(params+hyper_params)\n", + "prior = pt.sampler.ParameterSampler(params+hyper_params) # extended prior\n", + "forward_model = lambda x: surrogate.eval(x[:,:-1]) # extended forward model\n", + "sigma = lambda x: np.abs(x[:, -1]).reshape(-1, 1)*np.ones((1, y_meas.shape[1])) # error model\n", + "\n", + "# define Bayesian posterior\n", + "lh = Gaussian(forward_model, sigma, dim)\n", + "def log_posterior(x, meas):\n", + " return lh.log_likelihood(x, meas) + prior.log_pdf(x)" + ] + }, + { + "cell_type": "markdown", + "id": "ba006e83-5173-419b-9d11-176c14e60242", + "metadata": {}, + "source": [ + "#### Setup MCMC" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3a47c73-b2ae-427f-b56a-e194e5c84dbe", + "metadata": {}, + "outputs": [], + "source": [ + "# setup MCMC\n", + "n_walkers = 15\n", + "chain_length = 1000\n", + "seed = prior.sample(n_walkers)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68accd23-f187-4f69-9c37-dac3ada699d7", + "metadata": {}, + "outputs": [], + "source": [ + "# define runner\n", + "runner = emcee.EnsembleSampler(n_walkers, dim, log_posterior, args=[y_meas])" + ] + }, + { + "cell_type": "markdown", + "id": "02c2b899-95df-4be0-a384-269f2cf17c21", + "metadata": {}, + "source": [ + "#### Run MCMC" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f0a03eb-1bde-4fea-9682-fd123b6b0ba5", + "metadata": {}, + "outputs": [], + "source": [ + "# discard burn-in\n", + "state = runner.run_mcmc(seed, 500, progress=True)\n", + "runner.reset()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30451bb1-e0bd-4435-8db5-2dd24488d010", + "metadata": {}, + "outputs": [], + "source": [ + "# run MCMC\n", + "runner.run_mcmc(seed, chain_length, progress=True);" + ] + }, + { + "cell_type": "markdown", + "id": "a3571a01-e1bd-4a85-924f-043ef1d574b1", + "metadata": { + "tags": [] + }, + "source": [ + "#### Data Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1653cf3c-3f1d-40bf-84fd-28ce76676364", + "metadata": {}, + "outputs": [], + "source": [ + "# simple evaluation\n", + "samples = runner.get_chain(flat=True)\n", + "mean = np.mean(samples, axis=0)\n", + "std = np.std(samples, axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2214221a-a2be-40f7-88c4-5c0d7091210e", + "metadata": {}, + "outputs": [], + "source": [ + "# plot posterior distribution\n", + "samples = runner.get_chain(flat=True)\n", + "layout = [[j] for j in range(7)]\n", + "layout = [[0, 1], [2, 3], [4, 5], [6, 6]]\n", + "fig, axes = plt.subplot_mosaic(layout, figsize=(12, 7.5))\n", + "for j, (param, ax) in enumerate(zip(params+hyper_params, axes.values())):\n", + " bins = 50 if param.name != \"b\" else 150\n", + " ax.hist(samples[:, j], bins=bins, density=True, range=param.domain, alpha=0.5)\n", + " ax.axvline(x=mean[j], color=cmap.tab10(0), label=\"mean\")\n", + " ax.axvline(x=mean[j]-std[j], ls=\"--\", color=cmap.tab10(0), label=\"std\")\n", + " ax.axvline(x=mean[j]+std[j], ls=\"--\", color=cmap.tab10(0))\n", + " if param.name == \"b\":\n", + " ax.axvline(x=b, color=\"k\", label=\"true value\")\n", + " else: \n", + " ax.axvline(x=x_true[0, j], color=\"k\", label=\"true value\")\n", + " ax.set_ylabel(f\"{j+1} - {param.name}\")\n", + " ax.set_yticks([])\n", + " ax.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9b15e08-8248-46cd-99c6-e5f8cd64c6e0", + "metadata": {}, + "outputs": [], + "source": [ + "fig, axes = plt.subplot_mosaic([[\"approx\", \"error\"]], figsize=(15, 5))\n", + "vals = surrogate.eval(np.mean(samples, axis=0)[:-1])\n", + "errs = y_true - vals\n", + "for j, (idxs, label) in enumerate(zip(angle_idxs, angle_labels)):\n", + " color = cmap.tab10(j)\n", + " axes[\"approx\"].plot(angles[idxs,1], vals[0, idxs], color=color, label=label)\n", + " axes[\"approx\"].plot(angles[idxs,1], y_true[0, idxs], \"--\", color=color)\n", + " axes[\"approx\"].scatter(angles[idxs,1], y_meas[0, idxs], s=2, color=color)\n", + " axes[\"approx\"].legend()\n", + " axes[\"approx\"].set_title(\"Ground truth (solid) and noisy measurement (dots)\")\n", + " \n", + " axes[\"error\"].plot(angles[idxs, 1], errs[0, idxs], color=color, label=label)\n", + " axes[\"error\"].legend()\n", + " axes[\"error\"].grid()\n", + " axes[\"error\"].set_title(\"Pointwise deviation from ground truth\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36afafba-3de1-4dde-b114-b6b278a62473", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/app/solutions/Solution 2 - XRR.ipynb b/app/solutions/Solution 2 - XRR.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..650b8244b002c99e310f3397b8817d708a1e8217 --- /dev/null +++ b/app/solutions/Solution 2 - XRR.ipynb @@ -0,0 +1,608 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a3cf80d8-ef6d-4dec-8234-22967c45231d", + "metadata": {}, + "source": [ + "# XRR" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f27e08c-8fab-4917-9d4a-caed6af9ad88", + "metadata": {}, + "outputs": [], + "source": [ + "# from contextlib import contextmanager\n", + "import time\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.cm as cmap\n", + "import emcee\n", + "import pythia as pt # see doc: www.pythia-uq.com\n", + "# import xrayutilities as xu\n", + "from utils import Gaussian # local import\n", + "# import hessian_back as hess # local import" + ] + }, + { + "cell_type": "markdown", + "id": "c810ea31-b8c9-46c7-a4b4-e058a9ae5030", + "metadata": { + "tags": [] + }, + "source": [ + "## Load scatterometry data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "504e11ab-01f5-455a-8a72-7097278f8023", + "metadata": {}, + "outputs": [], + "source": [ + "path = \"../data/xrr/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c547374f-6999-4923-beaa-3da43fbbab79", + "metadata": {}, + "outputs": [], + "source": [ + "omega = np.load(path+\"omega.npy\")\n", + "\n", + "print(f\"shape omega: {omega.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9b2d87e-2fcb-463b-b39c-d87844772c99", + "metadata": {}, + "outputs": [], + "source": [ + "x_data = np.load(path+\"x_train.npy\") # (10_000, 5)\n", + "y_data = np.log10(np.load(path+\"y_train.npy\")) # (10_000, 585)\n", + "w_data = np.load(path+\"w_train.npy\") # (10_000, )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "700eec32-db8b-4443-a05d-fbce6dedc62c", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure()\n", + "for j, y in enumerate(y_data[:5]):\n", + " plt.plot(omega, y, label=f\"sample {j+1}\")\n", + " plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a16775a5-b12f-4792-957a-76d39be52aa7", + "metadata": {}, + "outputs": [], + "source": [ + "# set parameter information\n", + "param1 = pt.parameter.Parameter(index=1, name=\"ÏRu\", domain=[12590-25, 12590+25], distribution=\"uniform\")\n", + "param2 = pt.parameter.Parameter(index=2, name=\"wRu\", domain=[291-1, 291+1], distribution=\"uniform\")\n", + "param3 = pt.parameter.Parameter(index=3, name=\"σRu\", domain=[4-1, 4+1], distribution=\"uniform\")\n", + "param4 = pt.parameter.Parameter(index=4, name=\"wSiO2\", domain=[18-3, 18+3], distribution=\"uniform\")\n", + "param5 = pt.parameter.Parameter(index=5, name=\"wRuO2\", domain=[18-1, 18+1], distribution=\"uniform\")\n", + "params = [param1, param2, param3, param4, param5]\n", + "param_names = [p.name for p in params]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecdb40d0-5cb5-488e-b757-2d9be9112bff", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"parameter dist domain\")\n", + "print(\"-\"*40)\n", + "for param in params:\n", + " print(f\"{param.name:<9} {param.distribution:<5} {np.array(param.domain, dtype=int)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79a57610-24f5-46b3-9ae0-0c1182c3bd61", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplot_mosaic([param_names], constrained_layout=True, figsize=(10, 3))\n", + "for j, param in enumerate(params):\n", + " ax[param.name].hist(x_data[:, j], bins=50)\n", + " ax[param.name].set_title(param.name)\n", + " ax[param.name].set_yticks([])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "5c22c2ef-b248-4b5b-8065-5c325aa25d85", + "metadata": { + "tags": [] + }, + "source": [ + "## Approximate Forward Problem" + ] + }, + { + "cell_type": "markdown", + "id": "18e99871-d6f8-4f7b-9382-f6503c92d93b", + "metadata": { + "tags": [] + }, + "source": [ + "#### Split training and test data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84c2cbfd-9ce8-437a-8d1a-385e479ee59b", + "metadata": {}, + "outputs": [], + "source": [ + "split = int(0.8*x_data.shape[0])\n", + "\n", + "# training data\n", + "x_train = x_data[:split]\n", + "y_train = y_data[:split]\n", + "w_train = w_data[:split]\n", + "\n", + "# test data\n", + "x_test = x_data[split:]\n", + "y_test = y_data[split:]" + ] + }, + { + "cell_type": "markdown", + "id": "02490c03-13e6-48c2-9552-16ce43d6d939", + "metadata": { + "tags": [] + }, + "source": [ + "#### Define index set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2e8204e-c516-48fc-85cd-bcabf3b38938", + "metadata": {}, + "outputs": [], + "source": [ + "# define multiindices\n", + "indices = pt.index.simplex(dimension=len(params), maximum=5)\n", + "index_set = pt.index.IndexSet(indices)\n", + "\n", + "print(f\"Multiindex information:\")\n", + "print(f\" number of indices: {index_set.shape[0]}\")\n", + "print(f\" dimension: {index_set.shape[1]}\")\n", + "print(f\" maximum dimension: {index_set.max}\")\n", + "print(f\" number of sobol indices: {len(index_set.sobol_tuples)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7dc7931-a943-4ce1-a770-c9b6d5176db8", + "metadata": {}, + "outputs": [], + "source": [ + "indices[-5:]" + ] + }, + { + "cell_type": "markdown", + "id": "cbcc9c33-79e6-4ffb-b51a-9615e3de4907", + "metadata": {}, + "source": [ + "#### Train PC approximation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4884e6b5-1fb4-4d57-800a-774a6fb3e9a2", + "metadata": {}, + "outputs": [], + "source": [ + "# run pc approximation\n", + "surrogate = pt.chaos.PolynomialChaos(params, index_set, x_train, w_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "id": "b096635a-830a-4039-a07c-5145dd0a55c4", + "metadata": {}, + "source": [ + "#### Compute approximation error" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c311f090-b48e-4ffa-b963-a97827583247", + "metadata": {}, + "outputs": [], + "source": [ + "# test approximation error\n", + "y_approx = surrogate.eval(x_test)\n", + "\n", + "# L2-L2 error\n", + "err_abs = np.sum(np.linalg.norm(y_test - y_approx, axis=1))/y_test.shape[0]\n", + "err_rel = np.sum(np.linalg.norm((y_test - y_approx)/y_test, axis=1))/y_test.shape[0]\n", + "print(f\"L2-L2 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)\")\n", + "\n", + "# C0-L2 error\n", + "err_abs = np.sum(np.max(np.abs(y_test - y_approx), axis=1))/y_test.shape[0]\n", + "err_rel = np.sum(np.max(np.abs(y_test - y_approx)/np.abs(y_test), axis=1))/y_test.shape[0]\n", + "print(f\"L2-C0 error: {err_abs:4.2e} (abs), {err_rel:4.2e} (rel)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf030598-1d06-400e-a7f7-e3ee3ffcea9b", + "metadata": {}, + "outputs": [], + "source": [ + "fig, axes = plt.subplot_mosaic([[\"1\", \"2\", \"3\"], [\"4\", \"5\", \"6\"]], constrained_layout=True, figsize=(9,6))\n", + "for ax in axes.values():\n", + " j = np.random.randint(0, x_test.shape[0])\n", + " ax.plot(omega, y_test[j], label=f\"ground truth\", lw=0.5)\n", + " ax.plot(omega, y_approx[j], label=\"approx\", lw=0.5)\n", + " ax.set_title(f\"test data index: {j}\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "187e51ae-047f-42a4-8671-3cb0eaa11472", + "metadata": {}, + "source": [ + "## Global Sensitivity Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7dc50494-1dea-4597-a266-476289b1cb7d", + "metadata": {}, + "outputs": [], + "source": [ + "max_vals[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e990349-158f-4aba-90fe-2e56d6b248e7", + "metadata": {}, + "outputs": [], + "source": [ + "max_vals = np.max(surrogate.sobol, axis=1)\n", + "l2_vals = np.linalg.norm(surrogate.sobol, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ccbcf0d9-d125-401c-a127-5d86b4aec94c", + "metadata": {}, + "outputs": [], + "source": [ + "sobol_max = [list(reversed([x for _,x in sorted(zip(max_vals, index_set.sobol_tuples))]))]\n", + "sobol_L2 = [list(reversed([x for _,x in sorted(zip(l2_vals, index_set.sobol_tuples))]))]\n", + "\n", + "print(f\"{'#':>2} {'max':<10} {'L2':<10}\")\n", + "print(\"-\"*25)\n", + "for j in range(31):\n", + " print(f\"{j+1:>2} {str(sobol_max[0][j]):<10} {str(sobol_L2[0][j]):<10}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50af6adc-ac11-458a-91c3-598240960cf9", + "metadata": {}, + "outputs": [], + "source": [ + "def binom(n,k): \n", + " return np.math.factorial(n) / np.math.factorial(k) / np.math.factorial(n-k)\n", + "bdry = [0]*7\n", + "for jj in range(1,len(bdry)):\n", + " bdry[jj] = int(bdry[jj-1] + binom(len(bdry)-1,jj))\n", + " \n", + "fig, axes = plt.subplot_mosaic([[\"max\"], [\"L2\"]])\n", + "\n", + "axes[\"max\"].axvspan(bdry[0], bdry[1], facecolor=\"k\", alpha=0.2, label=\"SOB = (i,)\")\n", + "axes[\"max\"].axvspan(bdry[1], bdry[2], facecolor=\"k\", alpha=0.15, label=\"SOB = (i,j)\")\n", + "axes[\"max\"].axvspan(bdry[2], bdry[3], facecolor=\"k\", alpha=0.1, label=\"SOB = (i,j,k)\")\n", + "axes[\"max\"].semilogy(range(surrogate.sobol.shape[0]), max_vals,\n", + " \"-o\", linewidth=1, label=\"max\")\n", + "axes[\"max\"].legend()\n", + "axes[\"max\"].grid()\n", + "\n", + "axes[\"L2\"].axvspan(bdry[0], bdry[1], facecolor=\"k\", alpha=0.2, label=\"SOB = (i,)\")\n", + "axes[\"L2\"].axvspan(bdry[1], bdry[2], facecolor=\"k\", alpha=0.15, label=\"SOB = (i,j)\")\n", + "axes[\"L2\"].axvspan(bdry[2], bdry[3], facecolor=\"k\", alpha=0.1, label=\"SOB = (i,j,k)\")\n", + "axes[\"L2\"].semilogy(range(surrogate.sobol.shape[0]), l2_vals,\n", + " \"-o\", linewidth=1, label=\"$L^2$\")\n", + "axes[\"L2\"].legend()\n", + "axes[\"L2\"].grid()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "7252d885-84b5-4a47-9cc2-b3f6374351b9", + "metadata": {}, + "source": [ + "## Inverse Problem & Parameter Reconstruction" + ] + }, + { + "cell_type": "markdown", + "id": "fb10b512-2d01-445a-8711-3b57524e950b", + "metadata": {}, + "source": [ + "$f(y_1,y_2) = y_1 $" + ] + }, + { + "cell_type": "markdown", + "id": "b0d4d74c-682c-4823-93d1-121fd3eb164f", + "metadata": {}, + "source": [ + "#### Load measurements" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecc36513-8627-4f07-886d-9dcbfc6f9e88", + "metadata": {}, + "outputs": [], + "source": [ + "# measurement\n", + "use_real_measurement = False\n", + "if use_real_measurement is True:\n", + " y_meas = np.log10(np.load(path+\"measurement.npy\").reshape(1, -1))\n", + "else:\n", + " x_true = x_data[-1].reshape(1, -1)\n", + " y_true = y_data[-1].reshape(1, -1)\n", + " b = 0.05\n", + " noise = np.random.normal(0, b, y_true.shape)\n", + " y_meas = y_true + noise" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0fa7413-97cf-4dec-9403-b0f64a7818c7", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure()\n", + "if use_real_measurement is False:\n", + " plt.plot(omega, y_true[0], label=\"y_true\")\n", + "plt.scatter(omega, y_meas[0], s=2, label=\"measurement\", color=cmap.tab10(1))\n", + "plt.legend()\n", + "plt.title(\"Ground truth (solid) and noisy measurement (dots)\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "40495846-7f57-4ee6-9b06-26d91882addc", + "metadata": {}, + "source": [ + "#### Define Bayesian posterior" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1186d066-6018-4a18-a805-d5e15d1eaad7", + "metadata": {}, + "outputs": [], + "source": [ + "# define forward and error model\n", + "hyper_params = [pt.parameter.Parameter(index=len(params)+1, name=\"b\", domain=[0, 0.1], distribution=\"uniform\")]\n", + "dim = len(params+hyper_params)\n", + "prior = pt.sampler.ParameterSampler(params+hyper_params) # extended prior\n", + "forward_model = lambda x: surrogate.eval(x[:,:-1]) # extended forward model\n", + "sigma = lambda x: np.abs(x[:, -1]).reshape(-1, 1)*np.ones((1, y_meas.shape[1])) # error model\n", + "\n", + "# define Bayesian posterior\n", + "lh = Gaussian(forward_model, sigma, dim)\n", + "def log_posterior(x, meas):\n", + " return lh.log_likelihood(x, meas) + prior.log_pdf(x)" + ] + }, + { + "cell_type": "markdown", + "id": "ba006e83-5173-419b-9d11-176c14e60242", + "metadata": {}, + "source": [ + "#### Setup MCMC" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3a47c73-b2ae-427f-b56a-e194e5c84dbe", + "metadata": {}, + "outputs": [], + "source": [ + "# setup MCMC\n", + "n_walkers = 15\n", + "chain_length = 1000\n", + "seed = prior.sample(n_walkers)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68accd23-f187-4f69-9c37-dac3ada699d7", + "metadata": {}, + "outputs": [], + "source": [ + "# define runner\n", + "runner = emcee.EnsembleSampler(n_walkers, dim, log_posterior, args=[y_meas])" + ] + }, + { + "cell_type": "markdown", + "id": "02c2b899-95df-4be0-a384-269f2cf17c21", + "metadata": {}, + "source": [ + "#### Run MCMC" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f0a03eb-1bde-4fea-9682-fd123b6b0ba5", + "metadata": {}, + "outputs": [], + "source": [ + "# discard burn-in\n", + "state = runner.run_mcmc(seed, 500, progress=True)\n", + "runner.reset()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30451bb1-e0bd-4435-8db5-2dd24488d010", + "metadata": {}, + "outputs": [], + "source": [ + "# run MCMC\n", + "runner.run_mcmc(seed, chain_length, progress=True);" + ] + }, + { + "cell_type": "markdown", + "id": "a3571a01-e1bd-4a85-924f-043ef1d574b1", + "metadata": { + "tags": [] + }, + "source": [ + "#### Data Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1653cf3c-3f1d-40bf-84fd-28ce76676364", + "metadata": {}, + "outputs": [], + "source": [ + "# simple evaluation\n", + "samples = runner.get_chain(flat=True)\n", + "mean = np.mean(samples, axis=0)\n", + "std = np.std(samples, axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2214221a-a2be-40f7-88c4-5c0d7091210e", + "metadata": {}, + "outputs": [], + "source": [ + "# plot posterior distribution\n", + "samples = runner.get_chain(flat=True)\n", + "layout = [[j] for j in range(7)]\n", + "layout = [[0, 1], [2, 3], [4, 5]]\n", + "fig, axes = plt.subplot_mosaic(layout, figsize=(12, 7.5))\n", + "for j, (param, ax) in enumerate(zip(params+hyper_params, axes.values())):\n", + " bins = 50 if param.name != \"b\" else 150\n", + " ax.hist(samples[:, j], bins=bins, density=True, range=param.domain, alpha=0.5)\n", + " ax.axvline(x=mean[j], color=cmap.tab10(0), label=\"mean\")\n", + " ax.axvline(x=mean[j]-std[j], ls=\"--\", color=cmap.tab10(0), label=\"std\")\n", + " ax.axvline(x=mean[j]+std[j], ls=\"--\", color=cmap.tab10(0))\n", + " if use_real_measurement is False:\n", + " if param.name == \"b\":\n", + " ax.axvline(x=b, color=\"k\", label=\"true value\")\n", + " else: \n", + " ax.axvline(x=x_true[0, j], color=\"k\", label=\"true value\")\n", + " ax.set_ylabel(f\"{j+1} - {param.name}\")\n", + " ax.set_yticks([])\n", + " ax.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9b15e08-8248-46cd-99c6-e5f8cd64c6e0", + "metadata": {}, + "outputs": [], + "source": [ + "vals = surrogate.eval(np.mean(samples, axis=0)[:-1])\n", + "errs = np.abs((y_true - vals)/y_true)\n", + "\n", + "fig, axes = plt.subplot_mosaic([[\"approx\", \"error\"]], figsize=(15, 5))\n", + "axes[\"approx\"].plot(omega, vals[0], label=\"approx\", color=cmap.tab10(0))\n", + "axes[\"approx\"].plot(omega, y_true[0], \"--\", label=\"ground truth\", color=cmap.tab10(1))\n", + "axes[\"approx\"].scatter(omega, y_meas[0], s=2, label=\"measurement\", color=cmap.tab10(1))\n", + "axes[\"approx\"].legend()\n", + "axes[\"approx\"].set_title(\"Ground truth (solid) and noisy measurement (dots)\")\n", + "\n", + "axes[\"error\"].plot(omega, errs[0])\n", + "axes[\"error\"].grid()\n", + "axes[\"error\"].set_title(\"Pointwise deviation from ground truth\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36afafba-3de1-4dde-b114-b6b278a62473", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/data/scat/angles.npy b/data/scat/angles.npy new file mode 100644 index 0000000000000000000000000000000000000000..f9a05f5971c0d595c7adfaa4f35a84ed755a6fb7 Binary files /dev/null and b/data/scat/angles.npy differ diff --git a/data/scat/parameter.npy b/data/scat/parameter.npy new file mode 100644 index 0000000000000000000000000000000000000000..e91c711dceba1fd49038334451e35e7e99d9eef8 Binary files /dev/null and b/data/scat/parameter.npy differ diff --git a/data/scat/w_train.npy b/data/scat/w_train.npy new file mode 100644 index 0000000000000000000000000000000000000000..3dba9dab0941c58a3a899c0107cd1a3aede624d8 Binary files /dev/null and b/data/scat/w_train.npy differ diff --git a/data/scat/x_train.npy b/data/scat/x_train.npy new file mode 100644 index 0000000000000000000000000000000000000000..f0fe647d8cc44144fa820a0a31ca390da4245490 Binary files /dev/null and b/data/scat/x_train.npy differ diff --git a/data/scat/y_train.npy b/data/scat/y_train.npy new file mode 100644 index 0000000000000000000000000000000000000000..300166d32a5fa72f5f8ebd5b8c07cf0f1249f935 Binary files /dev/null and b/data/scat/y_train.npy differ diff --git a/data/xrr/measurement.npy b/data/xrr/measurement.npy new file mode 100644 index 0000000000000000000000000000000000000000..828c0ba249448f7f1bd00da833676ab5ddc248cf Binary files /dev/null and b/data/xrr/measurement.npy differ diff --git a/data/xrr/omega.npy b/data/xrr/omega.npy new file mode 100644 index 0000000000000000000000000000000000000000..db2865a266701d93d7741d201f4bc48c3696ed20 Binary files /dev/null and b/data/xrr/omega.npy differ diff --git a/data/xrr/w_train.npy b/data/xrr/w_train.npy new file mode 100644 index 0000000000000000000000000000000000000000..402d6c7640560b748bd3f8f82ba1caa90bfc6e0d Binary files /dev/null and b/data/xrr/w_train.npy differ diff --git a/data/xrr/x_train.npy b/data/xrr/x_train.npy new file mode 100644 index 0000000000000000000000000000000000000000..8e6c86dff6766682726a48e6b56b946cd5918ae1 Binary files /dev/null and b/data/xrr/x_train.npy differ diff --git a/data/xrr/y_train.npy b/data/xrr/y_train.npy new file mode 100644 index 0000000000000000000000000000000000000000..9f4bc9117464e3f6b55456b8de2c4ccf90099443 Binary files /dev/null and b/data/xrr/y_train.npy differ diff --git a/img/notebooks/scat_setup.png b/img/notebooks/scat_setup.png new file mode 100644 index 0000000000000000000000000000000000000000..936bad3aa09d970a904d8d4d1443df0cd31fb7b0 Binary files /dev/null and b/img/notebooks/scat_setup.png differ diff --git a/src/utils.py b/src/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e9001492b7700046efae26b2000e9d3b912c7b03 --- /dev/null +++ b/src/utils.py @@ -0,0 +1,342 @@ +from typing import Union, List, Tuple +import numpy as np +import pythia as pt + + +class Gaussian: + """Gaussian likelihood function for differentiable forward problem. + + Assemble the Gaussian likelihood + + .. math:: + \\mathcal{L}(x) = \\frac{1}{(2\\pi)^{M/2}\\sqrt{\\det \\Sigma}} \\exp\\Bigl( -\\frac{1}{2} \\Vert \\Sigma^{-1/2}(f(x)-\\delta) \\Vert \\Bigr) + + for covariance matrix + :math:`\\Sigma = \\operatorname{diag}(\\sigma_1(y_1),\\dots,\\sigma_M(y_M))` + and measurement/observation :math:`\\delta`. + + Parameters + ---------- + f : function + Forward model. + sigma : function + Error model for standard deviation describing :math:`\\Sigma`. + xdim : int + Number of stochastic parameters. + """ + + def __init__(self, f, sigma, xdim): + """ Initialize Gaussian likelihood object. """ + self._f = f + self._std = sigma + self._xdim = xdim + + def likelihood(self, x, y_meas): + """ Evaluate the Gaussian likelihood with specified measurement. + + Parameters + ---------- + x : array_like + Realizations of stochastic parameters. + y_meas : array_like + Measurement :math:`\\delta`. + """ + # check dimensionality and reshape if necessary. + y, f_val, std, mdim, fdim = self._reshape(x, y_meas) + + a = (2*np.pi)**(-mdim*fdim/2) # float + b = 1/np.prod(std, axis=(0, 2))**mdim # shape: (#points,) + c = np.prod(np.exp(-(f_val-y)**2/(2*std**2)), + axis=(0, 2)) # (#points,) + + return a*b*c + + def log_likelihood(self, x, y_meas): + """ Evaluate the Gaussian log-likelihood with specified measurement. + + Parameters + ---------- + x : array_like + Realizations of stochastic parameters. + y_meas : array_like + Measurement :math:`\\delta`. + """ + # check dimensionality and reshape if necessary. + y, f_val, std, mdim, fdim = self._reshape(x, y_meas) + + a = -mdim*fdim/2 * np.log(2*np.pi) # float + b = -mdim * np.sum(np.log(std), axis=(0, 2)) # (#points,) + c = np.sum(-(f_val-y)**2/(2*std**2), axis=(0, 2)) # (#points,) + + return a+b+c + + def _reshape(self, x, y_meas): + """ Check shape compatibility of input and reshape if necessary. + + Parameters + ---------- + x : array_like + Realizations of stochastic parameters. + y_meas : array_like + Measurement :math:`\\delta`. + """ + x_val = np.array(x) + y_val = np.array(y_meas) + # x shape: (#points, xdim) + if x_val.ndim < 2: + x_val.shape = 1, -1 + assert x_val.shape[1] == self._xdim + # y_meas shape: (mdim, fdim) + if y_val.ndim < 2: + y_val.shape = 1, -1 + assert x_val.ndim == 2 and y_val.ndim == 2 + + # get number of measurements and image dim of f + mdim, fdim = y_val.shape + + f_val = self._f(x_val) + # f_val shape: (#points, fdim) + if f_val.ndim < 2: + f_val.shape = 1, -1 + assert f_val.shape[1] == fdim + + std = self._std(x_val) + # std shape: (#points, fdim) + if std.ndim < 2: + std.shape = 1, -1 + if std.shape[1] == 1: + std = std*np.ones(fdim) + assert std.shape[1] == fdim + assert np.min(std) >= 0 + + # Reshape for fast multiplication + y = np.expand_dims(y_meas, axis=1) # (mdim, 1, fdim) + std = np.expand_dims(self._std(x_val), axis=0) # (1, #points, fdim) + f_val = np.expand_dims(f_val, axis=0) # (1, #points, fdim) + + return y, f_val, std, mdim, fdim + + +class Batman: + """Target function for tutorial 1.""" + + def __init__(self) -> None: + """Target function for tutorial 1.""" + + def _f(self, y: np.ndarray) -> np.ndarray: + """Helper function.""" + val = (np.abs(y/2) + - (3*np.sqrt(33)-7)/122 * y**2 + - 3 + + np.sqrt(1-(np.abs(np.abs(y)-2)-1)**2) + ) + return val + + def _g(self, y: np.ndarray) -> np.ndarray: + """Helper function.""" + a = -(np.abs(y)-1)*(np.abs(y)-0.75) + b = np.zeros_like(a) + idx = np.where(np.abs(a) <= 1e-14)[0] + b[idx] = 1 + idx = np.where(np.abs(a) >= 1e-14)[0] + b[idx] = np.abs(a[idx])/a[idx] + val = 9*np.sqrt(b) - 8*np.abs(y) + return val + + def _h(self, y: np.ndarray) -> np.ndarray: + """Helper function.""" + a = -(np.abs(y)-0.5)*(np.abs(y)-0.75) + val = 3*np.abs(y) + 0.75*np.sqrt(np.abs(a)/a) + return val + + def _p(self, y: np.ndarray) -> np.ndarray: + """Helper function.""" + a = -(y-0.5)*(y+0.5) + val = 2.25*np.sqrt(np.abs(a)/a) + return val + + def _q(self, y: np.ndarray) -> np.ndarray: + """Helper function.""" + a = 6*np.sqrt(10)/7 + b = (1.5 - 0.5*np.abs(y)) * np.sqrt(np.abs(np.abs(y)-1)/(np.abs(y)-1)) + c = 6*np.sqrt(10)/14 * np.sqrt(4-(np.abs(y)-1)**2) + return a + b - c + + def _r(self, y: np.ndarray) -> np.ndarray: + """Helper function.""" + a = np.sqrt(1-(y/7)**2) + b = np.sqrt(np.abs(np.abs(y)-3)/(np.abs(y)-3)) + return 3 * a * b + + def _s(self, y: np.ndarray) -> np.ndarray: + """Helper function.""" + a = np.sqrt(1-(y/7)**2) + b = np.sqrt(np.abs(np.abs(y)-4)/(np.abs(y)-4)) + return -3 * a * b + + def _t(self, y: np.ndarray) -> np.ndarray: + """Helper function.""" + return np.sqrt(np.abs(y)/y) + + def upper(self, y: np.ndarray) -> np.ndarray: + """Upper part of the batman function.""" + val = np.zeros(y.size) + + # set r parts + idx = np.where(np.logical_and(y < -3, y >= -7)) + val[idx] = self._r(y[idx]) + idx = np.where(np.logical_and(y <= 7, y > 3)) + val[idx] = self._r(y[idx]) + + # set q parts + idx = np.where(np.logical_and(y < -1, y >= -3)) + val[idx] = self._q(y[idx]) + idx = np.where(np.logical_and(y <= 3, y > 1)) + val[idx] = self._q(y[idx]) + + # set g parts + idx = np.where(np.logical_and(y <= -0.75, y >= -1)) + val[idx] = self._g(y[idx]) + idx = np.where(np.logical_and(y <= 1, y >= 0.75)) + val[idx] = self._g(y[idx]) + + # set h parts + idx = np.where(np.logical_and(y < -0.5, y > -0.75)) + val[idx] = self._h(y[idx]) + idx = np.where(np.logical_and(y < 0.75, y > 0.5)) + val[idx] = self._h(y[idx]) + + # set p parts + idx = np.where(np.logical_and(y >= -0.5, y <= 0.5)) + val[idx] = self._p(y[idx]) + + return val + 4 + + def lower(self, y: np.ndarray) -> np.ndarray: + """Lower part of the batman function.""" + val = np.zeros(y.size) + + # set s parts + idx = np.where(np.logical_and(y <= -4, y >= -7)) + val[idx] = self._s(y[idx]) + idx = np.where(np.logical_and(y <= 7, y >= 4)) + val[idx] = self._s(y[idx]) + + # set f parts + idx = np.where(np.logical_and(y <= 4, y >= -4)) + val[idx] = self._f(y[idx]) + + return val + 4 + + def eval(self, y: np.ndarray) -> np.ndarray: + """Evaluate target function. + + Parameters + ---------- + y : np.ndarray + Parameter values. + + Returns + ------- + : + Forward model evaluted in parameter values. + """ + if y.ndim > 1: + y = y.flatten() + return np.array([self.upper(y), self.lower(y)]).T + + +class Sobol: + """Target function for tutorial 2.""" + + def __init__(self, a: Union[List, Tuple, np.ndarray]) -> None: + """Target function for tutorial 2. + + Parameters + ---------- + a : array_like + Parameters for Sobol functions. + """ + self.a = np.array(a) + assert self.a.ndim == 1 + + def eval(self, y: np.ndarray) -> np.ndarray: + """Evaluate target function. + + Parameters + ---------- + y : np.ndarray + Parameter values. + + Returns + ------- + : + Forward model evaluted in parameter values. + """ + if y.ndim == 1: + y.shape = 1, -1 + assert y.ndim == 2 + assert y.shape[1] == self.a.size + ret = (np.abs(4.0*y - 2.0) + self.a) / (1.0 + self.a) + return np.prod(ret, axis=1).reshape(-1, 1) + + def indices(self) -> Tuple[List, np.ndarray]: + """Compute analytical Sobol indices for target function. + + Returns + ------- + sobol_tuples : list of tuples + List of Sobol tuples. + sobol_indices : np.ndarray + Values of Sobol indices ordered the same as `sobol_tuples`. + """ + sobol = {} + beta = (1.0+self.a)**(-2) / 3 + var = np.prod(1.0 + beta) - 1.0 + sobol_tuples = pt.index.IndexSet( + pt.index.tensor([1]*self.a.size)).sobol_tuples + for sdx in sobol_tuples: + sobol[sdx] = 1.0 / var + for k in sdx: + sobol[sdx] *= beta[k-1] + sobol_indices = np.array(list(sobol.values())) + return sobol_tuples, sobol_indices + + +class Sine: + """Target function for tutorial 3.""" + + def __init__(self, scales: Union[list, tuple, np.ndarray]) -> None: + """Target function for tutorial 3. + + Parameters + ---------- + scales : array_like + Scales for each summand. + """ + self.scales = np.array(scales) + self.points = np.linspace(0, 2*np.pi, 200) + assert self.scales.ndim == 1 + + def eval(self, x: np.ndarray) -> np.ndarray: + """Evaluate target function. + + Parameters + ---------- + x : np.ndarray + Parameter values. + + Returns + ------- + : + Forward model evaluted in parameter values. + """ + ret = np.zeros((x.shape[0], self.points.size)) + for j, (s, xj) in enumerate(zip(self.scales, x.T)): + ret += (s * np.reshape(xj, (-1, 1)) + * np.sin((j+1)*self.points).reshape(1, -1)) + return ret + + +if __name__ == "__main__": + pass