import os
import numpy as np
import torch
import matplotlib.pyplot as plt
import neural_networks_101.src as src


def main():
    """Train a CNN on MNIST and plot the worst-classified test images.

    The script

    1. loads the MNIST training and test data sets,
    2. saves a figure with a random selection of training images,
    3. trains the network for a fixed number of epochs and reports the
       test loss / accuracy after every epoch,
    4. evaluates the trained network on the test set and saves a figure
       with the test samples whose output deviates most from the one-hot
       encoded target.

    Figures are written to ``../img`` (relative to the current working
    directory).
    """
    # Random seed for reproducibility
    torch.manual_seed(42)

    # Get CPU or GPU device for training
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(src.misc.time_stamp(), "load MNIST data")
    train_data, test_data = src.mnist.get_mnist_data()

    # plot some training data
    print(src.misc.time_stamp(), "plot sample images (random selection)")
    # exist_ok=True already covers the "directory exists" case
    os.makedirs("../img", exist_ok=True)
    shape = (4, 6)
    n_samples = int(np.prod(shape))
    # plain Python ints, matching the ``List[int]`` the plot helper expects
    idxs = torch.randint(len(train_data), size=(n_samples,)).tolist()
    src.mnist.plot_mnist_data(train_data, idxs, shape=shape)
    file_name = "../img/mnist_images.png"
    plt.savefig(file_name, dpi=200)
    print(src.misc.time_stamp(), f"save to: {file_name}")

    print(src.misc.time_stamp(), "setup NN model")
    # Send the model to the device (CPU or GPU)
    model = src.mnist.NeuralNetwork().to(device)
    # Define the optimizer to use for gradient descent
    optimizer = torch.optim.Adadelta(model.parameters(), lr=1.0)
    # Multiplies the learning rate by gamma on every scheduler.step(),
    # i.e. once per epoch in the loop below
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.7)

    # Train the model
    n_epochs = 2
    for epoch in range(n_epochs):
        print(src.misc.time_stamp(), f"training epoch {epoch+1} / {n_epochs}")
        with src.misc.timeit("time: {:4.2f} s"):
            src.mnist.train(model, device, train_data, optimizer,
                            log_interval=100)
        test_loss, correct = src.mnist.test(model, device, test_data)
        rate = 100 * correct / len(test_data)
        print(src.misc.time_stamp(),
              f"test set avg. loss: {test_loss}",
              f" -- accuracy: {correct}/{len(test_data)} ({rate:4.1f} %)")
        scheduler.step()

    # evaluate forward model
    print(src.misc.time_stamp(), "evaluate model on test set")
    # enable evaluation mode, return to training mode with model.train()
    model.eval()
    # shuffle=False is essential: row i of `outputs` must belong to
    # test_data[i], because the plot below looks up images by that index.
    test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=100, num_workers=1,
        pin_memory=device.type == "cuda", shuffle=False)
    targets, outputs = [], []
    with torch.no_grad():  # turn off gradient computation
        for data, target in test_loader:
            output = model(data.to(device))
            # labels never leave the CPU; outputs must be copied back
            # before NumPy can read them (fails for CUDA tensors otherwise)
            targets.append(target.numpy())
            outputs.append(output.cpu().numpy())
    # concatenate also works when the last batch is smaller than the rest
    targets = np.concatenate(targets, axis=0)
    outputs = np.concatenate(outputs, axis=0)

    # plot mislabeled test data
    print(src.misc.time_stamp(), "plot mislabeled data")
    # sort indices of test data according to classification error (descending)
    target_vectors = np.eye(outputs.shape[1])[targets]  # one-hot rows
    errors = np.linalg.norm(outputs - target_vectors, axis=1)
    idxs = np.argsort(errors)[::-1]
    # plot n test samples with largest classification error
    n = 5
    src.mnist.plot_mnist_accuracy(
        test_data, targets.reshape(-1, 1), outputs, idxs[:n].tolist(),
        figsize=(10, 3*n))
    file_name = "../img/mnist_mislabeled_data.png"
    plt.savefig(file_name, dpi=200)
    print(src.misc.time_stamp(), f"save to: {file_name}")


if __name__ == "__main__":
    main()
b/nbs/MNIST_image_classification.ipynb @@ -0,0 +1,299 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2022-06-23T10:26:06.041437Z", + "start_time": "2022-06-23T10:26:05.249006Z" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import torch\n", + "import neural_networks_101.src as src\n", + "\n", + "%matplotlib inline\n", + "%reload_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2022-06-23T10:26:06.058708Z", + "start_time": "2022-06-23T10:26:06.043140Z" + } + }, + "outputs": [], + "source": [ + "# Random seed for reproducibility\n", + "torch.manual_seed(42)\n", + "\n", + "# Get CPU or GPU device for training\n", + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "device = torch.device(device)\n", + "\n", + "# number of training epochs\n", + "n_epochs = 2" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2022-06-23T10:26:06.134729Z", + "start_time": "2022-06-23T10:26:06.062141Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2022-06-23 12:26:06] load MNIST data\n" + ] + } + ], + "source": [ + "print(src.misc.time_stamp(), \"load MNIST data\")\n", + "train_data, test_data = src.mnist.get_mnist_data()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2022-06-23T10:26:07.071093Z", + "start_time": "2022-06-23T10:26:06.136433Z" + } + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 720x432 with 15 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "shape = (3, 5)\n", + "idxs = torch.randint(len(train_data), size=(np.prod(shape),))\n", + "src.mnist.plot_mnist_data(train_data, idxs, 
shape=shape, figsize=(10, 6))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2022-06-23T10:26:07.094779Z", + "start_time": "2022-06-23T10:26:07.073070Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2022-06-23 12:26:07] setup NN model\n" + ] + } + ], + "source": [ + "print(src.misc.time_stamp(), \"setup NN model\")\n", + "# Send the model to the device (CPU or GPU)\n", + "model = src.mnist.NeuralNetwork().to(device)\n", + "# Define the optimizer to user for gradient descent\n", + "optimizer = torch.optim.Adadelta(model.parameters(), lr=1.0)\n", + "# Shrinks the learning rate by gamma every step_size\n", + "scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.7)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2022-06-23T10:29:26.390235Z", + "start_time": "2022-06-23T10:26:42.544241Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2022-06-23 12:26:42] training epoch 1\n", + "progress: 0.0 % -- loss: 2.3028767108917236\n", + "progress: 10.7 % -- loss: 1.6748236417770386\n", + "progress: 21.3 % -- loss: 1.5185235738754272\n", + "progress: 32.0 % -- loss: 1.5623918771743774\n", + "progress: 42.6 % -- loss: 1.4711004495620728\n", + "progress: 53.3 % -- loss: 1.4986964464187622\n", + "progress: 64.0 % -- loss: 1.5017988681793213\n", + "progress: 74.6 % -- loss: 1.4901072978973389\n", + "progress: 85.3 % -- loss: 1.5001312494277954\n", + "progress: 95.9 % -- loss: 1.5052735805511475\n", + "[2022-06-23 12:27:55] time: 72.96 s\n", + "[2022-06-23 12:28:01] test set avg. 
loss: -0.9677526263237 -- accuracy: 9683/10000 (96.8 %)\n", + "[2022-06-23 12:28:01] training epoch 2\n", + "progress: 0.0 % -- loss: 1.5741151571273804\n", + "progress: 10.7 % -- loss: 1.4860159158706665\n", + "progress: 21.3 % -- loss: 1.5614594221115112\n", + "progress: 32.0 % -- loss: 1.4640870094299316\n", + "progress: 42.6 % -- loss: 1.476805329322815\n", + "progress: 53.3 % -- loss: 1.4751007556915283\n", + "progress: 64.0 % -- loss: 1.4962173700332642\n", + "progress: 74.6 % -- loss: 1.4818075895309448\n", + "progress: 85.3 % -- loss: 1.5036966800689697\n", + "progress: 95.9 % -- loss: 1.477353572845459\n", + "[2022-06-23 12:29:20] time: 78.83 s\n", + "[2022-06-23 12:29:26] test set avg. loss: -0.9751591731071472 -- accuracy: 9759/10000 (97.6 %)\n" + ] + } + ], + "source": [ + "for epoch in range(n_epochs):\n", + " print(src.misc.time_stamp(), f\"training epoch {epoch+1}\")\n", + " with src.misc.timeit(\"time: {:4.2f} s\"):\n", + " src.mnist.train(model, device, train_data, optimizer, log_interval=100)\n", + " test_loss, correct = src.mnist.test(model, device, test_data)\n", + " rate = 100 * correct / len(test_data)\n", + " print(src.misc.time_stamp(),\n", + " f\"test set avg. 
loss: {test_loss}\",\n", + " f\" -- accuracy: {correct}/{len(test_data)} ({rate:4.1f} %)\")\n", + " scheduler.step()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2022-06-23T10:29:33.968008Z", + "start_time": "2022-06-23T10:29:29.863168Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2022-06-23 12:29:29] evaluate model on training set\n" + ] + } + ], + "source": [ + "# evaluate forward model\n", + "print(src.misc.time_stamp(), \"evaluate model on training set\")\n", + "model.eval() # enable evaluation mode, return to training mode with model.train()\n", + "test_loader = torch.utils.data.DataLoader(\n", + " test_data, batch_size=100, num_workers=1, pin_memory=True,\n", + " shuffle=True)\n", + "targets, outputs = [], []\n", + "with torch.no_grad(): # turn of gradient computation\n", + " for data, target in test_loader:\n", + " data, target = data.to(device), target.to(device)\n", + " output = model(data)\n", + " targets.append(np.array(target))\n", + " outputs.append(np.array(output))\n", + "targets = np.array(targets).reshape(-1, 1)\n", + "outputs = np.concatenate(outputs, axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2022-06-23T10:29:38.504693Z", + "start_time": "2022-06-23T10:29:35.328131Z" + }, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2022-06-23 12:29:35] plot mislabeled data\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 720x2160 with 20 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# plot mislabeled test data\n", + "print(src.misc.time_stamp(), \"plot mislabeled data\")\n", + "target_vectors = np.array([np.eye(1, 10, k=int(t)).flatten()\n", + " for t in targets])\n", + "idxs = 
np.argsort(np.linalg.norm(outputs-target_vectors, axis=1))[::-1][:10]\n", + "src.mnist.plot_mnist_accuracy(test_data, targets, outputs, idxs, figsize=(10, 3*len(idxs)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/requirements.txt b/requirements.txt index a16d0230d82e384a128195d8ced7dec77d1c8264..f987ffb7819f98b35bc326dcd53fd56a14447dba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ numpy >= 1.20.0 scipy >= 1.5.0 -torch >= 1.7.1 matplotlib >= 3.3.2 +torch >= 1.11.0 +torchvision >= 0.12.0 diff --git a/src/__init__.py b/src/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..3a794adbb887b99e670852b28c06354c241b3c9c 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -0,0 +1,5 @@ +from . 
"""Utility functions for handling the MNIST data set."""
from typing import Tuple, List, Optional
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt


class NeuralNetwork(torch.nn.Module):
    """Convolutional neural network used for MNIST image classification.

    Two 3x3 convolutions, a 2x2 max pooling and two fully connected layers;
    the output is a vector of class probabilities (softmax).
    """

    def __init__(self) -> None:
        """Initialize network layers."""
        super().__init__()
        self.conv1 = torch.nn.Conv2d(
            1, 32, kernel_size=3, stride=1, padding='valid')
        self.conv2 = torch.nn.Conv2d(
            32, 64, kernel_size=3, stride=1, padding='valid')
        self.dropout1 = torch.nn.Dropout(0.25)
        self.dropout2 = torch.nn.Dropout(0.5)
        # 9216 = 64 channels * 12 * 12 pixels, which is what the layers
        # above produce for 28x28 input images (28 -> 26 -> 24 -> pool 12)
        self.fc1 = torch.nn.Linear(9216, 128)
        self.fc2 = torch.nn.Linear(128, 10)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Evaluate the network on input data.

        This function defines the topology of the network.

        Parameters
        ----------
        x : torch.Tensor
            MNIST input data.

        Returns
        -------
        torch.Tensor
            Output vector with probabilities for each class (each row sums
            to one). Note that these are probabilities, neither logits nor
            log-probabilities.
        """
        x = torch.nn.functional.relu(self.conv1(x))
        x = torch.nn.functional.relu(self.conv2(x))
        x = torch.nn.functional.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)  # keep the batch dimension
        x = torch.nn.functional.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return torch.nn.functional.softmax(x, dim=1)


def _log_probabilities(probabilities: torch.Tensor) -> torch.Tensor:
    """Return the element-wise logarithm of probabilities without log(0)."""
    tiny = torch.finfo(probabilities.dtype).tiny
    return torch.log(probabilities.clamp_min(tiny))


def get_mnist_data(root: str = '../data'
                   ) -> Tuple[torchvision.datasets.mnist.MNIST,
                              torchvision.datasets.mnist.MNIST]:
    """Download the MNIST data set and return training and test data sets.

    Parameters
    ----------
    root : str
        Directory the data are downloaded to. A relative path is resolved
        against the current working directory.

    Returns
    -------
    train_data : torchvision.datasets.mnist.MNIST
        Training data set.
    test_data : torchvision.datasets.mnist.MNIST
        Test data set.
    """
    # The scaled mean and standard deviation of the MNIST dataset
    # Note: This is precalculated.
    data_mean = 0.1307
    data_std = 0.3081

    # Convert input images to tensors and normalize
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((data_mean,), (data_std,))
    ])

    # Get the MNIST data from torchvision
    train_data = torchvision.datasets.MNIST(
        root, train=True, download=True, transform=transform)
    test_data = torchvision.datasets.MNIST(
        root, train=False, download=True, transform=transform)

    return train_data, test_data


def train(model: NeuralNetwork,
          device: torch.device,
          train_data: torchvision.datasets.mnist.MNIST,
          optimizer: torch.optim.Optimizer,
          log_interval: int = 100,
          batch_size: int = 64,
          ) -> None:
    """Train the given model for one pass over the training data.

    Parameters
    ----------
    model : NeuralNetwork
        Neural network model. Its output is expected to be class
        probabilities.
    device : torch.device
        Hardware to train the model on.
    train_data : torchvision.datasets.mnist.MNIST
        Training data the model is trained on.
    optimizer : torch.optim.Optimizer
        Optimization employed to train the model.
    log_interval : int
        Number of steps the progress is printed after. A value of 0
        disables the progress output.
    batch_size : int
        Number of samples per optimization step.
    """
    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=batch_size, shuffle=True, num_workers=1,
        # pinned memory only helps (and is only available) for CUDA copies
        pin_memory=torch.device(device).type == "cuda")
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        # The model returns probabilities. `cross_entropy` expects raw
        # logits and would apply softmax a second time, which bounds the
        # loss away from zero. Use the negative log-likelihood of the
        # log-probabilities instead.
        loss = torch.nn.functional.nll_loss(
            _log_probabilities(output), target)
        loss.backward()
        optimizer.step()
        if log_interval and batch_idx % log_interval == 0:
            percentage = 100. * batch_idx / len(train_loader)
            print(f"progress: {percentage:>4.1f} % -- loss: {loss.item()}")


def test(model: NeuralNetwork,
         device: torch.device,
         test_data: torchvision.datasets.mnist.MNIST,
         batch_size: int = 16,
         ) -> Tuple[float, int]:
    """Test the network on a test data set.

    Parameters
    ----------
    model : NeuralNetwork
        Neural network model. Its output is expected to be class
        probabilities.
    device : torch.device
        Hardware to evaluate the model on.
    test_data : torchvision.datasets.mnist.MNIST
        Test data set.
    batch_size : int
        Number of samples evaluated per step.

    Returns
    -------
    test_loss : float
        Average negative log-likelihood per sample on the test data set.
    correct : int
        Number of correctly classified test data.
    """
    # no shuffling needed: loss and accuracy do not depend on the order
    test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=batch_size, shuffle=False, num_workers=1,
        pin_memory=torch.device(device).type == "cuda")
    model.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # sum up batch loss; nll_loss expects log-probabilities, the
            # model returns probabilities
            test_loss += torch.nn.functional.nll_loss(
                _log_probabilities(output), target, reduction='sum').item()
            # get the index of the most probable class
            pred = output.argmax(dim=1)
            correct += (pred == target).sum().item()
    test_loss /= len(test_loader.dataset)
    return test_loss, correct


def plot_mnist_data(data: torchvision.datasets.mnist.MNIST,
                    idxs: List[int],
                    shape: Tuple,
                    **kwargs,
                    ) -> None:
    """Plot MNIST images.

    Parameters
    ----------
    data : torchvision.datasets.mnist.MNIST
        MNIST data set.
    idxs : list of int
        Indices of data to plot.
    shape : tuple
        Specification of arrangement of subplots (rows, columns).
    **kwargs
        Forwarded to ``matplotlib.pyplot.subplot_mosaic`` (e.g. figsize).
    """
    n_rows, n_cols = shape[0], shape[1]
    assert np.prod(shape) == len(idxs), \
        "number of indices must match the number of subplots"
    layout = [[f"{row}-{col}" for col in range(n_cols)]
              for row in range(n_rows)]
    fig, axes = plt.subplot_mosaic(layout, constrained_layout=True, **kwargs)
    # look the axes up by label instead of relying on the dict order
    for row in range(n_rows):
        for col in range(n_cols):
            ax = axes[f"{row}-{col}"]
            img, label = data[idxs[row * n_cols + col]]
            ax.imshow(img.squeeze(), cmap="gray")
            ax.set_title(f"label = {label}")
            ax.axis("off")


def plot_mnist_accuracy(data: torchvision.datasets.mnist.MNIST,
                        targets: np.ndarray,
                        outputs: np.ndarray,
                        idxs: List[int],
                        **kwargs
                        ) -> None:
    """Plot MNIST images and corresponding classification of the NN model.

    Each row shows the image on the left and a bar chart (logarithmic
    scale) of the model output on the right; a vertical line marks the
    label stored in `data`.

    Parameters
    ----------
    data : torchvision.datasets.mnist.MNIST
        MNIST data set.
    targets : np.ndarray
        Target values. Currently unused; the label is read from `data`.
    outputs : np.ndarray
        Output classifications of model; row ``i`` must belong to
        ``data[i]``.
    idxs : list of int
        Indices of data to plot.
    **kwargs
        Forwarded to ``matplotlib.pyplot.subplot_mosaic`` (e.g. figsize).
    """
    # plain ints give stable axis labels for NumPy or tensor indices alike
    idxs = [int(idx) for idx in idxs]
    layout = [[f"img-{idx}", f"acc-{idx}"] for idx in idxs]
    fig, axes = plt.subplot_mosaic(layout, constrained_layout=True, **kwargs)
    for idx in idxs:
        img, label = data[idx]

        img_ax = axes[f"img-{idx}"]
        img_ax.imshow(img.squeeze(), cmap="gray")
        img_ax.set_title(f"test point = {idx}")
        img_ax.axis("off")

        acc_ax = axes[f"acc-{idx}"]
        acc_ax.bar(range(len(outputs[idx])), outputs[idx],
                   log=True, color="gray")
        acc_ax.axvline(x=label, color="k")
        acc_ax.set_title(f"target = {label}, "
                         + f"prediction={np.argmax(outputs[idx])}")