{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Convolutional Networks in PyTorch\n",
    "\n",
    "In this notebook we'll cover the basics of 2-D convolution for image classification on the MNIST dataset, training two small convolutional networks: a plain two-layer CNN, and the same network with batch normalization and dropout added."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torchvision\n",
    "import torchvision.transforms as transforms\n",
    "\n",
    "# device configuration: use the GPU if one is available\n",
    "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_epochs = 5         # passes over the training set\n",
    "num_classes = 10       # digits 0-9\n",
    "learning_rate = 0.001\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ToTensor() converts each 28x28 PIL image to a float tensor scaled to [0, 1]\n",
    "train_dataset = torchvision.datasets.MNIST(root='./MNIST',\n",
    "                                           train=True,\n",
    "                                           transform=transforms.ToTensor(),\n",
    "                                           download=True)\n",
    "test_dataset = torchvision.datasets.MNIST(root='./MNIST',\n",
    "                                          train=False,\n",
    "                                          transform=transforms.ToTensor())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 100  # how many examples are processed at each optimization step\n",
    "train_loader = torch.utils.data.DataLoader(dataset=train_dataset,\n",
    "                                           batch_size=batch_size,\n",
    "                                           shuffle=True)\n",
    "test_loader = torch.utils.data.DataLoader(dataset=test_dataset,\n",
    "                                          batch_size=batch_size,\n",
    "                                          shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ConvNet1(nn.Module):\n",
    "    def __init__(self, num_classes=10):\n",
    "        super(ConvNet1, self).__init__()\n",
    "        # 1x28x28 -> 16x28x28 (k=5, s=1, p=2 preserves size) -> 16x14x14 (2x2 pool)\n",
    "        self.layer1 = nn.Sequential(\n",
    "            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),\n",
    "            nn.ReLU(),\n",
    "            nn.MaxPool2d(kernel_size=2, stride=2))\n",
    "        # 16x14x14 -> 32x14x14 -> 32x7x7\n",
    "        self.layer2 = nn.Sequential(\n",
    "            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),\n",
    "            nn.ReLU(),\n",
    "            nn.MaxPool2d(kernel_size=2, stride=2))\n",
    "        self.fc = nn.Linear(7*7*32, num_classes)  # 1568 -> 10 class scores\n",
    "\n",
    "    def forward(self, x):\n",
    "        out = self.layer1(x)\n",
    "        out = self.layer2(out)\n",
    "        out = out.reshape(out.size(0), -1)  # flatten to (batch, 1568)\n",
    "        out = self.fc(out)\n",
    "        return out\n"
   ]
  },
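  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Why does `self.fc` take `7*7*32 = 1568` inputs? With kernel size 5, stride 1, and padding 2, each convolution preserves the spatial size, since the output width is `(W + 2*padding - kernel_size)/stride + 1 = (28 + 4 - 5)/1 + 1 = 28`; each 2x2 max-pool then halves it. So the feature maps shrink 28x28 -> 14x14 -> 7x7 while the channel count grows 1 -> 16 -> 32. The cell below is a quick sanity check of that arithmetic on a fresh, untrained instance (`sanity_model` and `dummy` are throwaway names, not used later)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sanity-check the shape arithmetic with a dummy batch\n",
    "# (illustrative only; this instance is untrained and discarded)\n",
    "sanity_model = ConvNet1(num_classes).to(device)\n",
    "dummy = torch.zeros(1, 1, 28, 28, device=device)  # (batch, channels, H, W)\n",
    "with torch.no_grad():\n",
    "    h1 = sanity_model.layer1(dummy)\n",
    "    h2 = sanity_model.layer2(h1)\n",
    "print(h1.shape)  # expected: torch.Size([1, 16, 14, 14])\n",
    "print(h2.shape)  # expected: torch.Size([1, 32, 7, 7])"
   ]
  },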
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = ConvNet1(num_classes).to(device)\n",
    "\n",
    "# loss and optimizer; CrossEntropyLoss applies log-softmax itself,\n",
    "# so the model outputs raw logits\n",
    "criterion = nn.CrossEntropyLoss()\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch [1/5], batch [100/600], loss: 0.3148\n",
      "epoch [1/5], batch [200/600], loss: 0.1325\n",
      "epoch [1/5], batch [300/600], loss: 0.2210\n",
      "epoch [1/5], batch [400/600], loss: 0.0793\n",
      "epoch [1/5], batch [500/600], loss: 0.2944\n",
      "epoch [1/5], batch [600/600], loss: 0.1435\n",
      "epoch [2/5], batch [100/600], loss: 0.0370\n",
      "epoch [2/5], batch [200/600], loss: 0.0773\n",
      "epoch [2/5], batch [300/600], loss: 0.0421\n",
      "epoch [2/5], batch [400/600], loss: 0.0519\n",
      "epoch [2/5], batch [500/600], loss: 0.0405\n",
      "epoch [2/5], batch [600/600], loss: 0.0763\n",
      "epoch [3/5], batch [100/600], loss: 0.0544\n",
      "epoch [3/5], batch [200/600], loss: 0.0341\n",
      "epoch [3/5], batch [300/600], loss: 0.1854\n",
      "epoch [3/5], batch [400/600], loss: 0.0307\n",
      "epoch [3/5], batch [500/600], loss: 0.0335\n",
      "epoch [3/5], batch [600/600], loss: 0.0378\n",
      "epoch [4/5], batch [100/600], loss: 0.1171\n",
      "epoch [4/5], batch [200/600], loss: 0.0608\n",
      "epoch [4/5], batch [300/600], loss: 0.0269\n",
      "epoch [4/5], batch [400/600], loss: 0.0056\n",
      "epoch [4/5], batch [500/600], loss: 0.0202\n",
      "epoch [4/5], batch [600/600], loss: 0.0112\n",
      "epoch [5/5], batch [100/600], loss: 0.0295\n",
      "epoch [5/5], batch [200/600], loss: 0.0119\n",
      "epoch [5/5], batch [300/600], loss: 0.0657\n",
      "epoch [5/5], batch [400/600], loss: 0.0099\n",
      "epoch [5/5], batch [500/600], loss: 0.0647\n",
      "epoch [5/5], batch [600/600], loss: 0.0025\n"
     ]
    }
   ],
   "source": [
    "num_batches = len(train_loader)\n",
    "for epoch in range(num_epochs):\n",
    "    for i, (images, labels) in enumerate(train_loader):\n",
    "        images = images.to(device)\n",
    "        labels = labels.to(device)\n",
    "\n",
    "        # forward pass\n",
    "        outputs = model(images)\n",
    "        loss = criterion(outputs, labels)\n",
    "\n",
    "        # backward and optimize\n",
    "        optimizer.zero_grad()\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "\n",
    "        if (i+1) % 100 == 0:\n",
    "            print('epoch [{}/{}], batch [{}/{}], loss: {:.4f}'\n",
    "                  .format(epoch+1, num_epochs, i+1, num_batches, loss.item()))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ConvNet1(\n",
       "  (layer1): Sequential(\n",
       "    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n",
       "    (1): ReLU()\n",
       "    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
       "  )\n",
       "  (layer2): Sequential(\n",
       "    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n",
       "    (1): ReLU()\n",
       "    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
       "  )\n",
       "  (fc): Linear(in_features=1568, out_features=10, bias=True)\n",
       ")"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test Accuracy of the model on the 10000 test images: 98.88 %\n"
     ]
    }
   ],
   "source": [
    "# test the model\n",
    "model.eval()  # switch to eval mode; good practice before inference (it matters once batchnorm/dropout are involved)\n",
    "with torch.no_grad():  # no gradients needed for evaluation\n",
    "    correct = 0\n",
    "    total = 0\n",
    "    for images, labels in test_loader:\n",
    "        images = images.to(device)\n",
    "        labels = labels.to(device)\n",
    "        outputs = model(images)\n",
    "        _, predicted = torch.max(outputs, 1)  # index of the largest logit per example\n",
    "        total += labels.size(0)\n",
    "        correct += (predicted == labels).sum().item()\n",
    "\n",
    "    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))\n"
   ]
  },
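  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "ConvNet1 already does well, so there is little headroom on MNIST, but it is still worth seeing two standard additions. `nn.BatchNorm2d` normalizes each channel over the mini-batch during training and keeps running statistics for evaluation; `nn.Dropout` randomly zeroes activations during training and is a no-op at evaluation time. Both behave differently in `train()` and `eval()` mode, which is why calling `model.eval()` before testing matters. The short demo cell below (illustrative only; `demo_drop` and `ones` are throwaway names) shows dropout's two modes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# illustrative sketch: dropout is stochastic in train() mode and the\n",
    "# identity in eval() mode (names here are throwaway, unused elsewhere)\n",
    "demo_drop = nn.Dropout(p=0.5)\n",
    "ones = torch.ones(1, 8)\n",
    "demo_drop.train()\n",
    "print(demo_drop(ones))  # about half the entries zeroed, survivors scaled by 1/(1-p) = 2\n",
    "demo_drop.eval()\n",
    "print(demo_drop(ones))  # all ones: dropout is disabled at eval time"
   ]
  },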
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ConvNet2(nn.Module):\n",
    "    def __init__(self, num_classes=10):\n",
    "        super(ConvNet2, self).__init__()\n",
    "        # same as ConvNet1, with a BatchNorm2d after each convolution\n",
    "        self.layer1 = nn.Sequential(\n",
    "            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),\n",
    "            nn.BatchNorm2d(16),\n",
    "            nn.ReLU(),\n",
    "            nn.MaxPool2d(kernel_size=2, stride=2))\n",
    "        self.layer2 = nn.Sequential(\n",
    "            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),\n",
    "            nn.BatchNorm2d(32),\n",
    "            nn.ReLU(),\n",
    "            nn.MaxPool2d(kernel_size=2, stride=2))\n",
    "        self.drop_out = nn.Dropout(p=0.5)  # dropout on the flattened features\n",
    "        self.fc = nn.Linear(7*7*32, num_classes)\n",
    "\n",
    "    def forward(self, x):\n",
    "        out = self.layer1(x)\n",
    "        out = self.layer2(out)\n",
    "        out = out.reshape(out.size(0), -1)  # flatten\n",
    "        out = self.drop_out(out)\n",
    "        out = self.fc(out)\n",
    "        return out\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = ConvNet2(num_classes).to(device)\n",
    "\n",
    "# loss and optimizer\n",
    "criterion = nn.CrossEntropyLoss()\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch [1/5], batch [100/600], loss: 0.3027\n",
      "epoch [1/5], batch [200/600], loss: 0.2901\n",
      "epoch [1/5], batch [300/600], loss: 0.1391\n",
      "epoch [1/5], batch [400/600], loss: 0.0396\n",
      "epoch [1/5], batch [500/600], loss: 0.1175\n",
      "epoch [1/5], batch [600/600], loss: 0.0288\n",
      "epoch [2/5], batch [100/600], loss: 0.0374\n",
      "epoch [2/5], batch [200/600], loss: 0.0803\n",
      "epoch [2/5], batch [300/600], loss: 0.0305\n",
      "epoch [2/5], batch [400/600], loss: 0.0406\n",
      "epoch [2/5], batch [500/600], loss: 0.0463\n",
      "epoch [2/5], batch [600/600], loss: 0.0810\n",
      "epoch [3/5], batch [100/600], loss: 0.0653\n",
      "epoch [3/5], batch [200/600], loss: 0.0808\n",
      "epoch [3/5], batch [300/600], loss: 0.0590\n",
      "epoch [3/5], batch [400/600], loss: 0.0286\n",
      "epoch [3/5], batch [500/600], loss: 0.0262\n",
      "epoch [3/5], batch [600/600], loss: 0.0597\n",
      "epoch [4/5], batch [100/600], loss: 0.0544\n",
      "epoch [4/5], batch [200/600], loss: 0.0353\n",
      "epoch [4/5], batch [300/600], loss: 0.0306\n",
      "epoch [4/5], batch [400/600], loss: 0.0238\n",
      "epoch [4/5], batch [500/600], loss: 0.0596\n",
      "epoch [4/5], batch [600/600], loss: 0.0065\n",
      "epoch [5/5], batch [100/600], loss: 0.0402\n",
      "epoch [5/5], batch [200/600], loss: 0.0399\n",
      "epoch [5/5], batch [300/600], loss: 0.0305\n",
      "epoch [5/5], batch [400/600], loss: 0.0487\n",
      "epoch [5/5], batch [500/600], loss: 0.0386\n",
      "epoch [5/5], batch [600/600], loss: 0.0382\n"
     ]
    }
   ],
   "source": [
    "num_batches = len(train_loader)\n",
    "for epoch in range(num_epochs):\n",
    "    for i, (images, labels) in enumerate(train_loader):\n",
    "        images = images.to(device)\n",
    "        labels = labels.to(device)\n",
    "\n",
    "        # forward pass\n",
    "        outputs = model(images)\n",
    "        loss = criterion(outputs, labels)\n",
    "\n",
    "        # backward and optimize\n",
    "        optimizer.zero_grad()\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "\n",
    "        if (i+1) % 100 == 0:\n",
    "            print('epoch [{}/{}], batch [{}/{}], loss: {:.4f}'\n",
    "                  .format(epoch+1, num_epochs, i+1, num_batches, loss.item()))\n"
   ]
  },
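  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "With training finished, it is common to checkpoint the learned weights so the model can be reused without retraining. Below is a minimal sketch, assuming a writable working directory; the filename `convnet2.ckpt` is an arbitrary choice. Note that `state_dict()` holds only parameters and buffers, so the `ConvNet2` class definition is still needed to restore the model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# minimal checkpoint sketch (the filename is an arbitrary choice)\n",
    "torch.save(model.state_dict(), 'convnet2.ckpt')\n",
    "\n",
    "# restoring into a fresh instance:\n",
    "restored = ConvNet2(num_classes).to(device)\n",
    "restored.load_state_dict(torch.load('convnet2.ckpt'))\n",
    "restored.eval()  # switch to eval mode before inference"
   ]
  },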
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ConvNet2(\n",
       "  (layer1): Sequential(\n",
       "    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n",
       "    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "    (2): ReLU()\n",
       "    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
       "  )\n",
       "  (layer2): Sequential(\n",
       "    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n",
       "    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "    (2): ReLU()\n",
       "    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
       "  )\n",
       "  (drop_out): Dropout(p=0.5)\n",
       "  (fc): Linear(in_features=1568, out_features=10, bias=True)\n",
       ")"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test Accuracy of the model on the 10000 test images: 98.95 %\n"
     ]
    }
   ],
   "source": [
    "# test the model\n",
    "model.eval()  # eval mode: batchnorm uses running statistics and dropout is disabled\n",
    "with torch.no_grad():\n",
    "    correct = 0\n",
    "    total = 0\n",
    "    for images, labels in test_loader:\n",
    "        images = images.to(device)\n",
    "        labels = labels.to(device)\n",
    "        outputs = model(images)\n",
    "        _, predicted = torch.max(outputs, 1)\n",
    "        total += labels.size(0)\n",
    "        correct += (predicted == labels).sum().item()\n",
    "\n",
    "    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}