# cnn on cifar-10
"""Train and evaluate a small convolutional network on CIFAR-10.

A convolutional net is similar to a feed-forward net, but applies
convolutional filters (mainly on images). It also includes pooling layers,
specifically max pooling, which downsamples the image by taking the max
value in each region:

    12   20   30   0
    8    12   2    0      2x2 max-pool      20   30
    34   70   37   4     -------------->    112  37
    112  100  25   12

Pooling helps avoid overfitting by providing an abstract form of the input.
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

# device config
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Device is {device}')

# hyper parameters
num_epochs = 10
batch_size = 10
learning_rate = 0.001

# dataset has PILImage images of range [0, 1]
# transform to tensors of normalized range [-1, 1]
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

train_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=False, transform=transform)

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

# can also get them from the data, probably
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')
num_classes = len(classes)

# Alternative class-based definition of the same network, kept for reference:
#
# class ConvNet(nn.Module):
#     def __init__(self):
#         super(ConvNet, self).__init__()
#         self.layers = nn.Sequential(
#             nn.Conv2d(3, 6, 5),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2),
#             nn.Conv2d(6, 16, 5),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2),
#             nn.Flatten(),
#             nn.Linear(16*5*5, 120),
#             nn.ReLU(),
#             nn.Linear(120, 84),
#             nn.ReLU(),
#             nn.Linear(84, 10)
#         )
#
#     def forward(self, x):
#         return self.layers(x)
#
# model = ConvNet().to(device)

# Input images are 3x32x32.
model = nn.Sequential(
    # 3 color channels in, 6 channels out, kernel size 5 (no padding)
    # -> image loses 2 pixels on each side -> 28x28
    nn.Conv2d(3, 6, 5),
    nn.ReLU(),
    # kernel size 2, stride 2 -> image shrinks to half size -> 14x14
    nn.MaxPool2d(2, 2),
    # input channels = output channels of the previous conv layer
    # -> image loses 2 pixels on each side -> 10x10
    nn.Conv2d(6, 16, 5),
    nn.ReLU(),
    # kernel size 2, stride 2 -> image shrinks to half size -> 5x5
    nn.MaxPool2d(2, 2),
    nn.Flatten(),
    nn.Linear(16*5*5, 120),  # 16 channels * 5px * 5px
    nn.ReLU(),
    nn.Linear(120, 84),
    nn.ReLU(),
    nn.Linear(84, num_classes),  # one output per class
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # images shape: [batch_size, 3, 32, 32]
        # first conv layer: 3 input channels, 6 output channels, kernel size 5
        images = images.to(device)
        labels = labels.to(device)

        # forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backward pass + update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 2000 == 0:
            print(f'Epoch {(epoch+1)}/{num_epochs}, Step {(i+1)}/{n_total_steps}, loss = {loss.item():.4f}')

print('Finished Training')

# Switch to inference mode. This is a no-op for the layers used above
# (no dropout / batch-norm) but keeps evaluation correct if any are added.
model.eval()

with torch.no_grad():
    n_correct = 0
    n_samples = 0
    n_class_correct = [0] * num_classes
    n_class_samples = [0] * num_classes

    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)

        # max returns (value, index)
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

        # Iterate over the samples actually present in this batch rather than
        # range(batch_size): the final batch is shorter whenever batch_size
        # does not divide the dataset size, and indexing past it would raise.
        for label, pred in zip(labels.tolist(), predicted.tolist()):
            if label == pred:
                n_class_correct[label] += 1
            n_class_samples[label] += 1

    acc = 100. * n_correct / n_samples
    print(f'Accuracy of network: {acc:.1f}%')

    for i in range(num_classes):
        if n_class_samples[i] == 0:
            # Avoid ZeroDivisionError for a class absent from the test data.
            print(f'Accuracy of {classes[i]}: n/a (no samples)')
            continue
        acc = 100. * n_class_correct[i]/n_class_samples[i]
        print(f'Accuracy of {classes[i]}: {acc:.1f}%')