# 13_feedforward with tensorboard
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import sys
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/mnist')

# device config
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Device is {device}')

# hyper parameters
input_size = 784    # 28x28 pixel images
hidden_size = 100   # PLAY WITH THIS
num_classes = 10    # digits 0..9
num_epochs = 2      # PLAY WITH THIS
batch_size = 100
learning_rate = 0.001

# MNIST
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True,
                                           transform=transforms.ToTensor())
test_dataset = torchvision.datasets.MNIST(root='./data', train=False,
                                          transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size)

examples = iter(train_loader)
samples, labels = next(examples)  # use the built-in next(); the .next() method was removed
print(samples.shape, labels.shape)

for i in range(6):
    plt.subplot(2, 3, i+1)
    plt.imshow(samples[i][0], cmap='gray')
# plt.show()

img_grid = torchvision.utils.make_grid(samples)
writer.add_image('mnist_images', img_grid)
writer.flush()  # flush (not close) so the writer stays usable below
# sys.exit()

model = nn.Sequential(
    nn.Linear(input_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, num_classes)  # no softmax because it's included in the CE loss function
).to(device)
# print(model)

# loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# trace the graph on CPU (samples from the DataLoader live on CPU anyway)
writer.add_graph(model.to('cpu'), samples.reshape(-1, 28*28))
writer.flush()
# sys.exit()
model = model.to(device)  # move back to the training device once, not inside the loop

# training loop
num_total_steps = len(train_loader)
running_loss = 0.0
running_correct = 0
for epoch in range(num_epochs):
    for batch, (images, labels) in enumerate(train_loader):
        # reshape 100, 1, 28, 28 -> 100, 784 and send to gpu if available
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)

        # forward
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backward + update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predictions = torch.max(outputs, 1)
        running_correct += (predictions == labels).sum().item()

        # per-step (noisy) curves
        writer.add_scalar('training loss each', loss.item(), epoch * num_total_steps + batch)
        writer.add_scalar('accuracy each', (predictions == labels).sum().item(), epoch * num_total_steps + batch)

        if (batch+1) % 100 == 0:
            # averages over the last 100 batches; with batch_size=100,
            # running_correct/100 is the mean number correct per batch
            writer.add_scalar('training loss', running_loss/100, epoch * num_total_steps + batch)
            writer.add_scalar('accuracy', running_correct/100, epoch * num_total_steps + batch)
            print(f'Epoch {epoch+1}/{num_epochs}, step {batch+1}/{num_total_steps}, loss = {loss.item():.4f}')
            running_loss = 0.0
            running_correct = 0

# test
b_labels = []
b_preds = []
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)

        # value, index (index is class label)
        _, predictions = torch.max(outputs, 1)
        n_samples += labels.shape[0]
        n_correct += (predictions == labels).sum().item()

        class_predictions = torch.softmax(outputs, dim=1)  # class probabilities per sample
        b_preds.append(class_predictions)
        b_labels.append(labels)  # ground-truth labels (not predictions) for the PR curves

    b_preds = torch.cat(b_preds)
    b_labels = torch.cat(b_labels)

    acc = 100. * n_correct / n_samples
    print(f'Accuracy = {acc}%')

classes = range(10)
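# A note on what follows: add_pr_curve takes, for one class, the binary
# ground-truth labels and the predicted probability of that class;
# TensorBoard then sweeps the decision threshold to draw one
# precision-recall curve per digit.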
for i in classes:
    labels_i = b_labels == i   # binary ground truth: is this sample digit i?
    preds_i = b_preds[:, i]    # model's probability for digit i
    writer.add_pr_curve(str(i), labels_i, preds_i, global_step=0)
writer.close()
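# To inspect the logged images, graph, scalars, and PR curves, launch
# TensorBoard from a terminal (the CLI ships with the tensorboard package):
#   tensorboard --logdir=runs
# then open http://localhost:6006 (the default port) in a browser.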