Back to Tensorrt

For simplicity, import the train and eval functions from torchvision's train script instead of copying them here

tools/pytorch-quantization/examples/finetune_quant_resnet50.ipynb

23.082.4 KB
Original Source

Following the calibrate_quant_resnet50 example, we now fine-tune the model.

python
import datetime
import os
import sys
import time

import torch
import torch.utils.data
from torch import nn

from tqdm import tqdm

import torchvision
from torchvision import transforms

from pytorch_quantization import nn as quant_nn
from pytorch_quantization import calib
from pytorch_quantization.tensor_quant import QuantDescriptor

from absl import logging
logging.set_verbosity(logging.FATAL)  # Silence absl logging (pytorch_quantization logs via absl); anything below FATAL is too noisy in a notebook
python
# For simplicity, import the train and eval functions from torchvision's
# reference train script instead of copying them here.
# NOTE(review): hardcoded path to a local torchvision clone — point this at
# your own checkout of torchvision/references/classification.
sys.path.append("/raid/skyw/models/torchvision/references/classification/")
from train import evaluate, train_one_epoch, load_data
python
from pytorch_quantization import quant_modules
# Must be called BEFORE constructing the model: presumably swaps supported
# torch.nn layers for their quantized equivalents so the resnet50 built below
# contains quantizer modules matching the calibrated checkpoint — confirm
# against the pytorch_quantization docs.
quant_modules.initialize()

# Create and load the calibrated model
# NOTE(review): hardcoded path — this is the checkpoint produced by the
# calibrate_quant_resnet50 example; adjust for your environment.
model = torchvision.models.resnet50()
model.load_state_dict(torch.load("/tmp/quant_resnet50-calibrated.pth"))
model.cuda()
python
# NOTE(review): hardcoded ImageNet location — adjust for your environment.
data_path = "/raid/data/imagenet/imagenet_pytorch"

traindir = os.path.join(data_path, 'train')
valdir = os.path.join(data_path, 'val')
# load_data comes from torchvision's reference train script; the two False
# arguments presumably disable dataset caching and distributed sampling —
# confirm against the signature of the torchvision version in use.
dataset, dataset_test, train_sampler, test_sampler = load_data(traindir, valdir, False, False)

data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=256,
    sampler=train_sampler, num_workers=4, pin_memory=True)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=256,
    sampler=test_sampler, num_workers=4, pin_memory=True)

Quantized fine tuning

Let's fine-tune the model with fake quantization. As an example, we fine-tune for only 1 epoch.

python
# Standard ImageNet classification loss; small LR (1e-4) since we are only
# fine-tuning an already-trained, calibrated network.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

# Rebuild the train loader with a smaller batch size than the earlier cell
# (128 vs 256) and more workers — presumably because fake quantization
# increases GPU memory use; confirm against your hardware.
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=128,
    sampler=train_sampler, num_workers=16, pin_memory=True)

# Training takes about one and half hour per epoch on single V100
# Positional args follow torchvision's reference script: (model, criterion,
# optimizer, data_loader, device, epoch, print_freq) — confirm against the
# checked-out torchvision version.
train_one_epoch(model, criterion, optimizer, data_loader, "cuda", 0, 100)

Evaluate the fine tuned model

python
# Inference only: disable autograd to save memory and time during evaluation.
with torch.no_grad():
    evaluate(model, criterion, data_loader_test, device="cuda")

After only 1 epoch of quantized fine-tuning, top-1 accuracy improved from ~76.1 to 76.426. Training longer with learning-rate annealing can improve accuracy further.