From aeaf2182178433acdc2235358b3cc34d719e34bb Mon Sep 17 00:00:00 2001
From: KaiyangZhou
Date: Wed, 27 Nov 2019 16:35:18 +0000
Subject: [PATCH] add visualize_actmap.py to tools

---
 tools/visualize_actmap.py | 154 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 154 insertions(+)
 create mode 100644 tools/visualize_actmap.py

diff --git a/tools/visualize_actmap.py b/tools/visualize_actmap.py
new file mode 100644
index 0000000..d88ae8c
--- /dev/null
+++ b/tools/visualize_actmap.py
@@ -0,0 +1,154 @@
+"""Visualizes CNN activation maps to see where the CNN focuses when extracting features.
+
+Reference:
+    - Zagoruyko and Komodakis. Paying more attention to attention: Improving the
+      performance of convolutional neural networks via attention transfer. ICLR, 2017.
+    - Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.
+"""
+import os.path as osp
+import numpy as np
+import cv2
+import argparse
+
+import torch
+from torch.nn import functional as F
+
+import torchreid
+from torchreid.utils import mkdir_if_missing, check_isfile, load_pretrained_weights
+
+
+IMAGENET_MEAN = [0.485, 0.456, 0.406]
+IMAGENET_STD = [0.229, 0.224, 0.225]
+GRID_SPACING = 10
+
+
+@torch.no_grad()
+def visactmap(model, test_loader, save_dir, width, height, print_freq, use_gpu, img_mean=None, img_std=None):
+    if img_mean is None or img_std is None:
+        # use imagenet mean and std
+        img_mean = IMAGENET_MEAN
+        img_std = IMAGENET_STD
+
+    model.eval()
+
+    for target in list(test_loader.keys()):
+        data_loader = test_loader[target]['query'] # only process query images
+        # original images and activation maps are saved per target dataset
+        actmap_dir = osp.join(save_dir, 'actmap_' + target)
+        mkdir_if_missing(actmap_dir)
+        print('Visualizing activation maps for {} ...'.format(target))
+
+        for batch_idx, data in enumerate(data_loader):
+            imgs, paths = data[0], data[3]
+            if use_gpu:
+                imgs = imgs.cuda()
+
+            # forward to get convolutional feature maps
+            try:
+                outputs = model(imgs, return_featuremaps=True)
+            except TypeError:
+                raise TypeError(
+                    'forward() got unexpected keyword argument "return_featuremaps". '
+                    'Please add return_featuremaps as an input argument to forward(). '
+                    'When return_featuremaps=True, forward() should return the feature maps only.'
+                )
+
+            if outputs.dim() != 4:
+                raise ValueError(
+                    'The model output is supposed to have '
+                    'shape of (b, c, h, w), i.e. 4 dimensions, but got {} dimensions. '
+                    'Please make sure the model returns the last convolutional '
+                    'feature maps when run in eval mode.'.format(outputs.dim())
+                )
+
+            # compute activation maps: sum of squared channel activations,
+            # then L2-normalize each map over its spatial positions
+            outputs = (outputs**2).sum(1)
+            b, h, w = outputs.size()
+            outputs = outputs.view(b, h*w)
+            outputs = F.normalize(outputs, p=2, dim=1)
+            outputs = outputs.view(b, h, w)
+
+            if use_gpu:
+                imgs, outputs = imgs.cpu(), outputs.cpu()
+
+            for j in range(outputs.size(0)):
+                # get image name
+                path = paths[j]
+                imname = osp.basename(osp.splitext(path)[0])
+
+                # RGB image
+                img = imgs[j, ...]
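+                # undo the ImageNet normalization in place (x = x*std + mean)
+                # and clamp to [0, 1] before scaling to uint8 below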
+                for t, m, s in zip(img, img_mean, img_std):
+                    t.mul_(s).add_(m).clamp_(0, 1)
+                img_np = np.uint8(np.floor(img.numpy() * 255))
+                img_np = img_np.transpose((1, 2, 0)) # (c, h, w) -> (h, w, c)
+
+                # activation map: rescale to [0, 255] and apply the jet colormap
+                am = outputs[j, ...].numpy()
+                am = cv2.resize(am, (width, height))
+                am = 255 * (am - np.min(am)) / (np.max(am) - np.min(am) + 1e-12)
+                am = np.uint8(np.floor(am))
+                am = cv2.applyColorMap(am, cv2.COLORMAP_JET)
+
+                # overlapped
+                overlapped = img_np * 0.3 + am * 0.7
+                overlapped[overlapped > 255] = 255
+                overlapped = overlapped.astype(np.uint8)
+
+                # save images in a single figure (add white spacing between images)
+                # from left to right: original image, activation map, overlapped image
+                grid_img = 255 * np.ones((height, 3*width + 2*GRID_SPACING, 3), dtype=np.uint8)
+                grid_img[:, :width, :] = img_np[:, :, ::-1] # RGB -> BGR for cv2.imwrite
+                grid_img[:, width + GRID_SPACING: 2*width + GRID_SPACING, :] = am
+                grid_img[:, 2*width + 2*GRID_SPACING:, :] = overlapped
+                cv2.imwrite(osp.join(actmap_dir, imname + '.jpg'), grid_img)
+
+            if (batch_idx+1) % print_freq == 0:
+                print('- done batch {}/{}'.format(batch_idx+1, len(data_loader)))
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--root', type=str)
+    parser.add_argument('-d', '--dataset', type=str, default='market1501')
+    parser.add_argument('-m', '--model', type=str, default='osnet_x1_0')
+    parser.add_argument('--weights', type=str)
+    parser.add_argument('--save-dir', type=str, default='log')
+    parser.add_argument('--height', type=int, default=256)
+    parser.add_argument('--width', type=int, default=128)
+    parser.add_argument('--print-freq', type=int, default=10)
+    args = parser.parse_args()
+
+    use_gpu = torch.cuda.is_available()
+
+    datamanager = torchreid.data.ImageDataManager(
+        root=args.root,
+        sources=args.dataset,
+        height=args.height,
+        width=args.width,
+        batch_size_train=100,
+        batch_size_test=100,
+        transforms=None,
+        train_sampler='SequentialSampler'
+    )
+    test_loader = datamanager.test_loader
+
+    model = torchreid.models.build_model(
+        name=args.model,
+        num_classes=datamanager.num_train_pids,
+        use_gpu=use_gpu
+    )
+    if use_gpu:
+        model = model.cuda()
+
+    if args.weights and check_isfile(args.weights):
+        load_pretrained_weights(model, args.weights)
+
+    visactmap(model, test_loader, args.save_dir, args.width, args.height, args.print_freq, use_gpu)
+
+
+if __name__ == '__main__':
+    main()
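---

Note on the forward() contract: as the TypeError above spells out, the script
requires the model's forward() to accept a return_featuremaps keyword and to
return the last convolutional feature maps when it is set. A minimal sketch of
that contract; the network itself and the layer names (backbone, gap,
classifier) are illustrative stand-ins, not taken from any torchreid model:

    import torch
    import torch.nn as nn

    class ToyReIDNet(nn.Module):
        def __init__(self, num_classes=751):
            super().__init__()
            # stand-in convolutional backbone; real re-id models are much deeper
            self.backbone = nn.Sequential(
                nn.Conv2d(3, 64, 3, stride=2, padding=1), nn.ReLU(),
                nn.Conv2d(64, 128, 3, stride=2, padding=1), nn.ReLU()
            )
            self.gap = nn.AdaptiveAvgPool2d(1)
            self.classifier = nn.Linear(128, num_classes)

        def forward(self, x, return_featuremaps=False):
            f = self.backbone(x)          # (b, c, h, w) feature maps
            if return_featuremaps:
                return f                  # what visactmap() consumes
            v = self.gap(f).flatten(1)    # (b, c) global feature vector
            if not self.training:
                return v                  # eval mode: features for ranking
            return self.classifier(v)     # train mode: classification logits

With a supported model, the tool can then be invoked along these lines (the
weights path is a placeholder):

    python tools/visualize_actmap.py --root $DATA_ROOT -d market1501 \
        -m osnet_x1_0 --weights path/to/model.pth.tar --save-dir log/actmaps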