diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index f70de0d..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index dfe0770..0000000 --- a/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -# Auto detect text files and perform LF normalization -* text=auto diff --git a/CUB_test.py b/CUB_test.py deleted file mode 100644 index a5bb45f..0000000 --- a/CUB_test.py +++ /dev/null @@ -1,115 +0,0 @@ -#oding=utf-8 -import os -import pandas as pd -from sklearn.model_selection import train_test_split -from dataset.dataset_CUB_test import collate_fn1, collate_fn2, dataset -import torch -import torch.nn as nn -from torchvision import datasets, models -from transforms import transforms -from torch.nn import CrossEntropyLoss -from models.resnet_swap_2loss_add import resnet_swap_2loss_add -from math import ceil -from torch.autograd import Variable -from tqdm import tqdm -import numpy as np - -cfg = {} -cfg['dataset'] = 'CUB' -# prepare dataset -if cfg['dataset'] == 'CUB': - rawdata_root = './datasets/CUB_200_2011/all' - train_pd = pd.read_csv("./datasets/CUB_200_2011/train.txt",sep=" ",header=None, names=['ImageName', 'label']) - train_pd, val_pd = train_test_split(train_pd, test_size=0.90, random_state=43,stratify=train_pd['label']) - test_pd = pd.read_csv("./datasets/CUB_200_2011/test.txt",sep=" ",header=None, names=['ImageName', 'label']) - cfg['numcls'] = 200 - numimage = 6033 -if cfg['dataset'] == 'STCAR': - rawdata_root = './datasets/st_car/all' - train_pd = pd.read_csv("./datasets/st_car/train.txt",sep=" ",header=None, names=['ImageName', 'label']) - test_pd = pd.read_csv("./datasets/st_car/test.txt",sep=" ",header=None, names=['ImageName', 'label']) - cfg['numcls'] = 196 - numimage = 8144 -if cfg['dataset'] == 'AIR': - rawdata_root = './datasets/aircraft/all' - train_pd = pd.read_csv("./datasets/aircraft/train.txt",sep=" ",header=None, names=['ImageName', 'label']) - test_pd = pd.read_csv("./datasets/aircraft/test.txt",sep=" ",header=None, names=['ImageName', 'label']) - cfg['numcls'] = 100 - numimage = 6667 - -print('Set transform') -data_transforms = { - 'totensor': transforms.Compose([ - transforms.Resize((448,448)), - transforms.ToTensor(), - transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), - ]), - 'None': transforms.Compose([ - transforms.Resize((512,512)), - transforms.CenterCrop((448,448)), - ]), - - } -data_set = {} -data_set['val'] = dataset(cfg,imgroot=rawdata_root,anno_pd=test_pd, - unswap=data_transforms["None"],swap=data_transforms["None"],totensor=data_transforms["totensor"],train=False - ) -dataloader = {} -dataloader['val']=torch.utils.data.DataLoader(data_set['val'], batch_size=4, - shuffle=False, num_workers=4,collate_fn=collate_fn1) -model = resnet_swap_2loss_add(num_classes=cfg['numcls']) -model.cuda() -model = nn.DataParallel(model) -resume = './cub_model.pth' -pretrained_dict=torch.load(resume) -model_dict=model.state_dict() -pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} -model_dict.update(pretrained_dict) -model.load_state_dict(model_dict) -criterion = CrossEntropyLoss() -model.train(False) -val_corrects1 = 0 -val_corrects2 = 0 -val_corrects3 = 0 -val_size = ceil(len(data_set['val']) / dataloader['val'].batch_size) -for batch_cnt_val, data_val in tqdm(enumerate(dataloader['val'])): - #print('testing') - inputs, labels, labels_swap = data_val - inputs = Variable(inputs.cuda()) - labels = 
Variable(torch.from_numpy(np.array(labels)).long().cuda()) - labels_swap = Variable(torch.from_numpy(np.array(labels_swap)).long().cuda()) - # forward - if len(inputs)==1: - inputs = torch.cat((inputs,inputs)) - labels = torch.cat((labels,labels)) - labels_swap = torch.cat((labels_swap,labels_swap)) - - outputs = model(inputs) - - outputs1 = outputs[0] + outputs[1][:,0:cfg['numcls']] + outputs[1][:,cfg['numcls']:2*cfg['numcls']] - outputs2 = outputs[0] - outputs3 = outputs[1][:,0:cfg['numcls']] + outputs[1][:,cfg['numcls']:2*cfg['numcls']] - - _, preds1 = torch.max(outputs1, 1) - _, preds2 = torch.max(outputs2, 1) - _, preds3 = torch.max(outputs3, 1) - batch_corrects1 = torch.sum((preds1 == labels)).data.item() - batch_corrects2 = torch.sum((preds2 == labels)).data.item() - batch_corrects3 = torch.sum((preds3 == labels)).data.item() - - val_corrects1 += batch_corrects1 - val_corrects2 += batch_corrects2 - val_corrects3 += batch_corrects3 -val_acc1 = 0.5 * val_corrects1 / len(data_set['val']) -val_acc2 = 0.5 * val_corrects2 / len(data_set['val']) -val_acc3 = 0.5 * val_corrects3 / len(data_set['val']) -print("cls&adv acc:", val_acc1, "cls acc:", val_acc2,"adv acc:", val_acc1) - - - - - - - - - diff --git a/LICENSE b/LICENSE.txt similarity index 100% rename from LICENSE rename to LICENSE.txt diff --git a/README.md b/README.md index 087a7c9..5b8c43e 100644 --- a/README.md +++ b/README.md @@ -2,57 +2,105 @@ By Yue Chen, Yalong Bai, Wei Zhang, Tao Mei -### Introduction +## Introduction -This code is relative to the [DCL](https://arxiv.org/), which is accepted on CVPR 2019. +This project is a PyTorch implementation of DCL, [*Destruction and Construction Learning for Fine-grained Image Recognition*](http://openaccess.thecvf.com/content_CVPR_2019/html/Chen_Destruction_and_Construction_Learning_for_Fine-Grained_Image_Recognition_CVPR_2019_paper.html), accepted by CVPR 2019. -This DCL code in this repo is written based on Pytorch 0.4.0. -This code has been tested on Ubuntu 16.04.3 LTS with Python 3.6.5 and CUDA 9.0. +## Requirements -Yuo can use this public docker image as the test environment: +1. Python 3.6 + +2. Pytorch 0.4.0 or 0.4.1 + +3. CUDA 8.0 or higher + +For a docker environment: ```shell -docker pull pytorch/pytorch:0.4-cuda9-cudnn7-devel +docker pull pytorch/pytorch:0.4-cuda9-cudnn7-devel ``` -### Citing DCL +For a conda environment: -If you find this repo useful in your research, please consider citing: +```shell +conda create --name DCL --file conda_list.txt +``` - @article{chen2019dcl, - title={Destruction and Construction Learning for Fine-grained Image Recognition}, - author={Chen Yue and Bai, Yalong and Zhang Wei and Mei Tao}, - journal={arXiv preprint arXiv:}, - year={2019} - } +## Dataset Preparation -### Requirements +1. Download the corresponding dataset into the folder 'datasets' -0. Pytorch 0.4.0 +2. Data organization, e.g. CUB: -0. Numpy, Pillow, Pandas + All the image data are in './datasets/CUB/data/' + e.g. './datasets/CUB/data/*.jpg' -0. GPU: P40, etc. (May have bugs on the latest V100 GPU) + The annotation files are in './datasets/CUB/anno/' + e.g. './datasets/CUB/anno/train.txt' -### Datasets Prepare + Each annotation line has the format: -0. Download CUB-200-2011 dataset form [Caltech-UCSD Birds-200-2011](http://www.vision.caltech.edu/visipedia/CUB-200-2011.html) + ```shell + name_of_image.jpg label_num\n + ``` -0. Unzip the dataset file under the folder 'datasets' + e.g. for CUB in this repository: -0. Run ./datasets/CUB_pre.py to generate annotation files 'train.txt', 'test.txt' and image folder 'all' for CUB-200-2011 dataset + ```shell + Black_Footed_Albatross_0009_34.jpg 0 + Black_Footed_Albatross_0014_89.jpg 0 + Laysan_Albatross_0044_784.jpg 1 + Sooty_Albatross_0021_796339.jpg 2 + ... + ``` -### Testing Demo + Annotation examples for datasets like CUB, Stanford Car, etc. are already given in this repository. You can apply DCL to your own dataset by simply converting its annotations to train.txt/val.txt/test.txt and modifying the class number in `config.py` (line 67: numcls=200).
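+ A minimal sketch for generating such annotation files from a hypothetical
+ one-folder-per-class image layout (the paths and layout below are assumptions;
+ adapt them to your data):
+
+ ```python
+ import os
+
+ root = './datasets/MyData/data'         # hypothetical image root
+ classes = sorted(os.listdir(root))      # one sub-folder per class (assumed)
+ with open('./datasets/MyData/anno/train.txt', 'w') as f:
+     for label, cls in enumerate(classes):
+         for name in sorted(os.listdir(os.path.join(root, cls))):
+             f.write('%s %d\n' % (os.path.join(cls, name), label))
+ ```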
-0. Download `CUB_model.pth` from [Google Drive](https://drive.google.com/file/d/1xWMOi5hADm1xMUl5dDLeP6cfjZit6nQi/view?usp=sharing). ## Training -0. Run `CUB_test.py` Run `train.py` to train DCL. -### Training on CUB-200-2011 For training CUB / STCAR / AIR from scratch -0. Run `train.py` to train and test the CUB-200-2011 datasets. Wait about half day for training and testing. +```shell +python train.py --data CUB --epoch 360 --backbone resnet50 \ + --tb 16 --tnw 16 --vb 512 --vnw 16 \ + --lr 0.008 --lr_step 60 \ + --cls_lr_ratio 10 --start_epoch 0 \ + --detail training_describe --size 512 \ + --crop 448 --cls_mul --swap_num 7 7 +``` -0. Hopefully it would give the evaluation results around ~87.8% acc after running. For training CUB / STCAR / AIR from a trained checkpoint **Support for other datasets will be updated later** +```shell +python train.py --data CUB --epoch 360 --backbone resnet50 \ + --tb 16 --tnw 16 --vb 512 --vnw 16 \ + --lr 0.008 --lr_step 60 \ + --cls_lr_ratio 10 --start_epoch $LAST_EPOCH \ + --detail training_describe4checkpoint --size 512 \ + --crop 448 --cls_mul --swap_num 7 7 +``` + +For training FGVC product datasets from scratch + +```shell + python train.py --data product --epoch 60 --backbone senet154 \ + --tb 96 --tnw 32 --vb 512 --vnw 32 \ + --lr 0.01 --lr_step 12 \ + --cls_lr_ratio 10 --start_epoch 0 \ + --detail training_describe --size 512 \ + --crop 448 --cls_2 --swap_num 7 7 +``` + +For training FGVC product datasets from a trained checkpoint + +```shell + python train.py --data product --epoch 60 --backbone senet154 \ + --tb 96 --tnw 32 --vb 512 --vnw 32 \ + --lr 0.01 --lr_step 12 \ + --cls_lr_ratio 10 --start_epoch $LAST_EPOCH \ + --detail training_describe4checkpoint --size 512 \ + --crop 448 --cls_2 --swap_num 7 7 +``` diff --git a/config.py b/config.py new file mode 100644 index 0000000..87d9a6f --- /dev/null +++ b/config.py @@ -0,0 +1,131 @@ +import os +import pandas as pd +import torch + +from transforms import transforms +from utils.autoaugment import ImageNetPolicy + +# pretrained model checkpoints +pretrained_model = {'resnet50' : './models/pretrained/resnet50-19c8e357.pth',} + +# transforms dict +def load_data_transformers(resize_reso=512, crop_reso=448, swap_num=[7, 7]): + center_resize = 600 + Normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + data_transforms = { + 'swap': transforms.Compose([ + transforms.Randomswap((swap_num[0], swap_num[1])), + ]), + 'common_aug': transforms.Compose([ + transforms.Resize((resize_reso, resize_reso)), + transforms.RandomRotation(degrees=15), + transforms.RandomCrop((crop_reso,crop_reso)), + transforms.RandomHorizontalFlip(), + ]), + 'train_totensor': transforms.Compose([ + transforms.Resize((crop_reso, crop_reso)), + # ImageNetPolicy(), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + ]), + 'val_totensor': transforms.Compose([ + transforms.Resize((crop_reso, crop_reso)), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + ]), + 'test_totensor': transforms.Compose([ + transforms.Resize((resize_reso, resize_reso)), + transforms.CenterCrop((crop_reso, crop_reso)), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + ]), + 'None': None, + } + return data_transforms
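+# Usage sketch (not part of the original file): how the transformer dict is
+# meant to be consumed; the resolutions and swap grid mirror the train.py defaults.
+#   transformers = load_data_transformers(512, 448, [7, 7])
+#   img = Image.open('bird.jpg').convert('RGB')        # hypothetical input image
+#   img = transformers['common_aug'](img)              # resize / rotate / crop / flip
+#   swapped = transformers['swap'](img)                # destruction: shuffle the 7x7 patch grid
+#   tensor = transformers['train_totensor'](swapped)   # 3x448x448 normalized tensor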
+ + +class LoadConfig(object): + def __init__(self, args, version): + if version == 'train': + get_list = ['train', 'val'] + elif version == 'val': + get_list = ['val'] + elif version == 'test': + get_list = ['test'] + else: + raise Exception("version must be one of: train / val / test") + + ############################### + #### add dataset info here #### + ############################### + + # put image data in $PATH/data + # put annotation txt file in $PATH/anno + + if args.dataset == 'product': + self.dataset = args.dataset + self.rawdata_root = './../FGVC_product/data' + self.anno_root = './../FGVC_product/anno' + self.numcls = 2019 + elif args.dataset == 'CUB': + self.dataset = args.dataset + self.rawdata_root = './dataset/CUB_200_2011/data' + self.anno_root = './dataset/CUB_200_2011/anno' + self.numcls = 200 + elif args.dataset == 'STCAR': + self.dataset = args.dataset + self.rawdata_root = './dataset/st_car/data' + self.anno_root = './dataset/st_car/anno' + self.numcls = 196 + elif args.dataset == 'AIR': + self.dataset = args.dataset + self.rawdata_root = './dataset/aircraft/data' + self.anno_root = './dataset/aircraft/anno' + self.numcls = 100 + else: + raise Exception('dataset not defined') + + # annotation file organized as : + # path/image_name cls_num\n + + if 'train' in get_list: + self.train_anno = pd.read_csv(os.path.join(self.anno_root, 'ct_train.txt'),\ + sep=" ",\ + header=None,\ + names=['ImageName', 'label']) + + if 'val' in get_list: + self.val_anno = pd.read_csv(os.path.join(self.anno_root, 'ct_val.txt'),\ + sep=" ",\ + header=None,\ + names=['ImageName', 'label']) + + if 'test' in get_list: + self.test_anno = pd.read_csv(os.path.join(self.anno_root, 'ct_test.txt'),\ + sep=" ",\ + header=None,\ + names=['ImageName', 'label']) + + self.swap_num = args.swap_num + + self.save_dir = './net_model' + self.backbone = args.backbone + + self.use_dcl = True + self.use_backbone = not self.use_dcl + self.use_Asoftmax = False + self.use_focal_loss = False + self.use_fpn = False + self.use_hier = False + + self.weighted_sample = False + self.cls_2 = True + self.cls_2xmul = False + + self.log_folder = './logs' + if not os.path.exists(self.log_folder): + os.mkdir(self.log_folder) + + + + diff --git a/dataset/dataset_CUB_test.py b/dataset/dataset_CUB_test.py deleted file mode 100644 index 4140902..0000000 --- a/dataset/dataset_CUB_test.py +++ /dev/null @@ -1,65 +0,0 @@ -# coding=utf8 -from __future__ import division -import os -import torch -import torch.utils.data as data -import PIL.Image as Image -from PIL import ImageStat -class dataset(data.Dataset): - def __init__(self, cfg, imgroot, anno_pd, unswap=None, swap=None, totensor=None, train=False): - self.root_path = imgroot - self.paths = anno_pd['ImageName'].tolist() - self.labels = anno_pd['label'].tolist() - self.unswap = unswap - self.swap = swap - self.totensor = totensor - self.cfg = cfg - self.train = train - - def __len__(self): - return len(self.paths) - - def __getitem__(self, item): - img_path = os.path.join(self.root_path, self.paths[item]) - img = self.pil_loader(img_path) - img_unswap = self.unswap(img) - img_unswap = self.totensor(img_unswap) - 
img_swap = img_unswap - label = self.labels[item]-1 - label_swap = label - return img_unswap, img_swap, label, label_swap - - def pil_loader(self,imgpath): - with open(imgpath, 'rb') as f: - with Image.open(f) as img: - return img.convert('RGB') - -def collate_fn1(batch): - imgs = [] - label = [] - label_swap = [] - swap_law = [] - for sample in batch: - imgs.append(sample[0]) - imgs.append(sample[1]) - label.append(sample[2]) - label.append(sample[2]) - label_swap.append(sample[2]) - label_swap.append(sample[3]) - # swap_law.append(sample[4]) - # swap_law.append(sample[5]) - return torch.stack(imgs, 0), label, label_swap # , swap_law - -def collate_fn2(batch): - imgs = [] - label = [] - label_swap = [] - swap_law = [] - for sample in batch: - imgs.append(sample[0]) - label.append(sample[2]) - swap_law.append(sample[4]) - return torch.stack(imgs, 0), label, label_swap, swap_law - - - diff --git a/dataset/dataset_DCL.py b/dataset/dataset_DCL.py deleted file mode 100644 index 2ad27ff..0000000 --- a/dataset/dataset_DCL.py +++ /dev/null @@ -1,95 +0,0 @@ -# coding=utf8 -from __future__ import division -import os -import torch -import torch.utils.data as data -import PIL.Image as Image -from PIL import ImageStat -class dataset(data.Dataset): - def __init__(self, cfg, imgroot, anno_pd, unswap=None, swap=None, totensor=None, train=False): - self.root_path = imgroot - self.paths = anno_pd['ImageName'].tolist() - self.labels = anno_pd['label'].tolist() - self.unswap = unswap - self.swap = swap - self.totensor = totensor - self.cfg = cfg - self.train = train - - def __len__(self): - return len(self.paths) - - def __getitem__(self, item): - img_path = os.path.join(self.root_path, self.paths[item]) - img = self.pil_loader(img_path) - crop_num = [7, 7] - img_unswap = self.unswap(img) - - image_unswap_list = self.crop_image(img_unswap,crop_num) - - img_unswap = self.totensor(img_unswap) - swap_law1 = [(i-24)/49 for i in range(crop_num[0]*crop_num[1])] - - if self.train: - img_swap = self.swap(img) - - image_swap_list = self.crop_image(img_swap,crop_num) - unswap_stats = [sum(ImageStat.Stat(im).mean) for im in image_unswap_list] - swap_stats = [sum(ImageStat.Stat(im).mean) for im in image_swap_list] - swap_law2 = [] - for swap_im in swap_stats: - distance = [abs(swap_im - unswap_im) for unswap_im in unswap_stats] - index = distance.index(min(distance)) - swap_law2.append((index-24)/49) - img_swap = self.totensor(img_swap) - label = self.labels[item]-1 - label_swap = label + self.cfg['numcls'] - else: - img_swap = img_unswap - label = self.labels[item]-1 - label_swap = label - swap_law2 = [(i-24)/49 for i in range(crop_num[0]*crop_num[1])] - return img_unswap, img_swap, label, label_swap, swap_law1, swap_law2 - - def pil_loader(self,imgpath): - with open(imgpath, 'rb') as f: - with Image.open(f) as img: - return img.convert('RGB') - - def crop_image(self, image, cropnum): - width, high = image.size - crop_x = [int((width / cropnum[0]) * i) for i in range(cropnum[0] + 1)] - crop_y = [int((high / cropnum[1]) * i) for i in range(cropnum[1] + 1)] - im_list = [] - for j in range(len(crop_y) - 1): - for i in range(len(crop_x) - 1): - im_list.append(image.crop((crop_x[i], crop_y[j], min(crop_x[i + 1], width), min(crop_y[j + 1], high)))) - return im_list - - -def collate_fn1(batch): - imgs = [] - label = [] - label_swap = [] - swap_law = [] - for sample in batch: - imgs.append(sample[0]) - imgs.append(sample[1]) - label.append(sample[2]) - label.append(sample[2]) - label_swap.append(sample[2]) - 
label_swap.append(sample[3]) - swap_law.append(sample[4]) - swap_law.append(sample[5]) - return torch.stack(imgs, 0), label, label_swap, swap_law - -def collate_fn2(batch): - imgs = [] - label = [] - label_swap = [] - swap_law = [] - for sample in batch: - imgs.append(sample[0]) - label.append(sample[2]) - swap_law.append(sample[4]) - return torch.stack(imgs, 0), label, label_swap, swap_law diff --git a/datasets/CUB_pre.py b/datasets/CUB_pre.py deleted file mode 100644 index 4693067..0000000 --- a/datasets/CUB_pre.py +++ /dev/null @@ -1,66 +0,0 @@ -import shutil -import os - -train_test_set_file = open('./CUB_200_2011/train_test_split.txt') -train_list = [] -test_list = [] -for line in train_test_set_file: - tmp = line.strip().split() - if tmp[1] == '1': - train_list.append(tmp[0]) - else: - test_list.append(tmp[0]) -# print(len(train_list)) -# print('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^') -# print(len(test_list)) -train_test_set_file.close() - -images_file = open('./CUB_200_2011/images.txt') -images_dict = {} -for line in images_file: - tmp = line.strip().split() - images_dict[tmp[0]] = tmp[1] -# print(images_dict) -images_file.close() - -# prepare for train subset -for image_id in train_list: - read_path = './CUB_200_2011/images/' - train_write_path = './CUB_200_2011/all/' - read_path = read_path + images_dict[image_id] - train_write_path = train_write_path + os.path.split(images_dict[image_id])[1] - # print(train_write_path) - shutil.copyfile(read_path, train_write_path) - -# prepare for test subset -for image_id in test_list: - read_path = './CUB_200_2011/images/' - test_write_path = './CUB_200_2011/all/' - read_path = read_path + images_dict[image_id] - test_write_path = test_write_path + os.path.split(images_dict[image_id])[1] - # print(train_write_path) - shutil.copyfile(read_path, test_write_path) - -class_file = open('./CUB_200_2011/image_class_labels.txt') -class_dict = {} -for line in class_file: - tmp = line.strip().split() - class_dict[tmp[0]] = tmp[1] -class_file.close() - -# create train.txt -train_file = open('./CUB_200_2011/train.txt', 'a') -for image_id in train_list: - train_file.write(os.path.split(images_dict[image_id])[1]) - train_file.write(' ') - train_file.write(class_dict[image_id]) - train_file.write('\n') -train_file.close() - -test_file = open('./CUB_200_2011/test.txt', 'a') -for image_id in test_list: - test_file.write(os.path.split(images_dict[image_id])[1]) - test_file.write(' ') - test_file.write(class_dict[image_id]) - test_file.write('\n') -test_file.close() diff --git a/models/Asoftmax_linear.py b/models/Asoftmax_linear.py new file mode 100644 index 0000000..734c9e4 --- /dev/null +++ b/models/Asoftmax_linear.py @@ -0,0 +1,59 @@ +import torch +import torch.nn as nn +from torch.autograd import Variable +import torch.nn.functional as F +from torch.nn import Parameter +import math + +def myphi(x,m): + x = x * m + return 1-x**2/math.factorial(2)+x**4/math.factorial(4)-x**6/math.factorial(6) + \ + x**8/math.factorial(8) - x**9/math.factorial(9) + +class AngleLinear(nn.Module): + def __init__(self, in_features, out_features, m = 4, phiflag=True): + super(AngleLinear, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.weight = Parameter(torch.Tensor(in_features,out_features)) + self.weight.data.uniform_(-1, 1).renorm_(2,1,1e-5).mul_(1e5) + self.phiflag = phiflag + self.m = m + self.mlambda = [ + lambda x: x**0, + lambda x: x**1, + lambda x: 2*x**2-1, + lambda x: 4*x**3-3*x, + lambda x: 8*x**4-8*x**2+1, + lambda x: 
16*x**5-20*x**3+5*x ] + + def forward(self, input): + x = input # size=(B,F) F is feature len + w = self.weight # size=(F,Classnum) F=in_features Classnum=out_features + + ww = w.renorm(2,1,1e-5).mul(1e5) + xlen = x.pow(2).sum(1).pow(0.5) # size=B + wlen = ww.pow(2).sum(0).pow(0.5) # size=Classnum + + cos_theta = x.mm(ww) # size=(B,Classnum) + cos_theta = cos_theta / xlen.view(-1,1) / wlen.view(1,-1) + cos_theta = cos_theta.clamp(-1,1) + + if self.phiflag: + cos_m_theta = self.mlambda[self.m](cos_theta) + theta = Variable(cos_theta.data.acos()) + k = (self.m*theta/3.14159265).floor() + n_one = k*0.0 - 1 + phi_theta = (n_one**k) * cos_m_theta - 2*k + else: + theta = cos_theta.acos() + phi_theta = myphi(theta,self.m) + phi_theta = phi_theta.clamp(-1*self.m,1) + + cos_theta = cos_theta * xlen.view(-1,1) + phi_theta = phi_theta * xlen.view(-1,1) + output = (cos_theta,phi_theta) + return output # size=(B,Classnum,2) + + diff --git a/models/LoadModel.py b/models/LoadModel.py new file mode 100644 index 0000000..884a09c --- /dev/null +++ b/models/LoadModel.py @@ -0,0 +1,80 @@ +import numpy as np +from torch import nn +import torch +from torchvision import models, transforms, datasets +import torch.nn.functional as F +import pretrainedmodels + +from config import pretrained_model +from models.Asoftmax_linear import AngleLinear  # needed when use_Asoftmax is enabled + +import pdb + +class MainModel(nn.Module): + def __init__(self, config): + super(MainModel, self).__init__() + self.use_dcl = config.use_dcl + self.num_classes = config.numcls + self.backbone_arch = config.backbone + self.use_Asoftmax = config.use_Asoftmax + print(self.backbone_arch) + + if self.backbone_arch in dir(models): + self.model = getattr(models, self.backbone_arch)() + if self.backbone_arch in pretrained_model: + self.model.load_state_dict(torch.load(pretrained_model[self.backbone_arch])) + else: + if self.backbone_arch in pretrained_model: + self.model = pretrainedmodels.__dict__[self.backbone_arch](num_classes=1000, pretrained=None) + else: + self.model = pretrainedmodels.__dict__[self.backbone_arch](num_classes=1000) + + if self.backbone_arch == 'resnet50' or self.backbone_arch == 'se_resnet50': + self.model = nn.Sequential(*list(self.model.children())[:-2]) + if self.backbone_arch == 'senet154': + self.model = nn.Sequential(*list(self.model.children())[:-3]) + if self.backbone_arch == 'se_resnext101_32x4d': + self.model = nn.Sequential(*list(self.model.children())[:-2]) + if self.backbone_arch == 'se_resnet101': + self.model = nn.Sequential(*list(self.model.children())[:-2]) + self.avgpool = nn.AdaptiveAvgPool2d(output_size=1) + self.classifier = nn.Linear(2048, self.num_classes, bias=False) + + if self.use_dcl: + if config.cls_2: + self.classifier_swap = nn.Linear(2048, 2, bias=False) + if config.cls_2xmul: + self.classifier_swap = nn.Linear(2048, 2*self.num_classes, bias=False) + self.Convmask = nn.Conv2d(2048, 1, 1, stride=1, padding=0, bias=True) + self.avgpool2 = nn.AvgPool2d(2, stride=2) + + if self.use_Asoftmax: + self.Aclassifier = AngleLinear(2048, self.num_classes)  # AngleLinear takes no bias argument + + def forward(self, x, last_cont=None): + x = self.model(x) + if self.use_dcl: + mask = self.Convmask(x) + mask = self.avgpool2(mask) + mask = torch.tanh(mask) + mask = mask.view(mask.size(0), -1) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + out = [] + out.append(self.classifier(x)) + + if self.use_dcl: + out.append(self.classifier_swap(x)) + out.append(mask) + + if self.use_Asoftmax: + if last_cont is None: + x_size = x.size(0) + out.append(self.Aclassifier(x[0:x_size:2])) + else: + last_x = 
self.model(last_cont) + last_x = self.avgpool(last_x) + last_x = last_x.view(last_x.size(0), -1) + out.append(self.Aclassifier(last_x)) + + return out diff --git a/models/__pycache__/LoadModel.cpython-36.pyc b/models/__pycache__/LoadModel.cpython-36.pyc new file mode 100644 index 0000000..a3b0993 Binary files /dev/null and b/models/__pycache__/LoadModel.cpython-36.pyc differ diff --git a/models/__pycache__/focal_loss.cpython-36.pyc b/models/__pycache__/focal_loss.cpython-36.pyc new file mode 100644 index 0000000..6e6ac18 Binary files /dev/null and b/models/__pycache__/focal_loss.cpython-36.pyc differ diff --git a/models/focal_loss.py b/models/focal_loss.py new file mode 100644 index 0000000..fd56be3 --- /dev/null +++ b/models/focal_loss.py @@ -0,0 +1,48 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +class FocalLoss(nn.Module): #1d and 2d + + def __init__(self, gamma=2, size_average=True): + super(FocalLoss, self).__init__() + self.gamma = gamma + self.size_average = size_average + + + def forward(self, logit, target, class_weight=None, type='softmax'): + target = target.view(-1, 1).long() + if type=='sigmoid': + if class_weight is None: + class_weight = [1]*2 #[0.5, 0.5] + + prob = torch.sigmoid(logit) + prob = prob.view(-1, 1) + prob = torch.cat((1-prob, prob), 1) + select = torch.FloatTensor(len(prob), 2).zero_().cuda() + select.scatter_(1, target, 1.) + + elif type=='softmax': + B,C = logit.size() + if class_weight is None: + class_weight =[1]*C #[1/C]*C + + #logit = logit.permute(0, 2, 3, 1).contiguous().view(-1, C) + prob = F.softmax(logit,1) + select = torch.FloatTensor(len(prob), C).zero_().cuda() + select.scatter_(1, target, 1.) + + class_weight = torch.FloatTensor(class_weight).cuda().view(-1,1) + class_weight = torch.gather(class_weight, 0, target) + + prob = (prob*select).sum(1).view(-1,1) + prob = torch.clamp(prob,1e-8,1-1e-8) + batch_loss = - class_weight *(torch.pow((1-prob), self.gamma))*prob.log() + + if self.size_average: + loss = batch_loss.mean() + else: + loss = batch_loss + + return loss + diff --git a/resnet_swap_2loss_add.py b/resnet_swap_2loss_add.py deleted file mode 100644 index 64074c3..0000000 --- a/resnet_swap_2loss_add.py +++ /dev/null @@ -1,42 +0,0 @@ -from torch import nn -import torch -from torchvision import models, transforms, datasets -import torch.nn.functional as F - - -class resnet_swap_2loss_add(nn.Module): - def __init__(self, num_classes): - super(resnet_swap_2loss_add,self).__init__() - resnet50 = models.resnet50(pretrained=True) - self.stage1_img = nn.Sequential(*list(resnet50.children())[:5]) - self.stage2_img = nn.Sequential(*list(resnet50.children())[5:6]) - self.stage3_img = nn.Sequential(*list(resnet50.children())[6:7]) - self.stage4_img = nn.Sequential(*list(resnet50.children())[7]) - - self.avgpool = nn.AdaptiveAvgPool2d(output_size=1) - self.classifier = nn.Linear(2048, num_classes) - self.classifier_swap = nn.Linear(2048, 2*num_classes) - # self.classifier_swap = nn.Linear(2048, 2) - self.Convmask = nn.Conv2d(2048, 1, 1, stride=1, padding=0, bias=False) - self.avgpool2 = nn.AvgPool2d(2,stride=2) - - def forward(self, x): - x2 = self.stage1_img(x) - x3 = self.stage2_img(x2) - x4 = self.stage3_img(x3) - x5 = self.stage4_img(x4) - - x = x5 - mask = self.Convmask(x) - mask = self.avgpool2(mask) - mask = F.tanh(mask) - mask = mask.view(mask.size(0),-1) - - x = self.avgpool(x) - x = x.view(x.size(0), -1) - out = [] - out.append(self.classifier(x)) - out.append(self.classifier_swap(x)) - out.append(mask) - - return 
out diff --git a/test.py b/test.py new file mode 100644 index 0000000..a19b01a --- /dev/null +++ b/test.py @@ -0,0 +1,152 @@ +#coding=utf-8 +import os +import json +import csv +import argparse +import pandas as pd +import numpy as np +from math import ceil +from tqdm import tqdm +import pickle +import shutil + +import torch +import torch.nn as nn +from torch.autograd import Variable +from torch.nn import CrossEntropyLoss +from torchvision import datasets, models +import torch.backends.cudnn as cudnn +import torch.nn.functional as F + +from transforms import transforms +from models.LoadModel import MainModel +from utils.dataset_DCL import collate_fn4train, collate_fn4test, collate_fn4val, dataset +from config import LoadConfig, load_data_transformers +from utils.test_tool import set_text, save_multi_img, cls_base_acc + +import pdb + +os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' +os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3' + +def parse_args(): + parser = argparse.ArgumentParser(description='dcl parameters') + parser.add_argument('--data', dest='dataset', + default='CUB', type=str) + parser.add_argument('--backbone', dest='backbone', + default='resnet50', type=str) + parser.add_argument('--b', dest='batch_size', + default=16, type=int) + parser.add_argument('--nw', dest='num_workers', + default=16, type=int) + parser.add_argument('--ver', dest='version', + default='val', type=str) + parser.add_argument('--save', dest='resume', + default=None, type=str) + parser.add_argument('--size', dest='resize_resolution', + default=512, type=int) + parser.add_argument('--crop', dest='crop_resolution', + default=448, type=int) + parser.add_argument('--ss', dest='save_suffix', + default=None, type=str) + parser.add_argument('--acc_report', dest='acc_report', + action='store_true') + parser.add_argument('--submit', dest='submit', + action='store_true')  # referenced below; was missing from the original argument list + parser.add_argument('--swap_num', default=[7, 7], + nargs=2, metavar=('swap1', 'swap2'), + type=int, help='swap grid size, e.g. 7 7') + args = parser.parse_args() + return args + +if __name__ == '__main__': + args = parse_args() + print(args) + if args.submit: + args.version = 'test' + if not args.save_suffix: + raise Exception('**** missing --ss: a save suffix is needed ****')
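+# A plausible evaluation invocation (assumed; the checkpoint path is hypothetical):
+#   python test.py --data CUB --backbone resnet50 --b 16 --nw 16 \
+#       --ver val --save ./net_model/some_run_CUB/weights_xx_xx.pth --acc_report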
+ Config = LoadConfig(args, args.version) + transformers = load_data_transformers(args.resize_resolution, args.crop_resolution, args.swap_num) + data_set = dataset(Config,\ + anno=Config.val_anno if args.version == 'val' else Config.test_anno,\ + unswap=transformers["None"],\ + swap=transformers["None"],\ + totensor=transformers['test_totensor'],\ + test=True) + + dataloader = torch.utils.data.DataLoader(data_set,\ + batch_size=args.batch_size,\ + shuffle=False,\ + num_workers=args.num_workers,\ + collate_fn=collate_fn4test) + + setattr(dataloader, 'total_item_len', len(data_set)) + + cudnn.benchmark = True + + model = MainModel(Config) + model_dict = model.state_dict() + resume = args.resume  # path of the trained checkpoint (--save) + pretrained_dict = torch.load(resume) + pretrained_dict = {k[7:]: v for k, v in pretrained_dict.items() if k[7:] in model_dict}  # strip the 'module.' prefix saved by nn.DataParallel + model_dict.update(pretrained_dict) + model.load_state_dict(model_dict) + model.cuda() + model = nn.DataParallel(model) + + model.train(False) + with torch.no_grad(): + val_corrects1 = 0 + val_corrects2 = 0 + val_corrects3 = 0 + val_size = ceil(len(data_set) / dataloader.batch_size) + result_gather = {} + count_bar = tqdm(total=dataloader.__len__()) + for batch_cnt_val, data_val in enumerate(dataloader): + count_bar.update(1) + inputs, labels, img_name = data_val + inputs = Variable(inputs.cuda()) + labels = Variable(torch.from_numpy(np.array(labels)).long().cuda()) + + outputs = model(inputs) + outputs_pred = outputs[0] + outputs[1][:,0:Config.numcls] + outputs[1][:,Config.numcls:2*Config.numcls] + + top3_val, top3_pos = torch.topk(outputs_pred, 3) + + if args.version == 'val': + batch_corrects1 = torch.sum((top3_pos[:, 0] == labels)).data.item() + val_corrects1 += batch_corrects1 + batch_corrects2 = torch.sum((top3_pos[:, 1] == labels)).data.item() + val_corrects2 += (batch_corrects2 + batch_corrects1) + batch_corrects3 = torch.sum((top3_pos[:, 2] == labels)).data.item() + val_corrects3 += (batch_corrects3 + batch_corrects2 + batch_corrects1) + + if args.acc_report: + for sub_name, sub_cat, sub_val, sub_label in zip(img_name, top3_pos.tolist(), top3_val.tolist(), labels.tolist()): + result_gather[sub_name] = {'top1_cat': sub_cat[0], 'top2_cat': sub_cat[1], 'top3_cat': sub_cat[2], + 'top1_val': sub_val[0], 'top2_val': sub_val[1], 'top3_val': sub_val[2], + 'label': sub_label} + if args.acc_report: + torch.save(result_gather, 'result_gather_%s'%resume.split('/')[-1][:-4]+ '.pt') + + count_bar.close() + + if args.acc_report: + + val_acc1 = val_corrects1 / len(data_set) + val_acc2 = val_corrects2 / len(data_set) + val_acc3 = val_corrects3 / len(data_set) + print('%sacc1 %f%s\n%sacc2 %f%s\n%sacc3 %f%s\n'%(8*'-', val_acc1, 8*'-', 8*'-', val_acc2, 8*'-', 8*'-', val_acc3, 8*'-')) + + cls_top1, cls_top3, cls_count = cls_base_acc(result_gather) + + acc_report_io = open('acc_report_%s_%s.json'%(args.save_suffix, resume.split('/')[-1]), 'w') + json.dump({'val_acc1':val_acc1, + 'val_acc2':val_acc2, + 'val_acc3':val_acc3, + 'cls_top1':cls_top1, + 'cls_top3':cls_top3, + 'cls_count':cls_count}, acc_report_io) + acc_report_io.close() + + diff --git a/train.py b/train.py new file mode 100644 index 0000000..f1bcfa8 --- /dev/null +++ b/train.py @@ -0,0 +1,230 @@ +#coding=utf-8 +import os +import datetime +import argparse +import logging +import pandas as pd + +import torch +import torch.nn as nn +from torch.nn import CrossEntropyLoss +import torch.utils.data as torchdata +from torchvision import datasets, models +import torch.optim as optim +from torch.optim import lr_scheduler +import torch.backends.cudnn as cudnn + +from transforms import transforms +from utils.train_model import train +from models.LoadModel import MainModel +from config import LoadConfig, load_data_transformers +from utils.dataset_DCL import collate_fn4train, collate_fn4val, collate_fn4test, collate_fn4backbone, dataset + +import pdb + +os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' +os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
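+# Note: the device list above assumes a 4-GPU machine; adjust CUDA_VISIBLE_DEVICES
+# to match your hardware before launching.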
+ +# parameters setting +def parse_args(): + parser = argparse.ArgumentParser(description='dcl parameters') + parser.add_argument('--data', dest='dataset', + default='CUB', type=str) + parser.add_argument('--save', dest='resume', + default=None, + type=str) + parser.add_argument('--backbone', dest='backbone', + default='resnet50', type=str) + parser.add_argument('--auto_resume', dest='auto_resume', + action='store_true') + parser.add_argument('--epoch', dest='epoch', + default=360, type=int) + parser.add_argument('--tb', dest='train_batch', + default=16, type=int) + parser.add_argument('--vb', dest='val_batch', + default=512, type=int) + parser.add_argument('--sp', dest='save_point', + default=5000, type=int) + parser.add_argument('--cp', dest='check_point', + default=5000, type=int) + parser.add_argument('--lr', dest='base_lr', + default=0.0008, type=float) + parser.add_argument('--lr_step', dest='decay_step', + default=60, type=int) + parser.add_argument('--cls_lr_ratio', dest='cls_lr_ratio', + default=10.0, type=float) + parser.add_argument('--start_epoch', dest='start_epoch', + default=0, type=int) + parser.add_argument('--tnw', dest='train_num_workers', + default=16, type=int) + parser.add_argument('--vnw', dest='val_num_workers', + default=32, type=int) + parser.add_argument('--detail', dest='describe', + default='', type=str) + parser.add_argument('--size', dest='resize_resolution', + default=512, type=int) + parser.add_argument('--crop', dest='crop_resolution', + default=448, type=int) + parser.add_argument('--cls_2', dest='cls_2', + action='store_true') + parser.add_argument('--cls_mul', dest='cls_mul', + action='store_true') + parser.add_argument('--swap_num', default=[7, 7], + nargs=2, metavar=('swap1', 'swap2'), + type=int, help='swap grid size, e.g. 7 7') + args = parser.parse_args() + return args + +def auto_load_resume(load_dir): + folders = os.listdir(load_dir) + date_list = [int(x.split('_')[1].replace(' ', '0')) for x in folders] + chosen = folders[date_list.index(max(date_list))] + weight_list = os.listdir(os.path.join(load_dir, chosen)) + acc_list = [x[:-4].split('_')[-1] if x[:7]=='weights' else '0' for x in weight_list] + acc_list = [float(x) for x in acc_list] + chosen_w = weight_list[acc_list.index(max(acc_list))] + return os.path.join(load_dir, chosen, chosen_w) + + +if __name__ == '__main__': + args = parse_args() + print(args, flush=True) + Config = LoadConfig(args, 'train') + Config.cls_2 = args.cls_2 + Config.cls_2xmul = args.cls_mul + assert Config.cls_2 ^ Config.cls_2xmul  # exactly one of --cls_2 / --cls_mul must be set + + transformers = load_data_transformers(args.resize_resolution, args.crop_resolution, args.swap_num)
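+ # Overview of the three loaders built below (see utils/dataset_DCL.py); comments added for clarity:
+ #   train:    common_aug + Randomswap; collate_fn4train yields each image twice
+ #             (original + patch-shuffled view) with the matching swap labels/laws
+ #   trainval: a ~1/10 per-class subsample of the training annotations (random_sample),
+ #             used to monitor fitting on seen data
+ #   val:      the held-out split with deterministic resize + center-crop preprocessing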
+ # initial dataloaders + train_set = dataset(Config = Config,\ + anno = Config.train_anno,\ + common_aug = transformers["common_aug"],\ + swap = transformers["swap"],\ + totensor = transformers["train_totensor"],\ + train = True) + + trainval_set = dataset(Config = Config,\ + anno = Config.train_anno,\ + common_aug = transformers["None"],\ + swap = transformers["None"],\ + totensor = transformers["val_totensor"],\ + train = False, + train_val = True) + + val_set = dataset(Config = Config,\ + anno = Config.val_anno,\ + common_aug = transformers["None"],\ + swap = transformers["None"],\ + totensor = transformers["test_totensor"],\ + test=True) + + dataloader = {} + dataloader['train'] = torch.utils.data.DataLoader(train_set,\ + batch_size=args.train_batch,\ + shuffle=True,\ + num_workers=args.train_num_workers,\ + collate_fn=collate_fn4train if not Config.use_backbone else collate_fn4backbone, + drop_last=False, + pin_memory=True) + + setattr(dataloader['train'], 'total_item_len', len(train_set)) + + dataloader['trainval'] = torch.utils.data.DataLoader(trainval_set,\ + batch_size=args.val_batch,\ + shuffle=False,\ + num_workers=args.val_num_workers,\ + collate_fn=collate_fn4val if not Config.use_backbone else collate_fn4backbone, + drop_last=False, + pin_memory=True) + + setattr(dataloader['trainval'], 'total_item_len', len(trainval_set)) + setattr(dataloader['trainval'], 'num_cls', Config.numcls) + + dataloader['val'] = torch.utils.data.DataLoader(val_set,\ + batch_size=args.val_batch,\ + shuffle=False,\ + num_workers=args.val_num_workers,\ + collate_fn=collate_fn4test if not Config.use_backbone else collate_fn4backbone, + drop_last=False, + pin_memory=True) + + setattr(dataloader['val'], 'total_item_len', len(val_set)) + setattr(dataloader['val'], 'num_cls', Config.numcls) + + + cudnn.benchmark = True + + print('Choose model and train set', flush=True) + model = MainModel(Config) + + # load model + if (args.resume is None) and (not args.auto_resume): + print('train from imagenet pretrained models ...', flush=True) + else: + if args.resume is not None: + resume = args.resume + print('load from pretrained checkpoint %s ...'% resume, flush=True) + elif args.auto_resume: + resume = auto_load_resume(Config.save_dir) + print('load from %s ...'%resume, flush=True) + else: + raise Exception("no checkpoints to load") + + model_dict = model.state_dict() + pretrained_dict = torch.load(resume) + pretrained_dict = {k[7:]: v for k, v in pretrained_dict.items() if k[7:] in model_dict}  # strip the 'module.' prefix saved by nn.DataParallel + model_dict.update(pretrained_dict) + model.load_state_dict(model_dict) + + print('Set cache dir', flush=True) + time = datetime.datetime.now() + filename = '%s_%d%d%d_%s'%(args.describe, time.month, time.day, time.hour, Config.dataset) + save_dir = os.path.join(Config.save_dir, filename) + if not os.path.exists(save_dir): + os.makedirs(save_dir) + + model.cuda() + model = nn.DataParallel(model) + + # optimizer prepare + if Config.use_backbone: + ignored_params = list(map(id, model.module.classifier.parameters())) + else: + ignored_params1 = list(map(id, model.module.classifier.parameters())) + ignored_params2 = list(map(id, model.module.classifier_swap.parameters())) + ignored_params3 = list(map(id, model.module.Convmask.parameters())) + + ignored_params = ignored_params1 + ignored_params2 + ignored_params3 + print('the num of new layers:', len(ignored_params), flush=True) + base_params = filter(lambda p: id(p) not in ignored_params, model.module.parameters()) + + lr_ratio = args.cls_lr_ratio + base_lr = args.base_lr + if Config.use_backbone: + optimizer = optim.SGD([{'params': base_params}, + {'params': model.module.classifier.parameters(), 'lr': base_lr}], lr = base_lr, momentum=0.9) + else: + optimizer = optim.SGD([{'params': base_params}, + {'params': model.module.classifier.parameters(), 'lr': lr_ratio*base_lr}, + {'params': model.module.classifier_swap.parameters(), 'lr': lr_ratio*base_lr}, + {'params': model.module.Convmask.parameters(), 'lr': lr_ratio*base_lr}, + ], lr = 
base_lr, momentum=0.9) + + + exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=args.decay_step, gamma=0.1) + + # train entry + train(Config, + model, + epoch_num=args.epoch, + start_epoch=args.start_epoch, + optimizer=optimizer, + exp_lr_scheduler=exp_lr_scheduler, + data_loader=dataloader, + save_dir=save_dir, + data_size=args.crop_resolution, + savepoint=args.save_point, + checkpoint=args.check_point) + + diff --git a/train_rel.py b/train_rel.py deleted file mode 100644 index 66c6830..0000000 --- a/train_rel.py +++ /dev/null @@ -1,136 +0,0 @@ -#oding=utf-8 -import os -import datetime -import pandas as pd -from dataset.dataset_DCL import collate_fn1, collate_fn2, dataset -import torch -import torch.nn as nn -import torch.utils.data as torchdata -from torchvision import datasets, models -from transforms import transforms -import torch.optim as optim -from torch.optim import lr_scheduler -from utils.train_util_DCL import train, trainlog -from torch.nn import CrossEntropyLoss -import logging -from models.resnet_swap_2loss_add import resnet_swap_2loss_add - -cfg = {} -time = datetime.datetime.now() -# set dataset, include{CUB, STCAR, AIR} -cfg['dataset'] = 'CUB' -# prepare dataset -if cfg['dataset'] == 'CUB': - rawdata_root = './datasets/CUB_200_2011/all' - train_pd = pd.read_csv("./datasets/CUB_200_2011/train.txt",sep=" ",header=None, names=['ImageName', 'label']) - test_pd = pd.read_csv("./datasets/CUB_200_2011/test.txt",sep=" ",header=None, names=['ImageName', 'label']) - cfg['numcls'] = 200 - numimage = 6033 -if cfg['dataset'] == 'STCAR': - rawdata_root = './datasets/st_car/all' - train_pd = pd.read_csv("./datasets/st_car/train.txt",sep=" ",header=None, names=['ImageName', 'label']) - test_pd = pd.read_csv("./datasets/st_car/test.txt",sep=" ",header=None, names=['ImageName', 'label']) - cfg['numcls'] = 196 - numimage = 8144 -if cfg['dataset'] == 'AIR': - rawdata_root = './datasets/aircraft/all' - train_pd = pd.read_csv("./datasets/aircraft/train.txt",sep=" ",header=None, names=['ImageName', 'label']) - test_pd = pd.read_csv("./datasets/aircraft/test.txt",sep=" ",header=None, names=['ImageName', 'label']) - cfg['numcls'] = 100 - numimage = 6667 - -print('Dataset:',cfg['dataset']) -print('train images:', train_pd.shape) -print('test images:', test_pd.shape) -print('num classes:', cfg['numcls']) - -print('Set transform') - -cfg['swap_num'] = 7 - -data_transforms = { - 'swap': transforms.Compose([ - transforms.Resize((512,512)), - transforms.RandomRotation(degrees=15), - transforms.RandomCrop((448,448)), - transforms.RandomHorizontalFlip(), - transforms.Randomswap((cfg['swap_num'],cfg['swap_num'])), - ]), - 'unswap': transforms.Compose([ - transforms.Resize((512,512)), - transforms.RandomRotation(degrees=15), - transforms.RandomCrop((448,448)), - transforms.RandomHorizontalFlip(), - ]), - 'totensor': transforms.Compose([ - transforms.Resize((448,448)), - transforms.ToTensor(), - transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), - ]), - 'None': transforms.Compose([ - transforms.Resize((512,512)), - transforms.CenterCrop((448,448)), - ]), - - } -data_set = {} -data_set['train'] = dataset(cfg,imgroot=rawdata_root,anno_pd=train_pd, - unswap=data_transforms["unswap"],swap=data_transforms["swap"],totensor=data_transforms["totensor"],train=True - ) -data_set['val'] = dataset(cfg,imgroot=rawdata_root,anno_pd=test_pd, - unswap=data_transforms["None"],swap=data_transforms["None"],totensor=data_transforms["totensor"],train=False - ) -dataloader = {} 
-dataloader['train']=torch.utils.data.DataLoader(data_set['train'], batch_size=16, - shuffle=True, num_workers=16,collate_fn=collate_fn1) -dataloader['val']=torch.utils.data.DataLoader(data_set['val'], batch_size=16, - shuffle=True, num_workers=16,collate_fn=collate_fn1) - -print('Set cache dir') -filename = str(time.month) + str(time.day) + str(time.hour) + '_' + cfg['dataset'] -save_dir = './net_model/' + filename -if not os.path.exists(save_dir): - os.makedirs(save_dir) -logfile = save_dir + '/' + filename +'.log' -trainlog(logfile) - -print('Choose model and train set') -device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -model = resnet_swap_2loss_add(num_classes=cfg['numcls']) -base_lr = 0.0008 -resume = None -if resume: - logging.info('resuming finetune from %s'%resume) - model.load_state_dict(torch.load(resume)) -model.cuda() -model = nn.DataParallel(model) -model.to(device) - -# set new layer's lr -ignored_params1 = list(map(id, model.module.classifier.parameters())) -ignored_params2 = list(map(id, model.module.classifier_swap.parameters())) -ignored_params3 = list(map(id, model.module.Convmask.parameters())) - -ignored_params = ignored_params1 + ignored_params2 + ignored_params3 -print('the num of new layers:', len(ignored_params)) -base_params = filter(lambda p: id(p) not in ignored_params, model.module.parameters()) -optimizer = optim.SGD([{'params': base_params}, - {'params': model.module.classifier.parameters(), 'lr': base_lr*10}, - {'params': model.module.classifier_swap.parameters(), 'lr': base_lr*10}, - {'params': model.module.Convmask.parameters(), 'lr': base_lr*10}, - ], lr = base_lr, momentum=0.9) - -criterion = CrossEntropyLoss() -exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=60, gamma=0.1) -train(cfg, - model, - epoch_num=360, - start_epoch=0, - optimizer=optimizer, - criterion=criterion, - exp_lr_scheduler=exp_lr_scheduler, - data_set=data_set, - data_loader=dataloader, - save_dir=save_dir, - print_inter=int(numimage/(4*16)), - val_inter=int(numimage/(16)),) diff --git a/transforms/__pycache__/__init__.cpython-36.pyc b/transforms/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..0577480 Binary files /dev/null and b/transforms/__pycache__/__init__.cpython-36.pyc differ diff --git a/transforms/__pycache__/functional.cpython-36.pyc b/transforms/__pycache__/functional.cpython-36.pyc new file mode 100644 index 0000000..440c440 Binary files /dev/null and b/transforms/__pycache__/functional.cpython-36.pyc differ diff --git a/transforms/__pycache__/transforms.cpython-36.pyc b/transforms/__pycache__/transforms.cpython-36.pyc new file mode 100644 index 0000000..613f791 Binary files /dev/null and b/transforms/__pycache__/transforms.cpython-36.pyc differ diff --git a/utils/Asoftmax_loss.py b/utils/Asoftmax_loss.py new file mode 100644 index 0000000..44327eb --- /dev/null +++ b/utils/Asoftmax_loss.py @@ -0,0 +1,47 @@ +import torch +import torch.nn as nn +from torch.autograd import Variable +import torch.nn.functional as F +from torch.nn import Parameter +import math + +import pdb + +class AngleLoss(nn.Module): + def __init__(self, gamma=0): + super(AngleLoss, self).__init__() + self.gamma = gamma + self.it = 0 + self.LambdaMin = 50.0 + self.LambdaMax = 1500.0 + self.lamb = 1500.0 + + def forward(self, input, target, decay=None): + self.it += 1 + cos_theta,phi_theta = input + target = target.view(-1,1) #size=(B,1) + + index = cos_theta.data * 0.0 #size=(B,Classnum) + index.scatter_(1,target.data.view(-1,1),1) + index = 
index.byte() + index = Variable(index) + + if decay is None: + self.lamb = max(self.LambdaMin,self.LambdaMax/(1+0.1*self.it )) + else: + self.LambdaMax *= decay + self.lamb = max(self.LambdaMin, self.LambdaMax) + output = cos_theta * 1.0 #size=(B,Classnum) + output[index] -= cos_theta[index]*(1.0+0)/(1+self.lamb) + output[index] += phi_theta[index]*(1.0+0)/(1+self.lamb) + + logpt = F.log_softmax(output, 1) + logpt = logpt.gather(1,target) + logpt = logpt.view(-1) + pt = Variable(logpt.data.exp()) + + loss = -1 * (1-pt)**self.gamma * logpt + loss = loss.mean() + + return loss + diff --git a/utils/__pycache__/Asoftmax_loss.cpython-36.pyc b/utils/__pycache__/Asoftmax_loss.cpython-36.pyc new file mode 100644 index 0000000..5eb9f2d Binary files /dev/null and b/utils/__pycache__/Asoftmax_loss.cpython-36.pyc differ diff --git a/utils/__pycache__/autoaugment.cpython-36.pyc b/utils/__pycache__/autoaugment.cpython-36.pyc new file mode 100644 index 0000000..2d3a569 Binary files /dev/null and b/utils/__pycache__/autoaugment.cpython-36.pyc differ diff --git a/utils/__pycache__/dataset_DCL.cpython-36.pyc b/utils/__pycache__/dataset_DCL.cpython-36.pyc new file mode 100644 index 0000000..e7167fd Binary files /dev/null and b/utils/__pycache__/dataset_DCL.cpython-36.pyc differ diff --git a/utils/__pycache__/eval_model.cpython-36.pyc b/utils/__pycache__/eval_model.cpython-36.pyc new file mode 100644 index 0000000..1840e92 Binary files /dev/null and b/utils/__pycache__/eval_model.cpython-36.pyc differ diff --git a/utils/__pycache__/train_model.cpython-36.pyc b/utils/__pycache__/train_model.cpython-36.pyc new file mode 100644 index 0000000..038caf5 Binary files /dev/null and b/utils/__pycache__/train_model.cpython-36.pyc differ diff --git a/utils/__pycache__/utils.cpython-36.pyc b/utils/__pycache__/utils.cpython-36.pyc new file mode 100644 index 0000000..dbc4098 Binary files /dev/null and b/utils/__pycache__/utils.cpython-36.pyc differ diff --git a/utils/autoaugment.py b/utils/autoaugment.py new file mode 100644 index 0000000..9f39b51 --- /dev/null +++ b/utils/autoaugment.py @@ -0,0 +1,232 @@ +from PIL import Image, ImageEnhance, ImageOps +import numpy as np +import random + + +class ImageNetPolicy(object): + """ Randomly choose one of the best 24 Sub-policies on ImageNet. 
+ Example: + >>> policy = ImageNetPolicy() + >>> transformed = policy(image) + Example as a PyTorch Transform: + >>> transform=transforms.Compose([ + >>> transforms.Resize(256), + >>> ImageNetPolicy(), + >>> transforms.ToTensor()]) + """ + def __init__(self, fillcolor=(128, 128, 128)): + self.policies = [ + SubPolicy(0.4, "posterize", 8, 0.6, "rotate", 9, fillcolor), + SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor), + SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor), + SubPolicy(0.6, "posterize", 7, 0.6, "posterize", 6, fillcolor), + SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor), + + SubPolicy(0.4, "equalize", 4, 0.8, "rotate", 8, fillcolor), + SubPolicy(0.6, "solarize", 3, 0.6, "equalize", 7, fillcolor), + SubPolicy(0.8, "posterize", 5, 1.0, "equalize", 2, fillcolor), + SubPolicy(0.2, "rotate", 3, 0.6, "solarize", 8, fillcolor), + SubPolicy(0.6, "equalize", 8, 0.4, "posterize", 6, fillcolor), + + SubPolicy(0.8, "rotate", 8, 0.4, "color", 0, fillcolor), + SubPolicy(0.4, "rotate", 9, 0.6, "equalize", 2, fillcolor), + SubPolicy(0.0, "equalize", 7, 0.8, "equalize", 8, fillcolor), + SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor), + SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor), + + SubPolicy(0.8, "rotate", 8, 1.0, "color", 2, fillcolor), + SubPolicy(0.8, "color", 8, 0.8, "solarize", 7, fillcolor), + SubPolicy(0.4, "sharpness", 7, 0.6, "invert", 8, fillcolor), + SubPolicy(0.6, "shearX", 5, 1.0, "equalize", 9, fillcolor), + SubPolicy(0.4, "color", 0, 0.6, "equalize", 3, fillcolor), + + SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor), + SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor), + SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor), + SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor), + SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor) + ] + + + def __call__(self, img): + policy_idx = random.randint(0, len(self.policies) - 1) + return self.policies[policy_idx](img) + + def __repr__(self): + return "AutoAugment ImageNet Policy" + + +class CIFAR10Policy(object): + """ Randomly choose one of the best 25 Sub-policies on CIFAR10. 
+ Example: + >>> policy = CIFAR10Policy() + >>> transformed = policy(image) + Example as a PyTorch Transform: + >>> transform=transforms.Compose([ + >>> transforms.Resize(256), + >>> CIFAR10Policy(), + >>> transforms.ToTensor()]) + """ + def __init__(self, fillcolor=(128, 128, 128)): + self.policies = [ + SubPolicy(0.1, "invert", 7, 0.2, "contrast", 6, fillcolor), + SubPolicy(0.7, "rotate", 2, 0.3, "translateX", 9, fillcolor), + SubPolicy(0.8, "sharpness", 1, 0.9, "sharpness", 3, fillcolor), + SubPolicy(0.5, "shearY", 8, 0.7, "translateY", 9, fillcolor), + SubPolicy(0.5, "autocontrast", 8, 0.9, "equalize", 2, fillcolor), + + SubPolicy(0.2, "shearY", 7, 0.3, "posterize", 7, fillcolor), + SubPolicy(0.4, "color", 3, 0.6, "brightness", 7, fillcolor), + SubPolicy(0.3, "sharpness", 9, 0.7, "brightness", 9, fillcolor), + SubPolicy(0.6, "equalize", 5, 0.5, "equalize", 1, fillcolor), + SubPolicy(0.6, "contrast", 7, 0.6, "sharpness", 5, fillcolor), + + SubPolicy(0.7, "color", 7, 0.5, "translateX", 8, fillcolor), + SubPolicy(0.3, "equalize", 7, 0.4, "autocontrast", 8, fillcolor), + SubPolicy(0.4, "translateY", 3, 0.2, "sharpness", 6, fillcolor), + SubPolicy(0.9, "brightness", 6, 0.2, "color", 8, fillcolor), + SubPolicy(0.5, "solarize", 2, 0.0, "invert", 3, fillcolor), + + SubPolicy(0.2, "equalize", 0, 0.6, "autocontrast", 0, fillcolor), + SubPolicy(0.2, "equalize", 8, 0.8, "equalize", 4, fillcolor), + SubPolicy(0.9, "color", 9, 0.6, "equalize", 6, fillcolor), + SubPolicy(0.8, "autocontrast", 4, 0.2, "solarize", 8, fillcolor), + SubPolicy(0.1, "brightness", 3, 0.7, "color", 0, fillcolor), + + SubPolicy(0.4, "solarize", 5, 0.9, "autocontrast", 3, fillcolor), + SubPolicy(0.9, "translateY", 9, 0.7, "translateY", 9, fillcolor), + SubPolicy(0.9, "autocontrast", 2, 0.8, "solarize", 3, fillcolor), + SubPolicy(0.8, "equalize", 8, 0.1, "invert", 3, fillcolor), + SubPolicy(0.7, "translateY", 9, 0.9, "autocontrast", 1, fillcolor) + ] + + + def __call__(self, img): + policy_idx = random.randint(0, len(self.policies) - 1) + return self.policies[policy_idx](img) + + def __repr__(self): + return "AutoAugment CIFAR10 Policy" + + +class SVHNPolicy(object): + """ Randomly choose one of the best 25 Sub-policies on SVHN. 
+ Example: + >>> policy = SVHNPolicy() + >>> transformed = policy(image) + Example as a PyTorch Transform: + >>> transform=transforms.Compose([ + >>> transforms.Resize(256), + >>> SVHNPolicy(), + >>> transforms.ToTensor()]) + """ + def __init__(self, fillcolor=(128, 128, 128)): + self.policies = [ + SubPolicy(0.9, "shearX", 4, 0.2, "invert", 3, fillcolor), + SubPolicy(0.9, "shearY", 8, 0.7, "invert", 5, fillcolor), + SubPolicy(0.6, "equalize", 5, 0.6, "solarize", 6, fillcolor), + SubPolicy(0.9, "invert", 3, 0.6, "equalize", 3, fillcolor), + SubPolicy(0.6, "equalize", 1, 0.9, "rotate", 3, fillcolor), + + SubPolicy(0.9, "shearX", 4, 0.8, "autocontrast", 3, fillcolor), + SubPolicy(0.9, "shearY", 8, 0.4, "invert", 5, fillcolor), + SubPolicy(0.9, "shearY", 5, 0.2, "solarize", 6, fillcolor), + SubPolicy(0.9, "invert", 6, 0.8, "autocontrast", 1, fillcolor), + SubPolicy(0.6, "equalize", 3, 0.9, "rotate", 3, fillcolor), + + SubPolicy(0.9, "shearX", 4, 0.3, "solarize", 3, fillcolor), + SubPolicy(0.8, "shearY", 8, 0.7, "invert", 4, fillcolor), + SubPolicy(0.9, "equalize", 5, 0.6, "translateY", 6, fillcolor), + SubPolicy(0.9, "invert", 4, 0.6, "equalize", 7, fillcolor), + SubPolicy(0.3, "contrast", 3, 0.8, "rotate", 4, fillcolor), + + SubPolicy(0.8, "invert", 5, 0.0, "translateY", 2, fillcolor), + SubPolicy(0.7, "shearY", 6, 0.4, "solarize", 8, fillcolor), + SubPolicy(0.6, "invert", 4, 0.8, "rotate", 4, fillcolor), + SubPolicy(0.3, "shearY", 7, 0.9, "translateX", 3, fillcolor), + SubPolicy(0.1, "shearX", 6, 0.6, "invert", 5, fillcolor), + + SubPolicy(0.7, "solarize", 2, 0.6, "translateY", 7, fillcolor), + SubPolicy(0.8, "shearY", 4, 0.8, "invert", 8, fillcolor), + SubPolicy(0.7, "shearX", 9, 0.8, "translateY", 3, fillcolor), + SubPolicy(0.8, "shearY", 5, 0.7, "autocontrast", 3, fillcolor), + SubPolicy(0.7, "shearX", 2, 0.1, "invert", 5, fillcolor) + ] + + + def __call__(self, img): + policy_idx = random.randint(0, len(self.policies) - 1) + return self.policies[policy_idx](img) + + def __repr__(self): + return "AutoAugment SVHN Policy" + + +class SubPolicy(object): + def __init__(self, p1, operation1, magnitude_idx1, p2, operation2, magnitude_idx2, fillcolor=(128, 128, 128)): + ranges = { + "shearX": np.linspace(0, 0.3, 10), + "shearY": np.linspace(0, 0.3, 10), + "translateX": np.linspace(0, 150 / 331, 10), + "translateY": np.linspace(0, 150 / 331, 10), + "rotate": np.linspace(0, 30, 10), + "color": np.linspace(0.0, 0.9, 10), + "posterize": np.round(np.linspace(8, 4, 10), 0).astype(np.int), + "solarize": np.linspace(256, 0, 10), + "contrast": np.linspace(0.0, 0.9, 10), + "sharpness": np.linspace(0.0, 0.9, 10), + "brightness": np.linspace(0.0, 0.9, 10), + "autocontrast": [0] * 10, + "equalize": [0] * 10, + "invert": [0] * 10 + } + + # from https://stackoverflow.com/questions/5252170/specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand + def rotate_with_fill(img, magnitude): + rot = img.convert("RGBA").rotate(magnitude) + return Image.composite(rot, Image.new("RGBA", rot.size, (128,) * 4), rot).convert(img.mode) + + func = { + "shearX": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0), + Image.BICUBIC, fillcolor=fillcolor), + "shearY": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0), + Image.BICUBIC, fillcolor=fillcolor), + "translateX": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, 0, magnitude * img.size[0] * 
random.choice([-1, 1]), 0, 1, 0), + fillcolor=fillcolor), + "translateY": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, 0, 0, 0, 1, magnitude * img.size[1] * random.choice([-1, 1])), + fillcolor=fillcolor), + "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude), + # "rotate": lambda img, magnitude: img.rotate(magnitude * random.choice([-1, 1])), + "color": lambda img, magnitude: ImageEnhance.Color(img).enhance(1 + magnitude * random.choice([-1, 1])), + "posterize": lambda img, magnitude: ImageOps.posterize(img, magnitude), + "solarize": lambda img, magnitude: ImageOps.solarize(img, magnitude), + "contrast": lambda img, magnitude: ImageEnhance.Contrast(img).enhance( + 1 + magnitude * random.choice([-1, 1])), + "sharpness": lambda img, magnitude: ImageEnhance.Sharpness(img).enhance( + 1 + magnitude * random.choice([-1, 1])), + "brightness": lambda img, magnitude: ImageEnhance.Brightness(img).enhance( + 1 + magnitude * random.choice([-1, 1])), + "autocontrast": lambda img, magnitude: ImageOps.autocontrast(img), + "equalize": lambda img, magnitude: ImageOps.equalize(img), + "invert": lambda img, magnitude: ImageOps.invert(img) + } + + # self.name = "{}_{:.2f}_and_{}_{:.2f}".format( + # operation1, ranges[operation1][magnitude_idx1], + # operation2, ranges[operation2][magnitude_idx2]) + self.p1 = p1 + self.operation1 = func[operation1] + self.magnitude1 = ranges[operation1][magnitude_idx1] + self.p2 = p2 + self.operation2 = func[operation2] + self.magnitude2 = ranges[operation2][magnitude_idx2] + + + def __call__(self, img): + if random.random() < self.p1: img = self.operation1(img, self.magnitude1) + if random.random() < self.p2: img = self.operation2(img, self.magnitude2) + return img diff --git a/utils/dataset_DCL.py b/utils/dataset_DCL.py new file mode 100644 index 0000000..54200a6 --- /dev/null +++ b/utils/dataset_DCL.py @@ -0,0 +1,181 @@ +# coding=utf8 +from __future__ import division +import os +import torch +import torch.utils.data as data +import pandas +import random +import PIL.Image as Image +from PIL import ImageStat + +import pdb + +def random_sample(img_names, labels): + anno_dict = {} + img_list = [] + anno_list = [] + for img, anno in zip(img_names, labels): + if not anno in anno_dict: + anno_dict[anno] = [img] + else: + anno_dict[anno].append(img) + + for anno in anno_dict.keys(): + anno_len = len(anno_dict[anno]) + fetch_keys = random.sample(list(range(anno_len)), anno_len//10) + img_list.extend([anno_dict[anno][x] for x in fetch_keys]) + anno_list.extend([anno for x in fetch_keys]) + return img_list, anno_list + + + +class dataset(data.Dataset): + def __init__(self, Config, anno, swap_size=[7,7], common_aug=None, swap=None, totensor=None, train=False, train_val=False, test=False): + self.root_path = Config.rawdata_root + self.numcls = Config.numcls + self.dataset = Config.dataset + self.use_cls_2 = Config.cls_2 + self.use_cls_mul = Config.cls_2xmul + if isinstance(anno, pandas.core.frame.DataFrame): + self.paths = anno['ImageName'].tolist() + self.labels = anno['label'].tolist() + elif isinstance(anno, dict): + self.paths = anno['img_name'] + self.labels = anno['label'] + + if train_val: + self.paths, self.labels = random_sample(self.paths, self.labels) + self.common_aug = common_aug + self.swap = swap + self.totensor = totensor + self.cfg = Config + self.train = train + self.swap_size = swap_size + self.test = test + + def __len__(self): + return len(self.paths) + + def __getitem__(self, item): + img_path = os.path.join(self.root_path, 
self.paths[item])
+        img = self.pil_loader(img_path)
+        if self.test:
+            img = self.totensor(img)
+            label = self.labels[item]
+            return img, label, self.paths[item]
+        img_unswap = self.common_aug(img) if self.common_aug is not None else img
+
+        image_unswap_list = self.crop_image(img_unswap, self.swap_size)
+
+        swap_range = self.swap_size[0] * self.swap_size[1]
+        # normalized original index of every patch in the unshuffled image;
+        # the construction branch regresses these positions
+        swap_law1 = [(i-(swap_range//2))/swap_range for i in range(swap_range)]
+
+        if self.train:
+            img_swap = self.swap(img_unswap)
+            image_swap_list = self.crop_image(img_swap, self.swap_size)
+            unswap_stats = [sum(ImageStat.Stat(im).mean) for im in image_unswap_list]
+            swap_stats = [sum(ImageStat.Stat(im).mean) for im in image_swap_list]
+            swap_law2 = []
+            # recover each shuffled patch's most likely original position by
+            # matching mean pixel statistics against the unshuffled patches
+            for swap_im in swap_stats:
+                distance = [abs(swap_im - unswap_im) for unswap_im in unswap_stats]
+                index = distance.index(min(distance))
+                swap_law2.append((index-(swap_range//2))/swap_range)
+            img_swap = self.totensor(img_swap)
+            label = self.labels[item]
+            if self.use_cls_mul:
+                label_swap = label + self.numcls
+            if self.use_cls_2:
+                label_swap = -1
+            img_unswap = self.totensor(img_unswap)
+            return img_unswap, img_swap, label, label_swap, swap_law1, swap_law2, self.paths[item]
+        else:
+            label = self.labels[item]
+            swap_law2 = [(i-(swap_range//2))/swap_range for i in range(swap_range)]
+            label_swap = label
+            img_unswap = self.totensor(img_unswap)
+            return img_unswap, label, label_swap, swap_law1, swap_law2, self.paths[item]
+
+    def pil_loader(self, imgpath):
+        with open(imgpath, 'rb') as f:
+            with Image.open(f) as img:
+                return img.convert('RGB')
+
+    def crop_image(self, image, cropnum):
+        width, height = image.size
+        crop_x = [int((width / cropnum[0]) * i) for i in range(cropnum[0] + 1)]
+        crop_y = [int((height / cropnum[1]) * i) for i in range(cropnum[1] + 1)]
+        im_list = []
+        for j in range(len(crop_y) - 1):
+            for i in range(len(crop_x) - 1):
+                im_list.append(image.crop((crop_x[i], crop_y[j], min(crop_x[i + 1], width), min(crop_y[j + 1], height))))
+        return im_list
+
+    def get_weighted_sampler(self):
+        # WeightedRandomSampler expects one weight per sample, not per class;
+        # weight each image by the inverse frequency of its class so that
+        # rare classes are drawn as often as common ones
+        img_nums = len(self.labels)
+        class_counts = [self.labels.count(x) for x in range(self.numcls)]
+        weights = [1.0 / max(class_counts[label], 1) for label in self.labels]
+        return torch.utils.data.sampler.WeightedRandomSampler(weights, num_samples=img_nums)
+
+
+def collate_fn4train(batch):
+    imgs = []
+    label = []
+    label_swap = []
+    law_swap = []
+    img_name = []
+    for sample in batch:
+        imgs.append(sample[0])
+        imgs.append(sample[1])
+        label.append(sample[2])
+        label.append(sample[2])
+        if sample[3] == -1:
+            label_swap.append(1)
+            label_swap.append(0)
+        else:
+            label_swap.append(sample[2])
+            label_swap.append(sample[3])
+        law_swap.append(sample[4])
+        law_swap.append(sample[5])
+        img_name.append(sample[-1])
+    return torch.stack(imgs, 0), label, label_swap, law_swap, img_name
+
+def collate_fn4val(batch):
+    imgs = []
+    label = []
+    label_swap = []
+    law_swap = []
+    img_name = []
+    for sample in batch:
+        imgs.append(sample[0])
+        label.append(sample[1])
+        if sample[3] == -1:
+            label_swap.append(1)
+        else:
+            label_swap.append(sample[2])
+        law_swap.append(sample[3])
+        img_name.append(sample[-1])
+    return torch.stack(imgs, 0), label, label_swap, law_swap, img_name
+
+def collate_fn4backbone(batch):
+    imgs = []
+    label = []
+    img_name = []
+    for sample in batch:
+        imgs.append(sample[0])
+        if len(sample) == 5:
+            label.append(sample[1])
+        else:
+            label.append(sample[2])
+        img_name.append(sample[-1])
+    return torch.stack(imgs, 0), label, img_name
+
+
+def collate_fn4test(batch):
+    imgs = []
+    label = []
+    img_name = []
+    for sample in batch:
+        imgs.append(sample[0])
+        label.append(sample[1])
+        img_name.append(sample[-1])
+    return torch.stack(imgs, 0), label, img_name
diff --git a/utils/eval_model.py b/utils/eval_model.py
new file mode 100644
index 0000000..811ffe9
--- /dev/null
+++ b/utils/eval_model.py
@@ -0,0 +1,81 @@
+#coding=utf8
+from __future__ import print_function, division
+import os,time,datetime
+import numpy as np
+from math import ceil
+
+import torch
+from torch import nn
+from torch.autograd import Variable
+import torch.nn.functional as F
+
+from utils.utils import LossRecord
+
+import pdb
+
+def dt():
+    return datetime.datetime.now().strftime("%Y-%m-%d-%H_%M_%S")
+
+def eval_turn(model, data_loader, val_version, epoch_num, log_file):
+
+    model.train(False)
+
+    val_corrects1 = 0
+    val_corrects2 = 0
+    val_corrects3 = 0
+    val_size = data_loader.__len__()
+    item_count = data_loader.total_item_len
+    t0 = time.time()
+    get_l1_loss = nn.L1Loss()
+    get_ce_loss = nn.CrossEntropyLoss()
+
+    val_batch_size = data_loader.batch_size
+    val_epoch_step = data_loader.__len__()
+    num_cls = data_loader.num_cls
+
+    val_loss_recorder = LossRecord(val_batch_size)
+    val_celoss_recorder = LossRecord(val_batch_size)
+    print('evaluating %s ...' % val_version, flush=True)
+    with torch.no_grad():
+        for batch_cnt_val, data_val in enumerate(data_loader):
+            inputs = Variable(data_val[0].cuda())
+            labels = Variable(torch.from_numpy(np.array(data_val[1])).long().cuda())
+            outputs = model(inputs)
+            loss = 0
+
+            ce_loss = get_ce_loss(outputs[0], labels).item()
+            loss += ce_loss
+
+            val_loss_recorder.update(loss)
+            val_celoss_recorder.update(ce_loss)
+
+            # combine the raw classifier with both halves of the swap classifier
+            if outputs[1].size(1) != 2:
+                outputs_pred = outputs[0] + outputs[1][:,0:num_cls] + outputs[1][:,num_cls:2*num_cls]
+            else:
+                outputs_pred = outputs[0]
+            top3_val, top3_pos = torch.topk(outputs_pred, 3)
+
+            print('{:s} eval_batch: {:-6d} / {:d} loss: {:8.4f}'.format(val_version, batch_cnt_val, val_epoch_step, loss), flush=True)
+
+            # cumulative hits: acc@k counts a sample as correct if the label
+            # appears at any of the top k positions
+            batch_corrects1 = torch.sum((top3_pos[:, 0] == labels)).data.item()
+            val_corrects1 += batch_corrects1
+            batch_corrects2 = torch.sum((top3_pos[:, 1] == labels)).data.item()
+            val_corrects2 += (batch_corrects2 + batch_corrects1)
+            batch_corrects3 = torch.sum((top3_pos[:, 2] == labels)).data.item()
+            val_corrects3 += (batch_corrects3 + batch_corrects2 + batch_corrects1)
+
+    val_acc1 = val_corrects1 / item_count
+    val_acc2 = val_corrects2 / item_count
+    val_acc3 = val_corrects3 / item_count
+
+    log_file.write(val_version + '\t' + str(val_loss_recorder.get_val()) + '\t' + str(val_celoss_recorder.get_val()) + '\t' + str(val_acc1) + '\t' + str(val_acc3) + '\n')
+
+    t1 = time.time()
+    since = t1-t0
+    print('--'*30, flush=True)
+    print('% 3d %s %s %s-loss: %.4f ||%s-acc@1: %.4f %s-acc@2: %.4f %s-acc@3: %.4f ||time: %d' % (epoch_num, val_version, dt(), val_version, val_loss_recorder.get_val(init=True), val_version, val_acc1, val_version, val_acc2, val_version, val_acc3, since), flush=True)
+    print('--' * 30, flush=True)
+
+    return val_acc1, val_acc2, val_acc3
+
diff --git a/utils/test_tool.py b/utils/test_tool.py
new file mode 100644
index 0000000..3c409e0
--- /dev/null
+++ b/utils/test_tool.py
@@ -0,0 +1,99 @@
+import os
+import math
+import numpy as np
+import cv2
+import datetime
+
+import torch
+from torchvision.utils import save_image, make_grid
+
+import pdb
+
+def dt():
+    return datetime.datetime.now().strftime("%Y-%m-%d-%H_%M_%S")
+
+def set_text(text, img):
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    if isinstance(text, str):
+        cont = text
+        cv2.putText(img, cont, (20, 50), font, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
+    if isinstance(text, float):
+        cont = '%.4f' % text
+        cv2.putText(img, cont, (20, 50), font, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
+    if isinstance(text, list):
+        # a list of captions is paired with a list of images, one caption each
+        for count in range(len(img)):
+            cv2.putText(img[count], text[count], (20, 50), font, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
+    return img
+
+def save_multi_img(img_list, text_list, grid_size=[5,5], sub_size=200, save_dir='./', save_name=None):
+    if len(img_list) > grid_size[0]*grid_size[1]:
+        merge_height = math.ceil(len(img_list) / grid_size[0]) * sub_size
+    else:
+        merge_height = grid_size[1]*sub_size
+    # uint8 canvas so cv2.imwrite serializes the mosaic correctly
+    merged_img = np.zeros((merge_height, grid_size[0]*sub_size, 3), dtype=np.uint8)
+
+    if isinstance(img_list[0], str):
+        img_name_list = img_list
+        img_list = []
+        for img_name in img_name_list:
+            img_list.append(cv2.imread(img_name))
+
+    img_counter = 0
+    for img, txt in zip(img_list, text_list):
+        img = cv2.resize(img, (sub_size, sub_size))
+        img = set_text(txt, img)
+        pos = [img_counter // grid_size[1], img_counter % grid_size[1]]
+        sub_pos = [pos[0]*sub_size, (pos[0]+1)*sub_size,
+                   pos[1]*sub_size, (pos[1]+1)*sub_size]
+        merged_img[sub_pos[0]:sub_pos[1], sub_pos[2]:sub_pos[3], :] = img
+        img_counter += 1
+
+    if save_name is None:
+        img_save_path = os.path.join(save_dir, dt()+'.png')
+    else:
+        img_save_path = os.path.join(save_dir, save_name+'.png')
+    cv2.imwrite(img_save_path, merged_img)
+    print('saved img in %s ...' % img_save_path)
+
+
+def cls_base_acc(result_gather):
+    top1_acc = {}
+    top3_acc = {}
+    cls_count = {}
+    for img_item in result_gather.keys():
+        acc_case = result_gather[img_item]
+
+        if acc_case['label'] in cls_count:
+            cls_count[acc_case['label']] += 1
+            if acc_case['top1_cat'] == acc_case['label']:
+                top1_acc[acc_case['label']] += 1
+            if acc_case['label'] in [acc_case['top1_cat'], acc_case['top2_cat'], acc_case['top3_cat']]:
+                top3_acc[acc_case['label']] += 1
+        else:
+            cls_count[acc_case['label']] = 1
+            if acc_case['top1_cat'] == acc_case['label']:
+                top1_acc[acc_case['label']] = 1
+            else:
+                top1_acc[acc_case['label']] = 0
+
+            if acc_case['label'] in [acc_case['top1_cat'], acc_case['top2_cat'], acc_case['top3_cat']]:
+                top3_acc[acc_case['label']] = 1
+            else:
+                top3_acc[acc_case['label']] = 0
+
+    for label_item in cls_count:
+        top1_acc[label_item] /= max(1.0*cls_count[label_item], 0.001)
+        top3_acc[label_item] /= max(1.0*cls_count[label_item], 0.001)
+
+    print('top1_acc:', top1_acc)
+    print('top3_acc:', top3_acc)
+    print('cls_count', cls_count)
+
+    return top1_acc, top3_acc, cls_count
+
diff --git a/utils/train_model.py b/utils/train_model.py
new file mode 100644
index 0000000..2dfd5c1
--- /dev/null
+++ b/utils/train_model.py
@@ -0,0 +1,164 @@
+#coding=utf8
+from __future__ import print_function, division
+
+import os,time,datetime
+import numpy as np
+from math import ceil
+
+import torch
+from torch import nn
+from torch.autograd import Variable
+#from torchvision.utils import make_grid, save_image
+
+from utils.utils import LossRecord, clip_gradient
+from models.focal_loss import FocalLoss
+from utils.eval_model import eval_turn
+from utils.Asoftmax_loss import AngleLoss
+
+import pdb
+
+def dt():
+    return datetime.datetime.now().strftime("%Y-%m-%d-%H_%M_%S")
+
+
+def train(Config,
+          model,
+          epoch_num,
+          start_epoch,
+          optimizer,
+          exp_lr_scheduler,
+          data_loader,
+          save_dir,
+          data_size=448,
+          savepoint=500,
+          checkpoint=1000
+          ):
+    # savepoint: save without evaluation
+    # checkpoint: save with evaluation
+
+    step = 0
+    eval_train_flag = False
+    rec_loss = []
+    checkpoint_list = []
+
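+    # rec_loss buffers per-step losses between saves, and checkpoint_list keeps
+    # a rolling window of savepoint files: once six accumulate, the oldest one
+    # on disk is removed (see the save-only branch below)
+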
+    train_batch_size = data_loader['train'].batch_size
+    train_epoch_step = data_loader['train'].__len__()
+    train_loss_recorder = LossRecord(train_batch_size)
+
+    # never save less often than once per epoch
+    if savepoint > train_epoch_step:
+        savepoint = train_epoch_step
+        checkpoint = savepoint
+
+    date_suffix = dt()
+    log_file = open(os.path.join(Config.log_folder, 'formal_log_r50_dcl_%s_%s.log' % (str(data_size), date_suffix)), 'a')
+
+    add_loss = nn.L1Loss()
+    get_ce_loss = nn.CrossEntropyLoss()
+    get_focal_loss = FocalLoss()
+    get_angle_loss = AngleLoss()
+
+    for epoch in range(start_epoch, epoch_num):
+        exp_lr_scheduler.step(epoch)
+        model.train(True)
+
+        save_grad = []
+        for batch_cnt, data in enumerate(data_loader['train']):
+            step += 1
+            loss = 0
+            model.train(True)
+            if Config.use_backbone:
+                inputs, labels, img_names = data
+                inputs = Variable(inputs.cuda())
+                labels = Variable(torch.from_numpy(np.array(labels)).cuda())
+
+            if Config.use_dcl:
+                inputs, labels, labels_swap, swap_law, img_names = data
+                inputs = Variable(inputs.cuda())
+                labels = Variable(torch.from_numpy(np.array(labels)).cuda())
+                labels_swap = Variable(torch.from_numpy(np.array(labels_swap)).cuda())
+                swap_law = Variable(torch.from_numpy(np.array(swap_law)).float().cuda())
+
+            optimizer.zero_grad()
+
+            if inputs.size(0) < 2*train_batch_size:
+                outputs = model(inputs, inputs[0:-1:2])
+            else:
+                outputs = model(inputs, None)
+
+            if Config.use_focal_loss:
+                ce_loss = get_focal_loss(outputs[0], labels)
+            else:
+                ce_loss = get_ce_loss(outputs[0], labels)
+
+            if Config.use_Asoftmax:
+                fetch_batch = labels.size(0)
+                if batch_cnt % (train_epoch_step // 5) == 0:
+                    angle_loss = get_angle_loss(outputs[3], labels[0:fetch_batch:2], decay=0.9)
+                else:
+                    angle_loss = get_angle_loss(outputs[3], labels[0:fetch_batch:2])
+                loss += angle_loss
+
+            loss += ce_loss
+
+            # beta_ weights the adversarial (swap) loss and gamma_ the
+            # region-alignment (law) loss below; alpha_ is currently unused
+            alpha_ = 1
+            beta_ = 1
+            gamma_ = 0.01 if Config.dataset == 'STCAR' or Config.dataset == 'AIR' else 1
+            if Config.use_dcl:
+                swap_loss = get_ce_loss(outputs[1], labels_swap) * beta_
+                loss += swap_loss
+                law_loss = add_loss(outputs[2], swap_law) * gamma_
+                loss += law_loss
+
+            loss.backward()
+            torch.cuda.synchronize()
+
+            optimizer.step()
+            torch.cuda.synchronize()
+
+            if Config.use_dcl:
+                print('step: {:-8d} / {:d} loss=ce_loss+swap_loss+law_loss: {:6.4f} = {:6.4f} + {:6.4f} + {:6.4f} '.format(step, train_epoch_step, loss.detach().item(), ce_loss.detach().item(), swap_loss.detach().item(), law_loss.detach().item()), flush=True)
+            if Config.use_backbone:
+                print('step: {:-8d} / {:d} loss: {:6.4f} (ce_loss: {:6.4f}) '.format(step, train_epoch_step, loss.detach().item(), ce_loss.detach().item()), flush=True)
+            rec_loss.append(loss.detach().item())
+
+            train_loss_recorder.update(loss.detach().item())
+
+            # evaluation & save
+            if step % checkpoint == 0:
+                rec_loss = []
+                print(32*'-', flush=True)
+                print('step: {:d} / {:d} global_step: {:8.2f} train_epoch: {:04d} rec_train_loss: {:6.4f}'.format(step, train_epoch_step, 1.0*step/train_epoch_step, epoch, train_loss_recorder.get_val()), flush=True)
+                print('current lr:%s' % exp_lr_scheduler.get_lr(), flush=True)
+                if eval_train_flag:
+                    trainval_acc1, trainval_acc2, trainval_acc3 = eval_turn(model, data_loader['trainval'], 'trainval', epoch, log_file)
+                    # once top-1 and top-3 train accuracy converge, skip this extra pass
+                    if abs(trainval_acc1 - trainval_acc3) < 0.01:
+                        eval_train_flag = False
+
+                val_acc1, val_acc2, val_acc3 = eval_turn(model, data_loader['val'], 'val', epoch, log_file)
+
+                save_path = os.path.join(save_dir, 'weights_%d_%d_%.4f_%.4f.pth' % (epoch, batch_cnt, val_acc1, val_acc3))
+                torch.cuda.synchronize()
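+                # blocking sync: make sure all queued CUDA work has finished
+                # before the just-evaluated weights are serialized
+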
torch.save(model.state_dict(), save_path) + print('saved model to %s' % (save_path), flush=True) + torch.cuda.empty_cache() + + # save only + elif step % savepoint == 0: + train_loss_recorder.update(rec_loss) + rec_loss = [] + save_path = os.path.join(save_dir, 'savepoint_weights-%d-%s.pth'%(step, dt())) + + checkpoint_list.append(save_path) + if len(checkpoint_list) == 6: + os.remove(checkpoint_list[0]) + del checkpoint_list[0] + torch.save(model.state_dict(), save_path) + torch.cuda.empty_cache() + + + log_file.close() + + + diff --git a/utils/train_util_DCL.py b/utils/train_util_DCL.py deleted file mode 100644 index 2a9c0a4..0000000 --- a/utils/train_util_DCL.py +++ /dev/null @@ -1,128 +0,0 @@ -#coding=utf8 -from __future__ import division -import torch -import os,time,datetime -from torch.autograd import Variable -import logging -import numpy as np -from math import ceil -from torch.nn import L1Loss -from torch import nn - -def dt(): - return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') - -def trainlog(logfilepath, head='%(message)s'): - logger = logging.getLogger('mylogger') - logging.basicConfig(filename=logfilepath, level=logging.INFO, format=head) - console = logging.StreamHandler() - console.setLevel(logging.INFO) - formatter = logging.Formatter(head) - console.setFormatter(formatter) - logging.getLogger('').addHandler(console) - -def train(cfg, - model, - epoch_num, - start_epoch, - optimizer, - criterion, - exp_lr_scheduler, - data_set, - data_loader, - save_dir, - print_inter=200, - val_inter=3500 - ): - - step = 0 - add_loss = L1Loss() - for epoch in range(start_epoch,epoch_num-1): - # train phase - exp_lr_scheduler.step(epoch) - model.train(True) # Set model to training mode - - for batch_cnt, data in enumerate(data_loader['train']): - - step+=1 - model.train(True) - inputs, labels, labels_swap, swap_law = data - inputs = Variable(inputs.cuda()) - labels = Variable(torch.from_numpy(np.array(labels)).cuda()) - labels_swap = Variable(torch.from_numpy(np.array(labels_swap)).cuda()) - swap_law = Variable(torch.from_numpy(np.array(swap_law)).float().cuda()) - - # zero the parameter gradients - optimizer.zero_grad() - - outputs = model(inputs) - if isinstance(outputs, list): - loss = criterion(outputs[0], labels) - loss += criterion(outputs[1], labels_swap) - loss += add_loss(outputs[2], swap_law) - loss.backward() - optimizer.step() - - if step % val_inter == 0: - logging.info('current lr:%s' % exp_lr_scheduler.get_lr()) - # val phase - model.train(False) # Set model to evaluate mode - - val_loss = 0 - val_corrects1 = 0 - val_corrects2 = 0 - val_corrects3 = 0 - val_size = ceil(len(data_set['val']) / data_loader['val'].batch_size) - - t0 = time.time() - - for batch_cnt_val, data_val in enumerate(data_loader['val']): - # print data - inputs, labels, labels_swap, swap_law = data_val - - inputs = Variable(inputs.cuda()) - labels = Variable(torch.from_numpy(np.array(labels)).long().cuda()) - labels_swap = Variable(torch.from_numpy(np.array(labels_swap)).long().cuda()) - # forward - if len(inputs)==1: - inputs = torch.cat((inputs,inputs)) - labels = torch.cat((labels,labels)) - labels_swap = torch.cat((labels_swap,labels_swap)) - outputs = model(inputs) - - if isinstance(outputs, list): - outputs1 = outputs[0] + outputs[1][:,0:cfg['numcls']] + outputs[1][:,cfg['numcls']:2*cfg['numcls']] - outputs2 = outputs[0] - outputs3 = outputs[1][:,0:cfg['numcls']] + outputs[1][:,cfg['numcls']:2*cfg['numcls']] - _, preds1 = torch.max(outputs1, 1) - _, preds2 = torch.max(outputs2, 1) - _, 
preds3 = torch.max(outputs3, 1) - - - batch_corrects1 = torch.sum((preds1 == labels)).data.item() - val_corrects1 += batch_corrects1 - batch_corrects2 = torch.sum((preds2 == labels)).data.item() - val_corrects2 += batch_corrects2 - batch_corrects3 = torch.sum((preds3 == labels)).data.item() - val_corrects3 += batch_corrects3 - - - # val_acc = 0.5 * val_corrects / len(data_set['val']) - val_acc1 = 0.5 * val_corrects1 / len(data_set['val']) - val_acc2 = 0.5 * val_corrects2 / len(data_set['val']) - val_acc3 = 0.5 * val_corrects3 / len(data_set['val']) - - t1 = time.time() - since = t1-t0 - logging.info('--'*30) - logging.info('current lr:%s' % exp_lr_scheduler.get_lr()) - logging.info('%s epoch[%d]-val-loss: %.4f ||val-acc@1: c&a: %.4f c: %.4f a: %.4f||time: %d' - % (dt(), epoch, val_loss, val_acc1, val_acc2, val_acc3, since)) - - # save model - save_path = os.path.join(save_dir, - 'weights-%d-%d-[%.4f].pth'%(epoch,batch_cnt,val_acc1)) - torch.save(model.state_dict(), save_path) - logging.info('saved model to %s' % (save_path)) - logging.info('--' * 30) - diff --git a/utils/utils.py b/utils/utils.py new file mode 100644 index 0000000..7552510 --- /dev/null +++ b/utils/utils.py @@ -0,0 +1,124 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +import pdb + + +class LossRecord(object): + def __init__(self, batch_size): + self.rec_loss = 0 + self.count = 0 + self.batch_size = batch_size + + def update(self, loss): + if isinstance(loss, list): + avg_loss = sum(loss) + avg_loss /= (len(loss)*self.batch_size) + self.rec_loss += avg_loss + self.count += 1 + if isinstance(loss, float): + self.rec_loss += loss/self.batch_size + self.count += 1 + + def get_val(self, init=False): + pop_loss = self.rec_loss / self.count + if init: + self.rec_loss = 0 + self.count = 0 + return pop_loss + + +def weights_normal_init(model, dev=0.01): + if isinstance(model, list): + for m in model: + weights_normal_init(m, dev) + else: + for m in model.modules(): + if isinstance(m, nn.Conv2d): + m.weight.data.normal_(0.0, dev) + elif isinstance(m, nn.Linear): + m.weight.data.normal_(0.0, dev) + + +def clip_gradient(model, clip_norm): + """Computes a gradient clipping coefficient based on gradient norm.""" + totalnorm = 0 + for p in model.parameters(): + if p.requires_grad: + modulenorm = p.grad.data.norm() + totalnorm += modulenorm ** 2 + totalnorm = torch.sqrt(totalnorm).item() + norm = (clip_norm / max(totalnorm, clip_norm)) + for p in model.parameters(): + if p.requires_grad: + p.grad.mul_(norm) + + +def Linear(in_features, out_features, bias=True): + """Weight-normalized Linear layer (input: N x T x C)""" + m = nn.Linear(in_features, out_features, bias=bias) + m.weight.data.uniform_(-0.1, 0.1) + if bias: + m.bias.data.uniform_(-0.1, 0.1) + return m + + +class convolution(nn.Module): + def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True): + super(convolution, self).__init__() + + pad = (k - 1) // 2 + self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn) + self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential() + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + conv = self.conv(x) + bn = self.bn(conv) + relu = self.relu(bn) + return relu + +class fully_connected(nn.Module): + def __init__(self, inp_dim, out_dim, with_bn=True): + super(fully_connected, self).__init__() + self.with_bn = with_bn + + self.linear = nn.Linear(inp_dim, out_dim) + if self.with_bn: + self.bn = nn.BatchNorm1d(out_dim) + self.relu = 
nn.ReLU(inplace=True) + + def forward(self, x): + linear = self.linear(x) + bn = self.bn(linear) if self.with_bn else linear + relu = self.relu(bn) + return relu + +class residual(nn.Module): + def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True): + super(residual, self).__init__() + + self.conv1 = nn.Conv2d(inp_dim, out_dim, (3, 3), padding=(1, 1), stride=(stride, stride), bias=False) + self.bn1 = nn.BatchNorm2d(out_dim) + self.relu1 = nn.ReLU(inplace=True) + + self.conv2 = nn.Conv2d(out_dim, out_dim, (3, 3), padding=(1, 1), bias=False) + self.bn2 = nn.BatchNorm2d(out_dim) + + self.skip = nn.Sequential( + nn.Conv2d(inp_dim, out_dim, (1, 1), stride=(stride, stride), bias=False), + nn.BatchNorm2d(out_dim) + ) if stride != 1 or inp_dim != out_dim else nn.Sequential() + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + conv1 = self.conv1(x) + bn1 = self.bn1(conv1) + relu1 = self.relu1(bn1) + + conv2 = self.conv2(relu1) + bn2 = self.bn2(conv2) + + skip = self.skip(x) + return self.relu(bn2 + skip)
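
For reference, a minimal sketch (not part of the diff) of how the `LossRecord` and `clip_gradient` helpers added in `utils/utils.py` are meant to be wired into a training step; the toy model, data, and hyper-parameters below are placeholders, not code from this repository:

```python
import torch
import torch.nn as nn

from utils.utils import LossRecord, clip_gradient

# toy stand-ins; the real repo builds the model and loaders in its train entry point
model = nn.Linear(10, 4)
data = [(torch.randn(8, 10), torch.randint(0, 4, (8,))) for _ in range(5)]

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
recorder = LossRecord(batch_size=8)

for inputs, labels in data:
    optimizer.zero_grad()
    loss = criterion(model(inputs), labels)
    loss.backward()
    clip_gradient(model, clip_norm=10.0)  # rescales grads when their global norm exceeds 10
    optimizer.step()
    recorder.update(loss.item())          # LossRecord.update accepts a float or a list

# get_val(init=True) returns the running batch-normalized average and resets the recorder
print('running loss:', recorder.get_val(init=True))
```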