From 872e370142280d132c1bc55080ceba9e00c5fa7d Mon Sep 17 00:00:00 2001 From: liaoxingyu Date: Wed, 7 Aug 2019 16:54:50 +0800 Subject: [PATCH] add new feature Finish interpreter --- .gitignore | 3 +- config/defaults.py | 10 +- configs/softmax_triplet.yml | 2 +- data/build.py | 16 +- data/datasets/eval_threshold.py | 64 +++++++ data/transforms/__init__.py | 3 +- data/transforms/build.py | 25 +++ data/transforms/transforms.py | 2 + datasets | 1 + engine/inference.py | 3 +- engine/interpreter.py | 126 ++++++++++---- engine/trainer.py | 17 +- scripts/test_model.sh | 4 +- scripts/train_market.sh | 2 +- scripts/train_market_duke_cuhk_bj.sh | 4 +- solver/__init__.py | 2 +- solver/adabound.py | 239 +++++++++++++++++++++++++++ tests/dataset_test.py | 10 +- tools/test.py | 2 +- tools/train.py | 13 +- 20 files changed, 480 insertions(+), 68 deletions(-) create mode 100644 data/datasets/eval_threshold.py create mode 100644 data/transforms/build.py create mode 120000 datasets create mode 100644 solver/adabound.py diff --git a/.gitignore b/.gitignore index 174feca..d9b15b6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,6 @@ __pycache__ .DS_Store .vscode -datasets csrc/eval_cylib/build/ logs/ -*.ipynb +.ipynb_checkpoints diff --git a/config/defaults.py b/config/defaults.py index 3b248eb..4c75c7c 100644 --- a/config/defaults.py +++ b/config/defaults.py @@ -30,14 +30,20 @@ _C.INPUT.SIZE_TRAIN = [256, 128] # Size of the image during test _C.INPUT.SIZE_TEST = [256, 128] # Random probability for image horizontal flip -_C.INPUT.PROB = 0.5 +_C.INPUT.DO_FLIP = True +_C.INPUT.FLIP_PROB = 0.5 # Values to be used for image normalization _C.INPUT.PIXEL_MEAN = [0.485, 0.456, 0.406] # Values to be used for image normalization _C.INPUT.PIXEL_STD = [0.229, 0.224, 0.225] # Value of padding size +_C.INPUT.DO_PAD = True +_C.INPUT.PADDING_MODE = 'zeros' _C.INPUT.PADDING = 10 - +# Random lighting and contrast change +_C.INPUT.DO_LIGHTING = True +_C.INPUT.MAX_LIGHTING = 0.2 
+_C.INPUT.P_LIGHTING=0.75 # ----------------------------------------------------------------------------- # Dataset # ----------------------------------------------------------------------------- diff --git a/configs/softmax_triplet.yml b/configs/softmax_triplet.yml index e589276..b1af8f6 100644 --- a/configs/softmax_triplet.yml +++ b/configs/softmax_triplet.yml @@ -5,7 +5,7 @@ MODEL: INPUT: SIZE_TRAIN: [256, 128] SIZE_TEST: [256, 128] - PROB: 0.5 # random horizontal flip + FLIP_PROB: 0.5 # random horizontal flip PADDING: 10 DATASETS: diff --git a/data/build.py b/data/build.py index cd78253..008bbd0 100644 --- a/data/build.py +++ b/data/build.py @@ -9,19 +9,14 @@ import os import re from fastai.vision import * -from .transforms import RandomErasing -from .samplers import RandomIdentitySampler + from .datasets import CUHK03 +from .samplers import RandomIdentitySampler +from .transforms import build_transforms def get_data_bunch(cfg): - ds_tfms = ( - [flip_lr(p=0.5), - *rand_pad(padding=cfg.INPUT.PADDING, size=cfg.INPUT.SIZE_TRAIN, mode='zeros'), - RandomErasing() - ], - None - ) + ds_tfms = build_transforms(cfg) def _process_dir(dir_path, recursive=False): img_paths = [] @@ -67,6 +62,8 @@ def get_data_bunch(cfg): query_names = _process_dir(bj_query_path) gallery_names = _process_dir(bj_gallery_path, True) + # query_names = _process_dir(market_query_path) + # gallery_names = _process_dir(marker_gallery_path) test_fnames = [] test_labels = [] @@ -97,3 +94,4 @@ def get_data_bunch(cfg): data_bunch.normalize(imagenet_stats) return data_bunch, test_labels, len(query_names) + diff --git a/data/datasets/eval_threshold.py b/data/datasets/eval_threshold.py new file mode 100644 index 0000000..00db86d --- /dev/null +++ b/data/datasets/eval_threshold.py @@ -0,0 +1,64 @@ +# encoding: utf-8 +""" +@author: liaoxingyu +@contact: sherlockliao01@gmail.com +""" + + +import os +import sys + +import numpy as np +import torch +import torch.nn.functional as F +from torch import nn +from 
torch.backends import cudnn +from torch.utils.data import Dataset + + +def eval_roc(distmat, q_pids, g_pids, q_cmaids, g_camids, t_start=0.1, t_end=0.9): + # sort cosine dist from large to small + indices = np.argsort(distmat, axis=1)[:, ::-1] + # query id and gallery id match + matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32) + + new_dist = [] + new_matches = [] + # Remove the same identity in the same camera. + num_q = distmat.shape[0] + for q_idx in range(num_q): + q_pid = q_pids[q_idx] + q_camid = q_cmaids[q_idx] + + order = indices[q_idx] + remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid) + keep = np.invert(remove) + new_matches.extend(matches[q_idx][keep].tolist()) + new_dist.extend(distmat[q_idx][indices[q_idx]][keep].tolist()) + + fpr = [] + tpr = [] + fps = [] + tps = [] + thresholds = np.arange(t_start, t_end, 0.02) + + # get number of positive and negative examples in the dataset + p = sum(new_matches) + n = len(new_matches) - p + + # iteration through all thresholds and determine fraction of true positives + # and false positives found at this threshold + for t in thresholds: + fp = 0 + tp = 0 + for i in range(len(new_dist)): + if new_dist[i] > t: + if new_matches[i] == 1: + tp += 1 + else: + fp += 1 + fpr.append(fp / float(n)) + tpr.append(tp / float(p)) + fps.append(fp) + tps.append(tp) + return fpr, tpr, fps, tps, p, n, thresholds diff --git a/data/transforms/__init__.py b/data/transforms/__init__.py index 373f006..45f0240 100644 --- a/data/transforms/__init__.py +++ b/data/transforms/__init__.py @@ -4,4 +4,5 @@ @contact: sherlockliao01@gmail.com """ -from .transforms import * + +from .build import build_transforms \ No newline at end of file diff --git a/data/transforms/build.py b/data/transforms/build.py new file mode 100644 index 0000000..9a8b6a8 --- /dev/null +++ b/data/transforms/build.py @@ -0,0 +1,25 @@ +# encoding: utf-8 +""" +@author: liaoxingyu +@contact: sherlockliao01@gmail.com +""" +from .transforms 
import * +from fastai.vision.transform import * + + +def build_transforms(cfg): + # do_flip:bool=True, max_rotate:float=10., max_zoom:float=1.1, + # max_lighting:float=0.2, max_warp:float=0.2, p_affine:float=0.75, + # p_lighting:float=0.75, xtra_tfms:Optional[Collection[Transform]]=None): + "Build the (train, valid) transform lists from cfg: optional flip, rand_pad and brightness/contrast changes, then RandomErasing; valid uses crop_pad only." + res = [] + if cfg.INPUT.DO_FLIP: res.append(flip_lr(p=cfg.INPUT.FLIP_PROB)) + if cfg.INPUT.DO_PAD: res.extend(rand_pad(padding=cfg.INPUT.PADDING, + size=cfg.INPUT.SIZE_TRAIN, + mode=cfg.INPUT.PADDING_MODE)) + if cfg.INPUT.DO_LIGHTING: + res.append(brightness(change=(0.5*(1-cfg.INPUT.MAX_LIGHTING), 0.5*(1+cfg.INPUT.MAX_LIGHTING)), p=cfg.INPUT.P_LIGHTING)) + res.append(contrast(scale=(1-cfg.INPUT.MAX_LIGHTING, 1/(1-cfg.INPUT.MAX_LIGHTING)), p=cfg.INPUT.P_LIGHTING)) + res.append(RandomErasing()) + # train , valid + return (res, [crop_pad()]) \ No newline at end of file diff --git a/data/transforms/transforms.py b/data/transforms/transforms.py index ebbf7ff..95147b1 100644 --- a/data/transforms/transforms.py +++ b/data/transforms/transforms.py @@ -6,9 +6,11 @@ import numpy as np + from fastai.vision import * from fastai.vision.image import * +__all__ = ['RandomErasing'] def _random_erasing(x, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=(np.array(imagenet_stats[1]) + 1) * imagenet_stats[0]): diff --git a/datasets b/datasets new file mode 120000 index 0000000..306a491 --- /dev/null +++ b/datasets @@ -0,0 +1 @@ +/export/home/datasets/ \ No newline at end of file diff --git a/engine/inference.py b/engine/inference.py index 2459332..676c871 100644 --- a/engine/inference.py +++ b/engine/inference.py @@ -6,6 +6,7 @@ import logging import torch +import numpy as np import torch.nn.functional as F from data.datasets.eval_reid import evaluate from data.datasets.eval_threshold import eval_roc @@ -62,7 +63,7 @@ def inference( # Compute ROC and AUC logger.info("Compute ROC Curve...") - fpr, tpr, fps, 
tps, p, n, thresholds = eval_roc(distmat, q_pids, g_pids, q_camids, g_camids) + fpr, tpr, fps, tps, p, n, thresholds = eval_roc(distmat, q_pids, g_pids, q_camids, g_camids, 0.1, 0.5) logger.info("positive samples: {}, negative samples: {}".format(p, n)) for i, thresh in enumerate(thresholds): logger.info("threshold: {:.2f}, FP: {:.0f}({:.3f}), TP: {:.0f}({:.3f})". diff --git a/engine/interpreter.py b/engine/interpreter.py index 1fb84e9..b5de9ff 100644 --- a/engine/interpreter.py +++ b/engine/interpreter.py @@ -10,60 +10,126 @@ import matplotlib.pyplot as plt from fastai.train import * from fastai.torch_core import * from fastai.basic_data import * +from fastai.basic_train import Learner +from fastai.vision import * -class ReidInterpretation(Interpretation): +class ReidInterpretation(): "Interpretation methods for reid models." - def __init__(self, learn, preds, y_true, losses, ds_type=DatasetType.Valid): - super().__init__(learn, preds, y_true, losses, ds_type=ds_type) + def __init__(self, learn, test_labels, num_q): + self.test_labels,self.num_q = test_labels,num_q + self.test_dl = learn.data.test_dl + self.model = learn.model + + self.get_distmat() - def get_distmat(self, test_labels, num_query): + def get_distmat(self): + self.model.eval() + feats = [] pids = [] camids = [] - for p, c in test_labels: + for p,c in self.test_labels: pids.append(p) camids.append(c) - self.q_pids = np.asarray(pids[:num_query]) - self.g_pids = np.asarray(pids[num_query:]) - self.q_camids = np.asarray(camids[:num_query]) - self.g_camids = np.asarray(camids[num_query:]) + self.q_pids = np.asarray(pids[:self.num_q]) + self.g_pids = np.asarray(pids[self.num_q:]) + self.q_camids = np.asarray(camids[:self.num_q]) + self.g_camids = np.asarray(camids[self.num_q:]) - qf = self.preds[:num_query] - gf = self.preds[num_query:] + for imgs, _ in self.test_dl: + with torch.no_grad(): + feat = self.model(imgs) + feats.append(feat) + feats = torch.cat(feats, dim=0) + feats = F.normalize(feats) + qf = 
feats[:self.num_q] + gf = feats[self.num_q:] m, n = qf.shape[0], gf.shape[0] - self.num_q=num_query + # Cosine distance - distmat = torch.mm(F.normalize(qf), F.normalize(gf).t()) + distmat = torch.mm(qf, gf.t()) self.distmat = to_np(distmat) - - self.indices = np.argsort(self.distmat, axis=1)[:, ::-1] + self.indices = np.argsort(-self.distmat, axis=1) self.matches = (self.g_pids[self.indices] == self.q_pids[:, np.newaxis]).astype(np.int32) - def plot_rank_result(self, q_idx, top=5, title="Rank result"): - q_pid = self.q_pids[q_idx] - q_camid = self.q_camids[q_idx] + def get_matched_result(self, q_index): + q_pid = self.q_pids[q_index] + q_camid = self.q_camids[q_index] - order = self.indices[q_idx] + order = self.indices[q_index] remove = (self.g_pids[order] == q_pid) & (self.g_camids[order] == q_camid) keep = np.invert(remove) + cmc = self.matches[q_index][keep] + matched_idx = order[keep] + return cmc, matched_idx + + def plot_rank_result(self, q_idx, top=5, title="Rank result"): + cmc,matched_idx = self.get_matched_result(q_idx) - raw_cmc = self.matches[q_idx][keep] - matched_idx = self.indices[q_idx][keep] - - fig,axes = plt.subplots(1, top+1, figsize=(12,5)) - fig.suptitle('query/sim/true(false)') - query_im,cl=self.learn.data.dl(DatasetType.Test).dataset[q_idx] - query_im.show(ax=axes.flat[0],title='query') + fig,axes = plt.subplots(1, top+1, figsize=(15, 5)) + fig.suptitle('query similarity/true(false)') + query_im,cl=self.test_dl.dataset[q_idx] + query_im.show(ax=axes.flat[0], title='query') for i in range(top): - if raw_cmc[i] == 1: + g_idx = self.num_q + matched_idx[i] + im,cl = self.test_dl.dataset[g_idx] + if cmc[i] == 1: label='true' + axes.flat[i+1].add_patch(plt.Rectangle(xy=(0, 0), width=im.size[1]-1, height=im.size[0]-1, + edgecolor=(1, 0, 0), fill=False, linewidth=5)) else: label='false' - im_idx=self.num_q+matched_idx[i]+1 - im,cl = self.learn.data.dl(DatasetType.Test).dataset[im_idx] - im.show(ax=axes.flat[i+1],title=f'{self.distmat[q_idx, 
im_idx]:.3f} / {label}') + axes.flat[i+1].add_patch(plt.Rectangle(xy=(0, 0), width=im.size[1]-1, height=im.size[0]-1, + edgecolor=(0, 0, 1), fill=False, linewidth=5)) + im.show(ax=axes.flat[i+1], title=f'{self.distmat[q_idx, matched_idx[i]]:.3f} / {label}') return fig + def get_top_error(self): + # Iteration over query ids and store query gallery similarity + similarity_score = namedtuple('similarityScore', 'query gallery sim cmc') + storeCorrect = [] + storeWrong = [] + for q_index in range(self.num_q): + cmc,matched_idx = self.get_matched_result(q_index) + single_item = similarity_score(query=q_index, gallery=[self.num_q + matched_idx[i] for i in range(5)], + sim=[self.distmat[q_index, matched_idx[i]] for i in range(5)], + cmc=cmc[:5]) + if cmc[0] == 1: + storeCorrect.append(single_item) + else: + storeWrong.append(single_item) + storeCorrect.sort(key=lambda x: x.sim[0]) + storeWrong.sort(key=lambda x: x.sim[0], reverse=True) + self.storeCorrect = storeCorrect + self.storeWrong = storeWrong + def plot_top_error(self, topK=5, positive=True): + if not hasattr(self, 'storeCorrect'): + self.get_top_error() + if positive: + img_list = self.storeCorrect + else: + img_list = self.storeWrong + # Rank top error results, which means negative sample with largest similarity + # and positive sample with smallest similarity + fig,axes = plt.subplots(topK, 6, figsize=(15, 4*topK)) + fig.suptitle('query similarity/true(false)') + for i in range(topK): + q_idx,g_idxs,sim,cmc = img_list[i] + query_im,cl = self.test_dl.dataset[q_idx] + query_im.show(ax=axes[i, 0], title='query') + for j,g_idx in enumerate(g_idxs): + im,cl = self.test_dl.dataset[g_idx] + if cmc[j] == 1: + label='true' + axes[i,j+1].add_patch(plt.Rectangle(xy=(0, 0), width=im.size[1]-1, height=im.size[0]-1, + edgecolor=(1, 0, 0), fill=False, linewidth=5)) + else: + label='false' + axes[i, j+1].add_patch(plt.Rectangle(xy=(0, 0), width=im.size[1]-1, height=im.size[0]-1, + edgecolor=(0, 0, 1), fill=False, linewidth=5)) 
+ im.show(ax=axes[i,j+1], title=f'{sim[j]:.3f} / {label}') + + return fig \ No newline at end of file diff --git a/engine/trainer.py b/engine/trainer.py index 36b6db1..9996e60 100644 --- a/engine/trainer.py +++ b/engine/trainer.py @@ -4,14 +4,15 @@ @contact: sherlockliao01@gmail.com """ -import os import logging +import os + +import matplotlib.pyplot as plt +import torch.nn.functional as F from data.datasets.eval_reid import evaluate +from fastai.basic_data import DatasetType from fastai.vision import * -import torch.nn.functional as F -import matplotlib.pyplot as plt -from fastai.basic_train import Learner @dataclass @@ -20,8 +21,8 @@ class TrackValue(Callback): total_iter: int # def on_batch_end(self, num_batch, last_loss, **kwargs): - # if (num_batch+1) % (self.total_iter//3) == 0: - # self.logger.info('Iter [{}/{}], loss: {:.4f}'.format(num_batch, self.total_iter, last_loss.item())) + # if (num_batch+1) % (self.total_iter//3) == 0: + # self.logger.info('Iter [{}/{}], loss: {:.4f}'.format(num_batch, self.total_iter, last_loss.item())) def on_epoch_end(self, epoch, smooth_loss, **kwargs): self.logger.info('Epoch {}[Iter {}], loss: {:.4f}'.format(epoch, self.total_iter, smooth_loss.item())) @@ -116,8 +117,8 @@ def do_train( partial(TestModel, test_labels=test_labels, eval_period=eval_period, num_query=num_query, logger=logger)], callbacks=[TrackValue(logger, total_iter)]) - learn.fit(epochs, wd=cfg.SOLVER.WEIGHT_DECAY) + learn.fit(epochs, lr=cfg.SOLVER.BASE_LR, wd=cfg.SOLVER.WEIGHT_DECAY) learn.recorder.plot_losses() plt.savefig(os.path.join(output_dir, "loss.jpg")) learn.recorder.plot_lr() - plt.savefig(os.path.join(output_dir, "lr.jpg")) \ No newline at end of file + plt.savefig(os.path.join(output_dir, "lr.jpg")) diff --git a/scripts/test_model.sh b/scripts/test_model.sh index 2c78134..2da262a 100644 --- a/scripts/test_model.sh +++ b/scripts/test_model.sh @@ -1,7 +1,7 @@ -gpu=1 +gpu=0 CUDA_VISIBLE_DEVICES=$gpu python tools/test.py 
-cfg='configs/softmax_triplet.yml' \ INPUT.SIZE_TRAIN '(256, 128)' \ DATASETS.NAMES '("market1501","duke","beijing")' \ OUTPUT_DIR 'logs/test' \ -TEST.WEIGHT 'logs/beijing/market+duke+bj/models/model_149.pth' \ No newline at end of file +TEST.WEIGHT 'logs/beijing/market_duke_cuhk03_beijing_revise_bs64/models/model_149.pth' \ No newline at end of file diff --git a/scripts/train_market.sh b/scripts/train_market.sh index 5742eb3..ad7d45c 100644 --- a/scripts/train_market.sh +++ b/scripts/train_market.sh @@ -2,4 +2,4 @@ gpu=3 CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \ DATASETS.NAMES '("market1501",)' \ -OUTPUT_DIR 'logs/market/softmax_triplet_256_128_bs512' \ No newline at end of file +OUTPUT_DIR 'logs/market/adabound_256_128_bs64' \ No newline at end of file diff --git a/scripts/train_market_duke_cuhk_bj.sh b/scripts/train_market_duke_cuhk_bj.sh index d7864c3..76673a5 100644 --- a/scripts/train_market_duke_cuhk_bj.sh +++ b/scripts/train_market_duke_cuhk_bj.sh @@ -1,6 +1,6 @@ -gpu=2 +gpu=0 CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \ DATASETS.NAMES '("market1501","duke","cuhk03","beijing")' \ SOLVER.IMS_PER_BATCH '64' \ -OUTPUT_DIR 'logs/beijing/market_duke_cuhk03_beijing_bs64' \ No newline at end of file +OUTPUT_DIR 'logs/beijing/market_duke_cuhk03_beijing_revise_bs64_light' \ No newline at end of file diff --git a/solver/__init__.py b/solver/__init__.py index b8a6a6d..0925e56 100644 --- a/solver/__init__.py +++ b/solver/__init__.py @@ -4,4 +4,4 @@ @contact: sherlockliao01@gmail.com """ -from .build import * +from .adabound import * \ No newline at end of file diff --git a/solver/adabound.py b/solver/adabound.py new file mode 100644 index 0000000..413e36e --- /dev/null +++ b/solver/adabound.py @@ -0,0 +1,239 @@ +import math +import torch +from torch.optim import Optimizer + + +__all__ = ['AdaBound', 'AdaBoundW'] + +class AdaBound(Optimizer): + """Implements AdaBound algorithm. 
+ It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. + Arguments: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional): Adam learning rate (default: 1e-3) + betas (Tuple[float, float], optional): coefficients used for computing + running averages of gradient and its square (default: (0.9, 0.999)) + final_lr (float, optional): final (SGD) learning rate (default: 0.1) + gamma (float, optional): convergence speed of the bound functions (default: 1e-3) + eps (float, optional): term added to the denominator to improve + numerical stability (default: 1e-8) + weight_decay (float, optional): weight decay (L2 penalty) (default: 0) + amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm + .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate: + https://openreview.net/forum?id=Bkg3g2R9FX + """ + + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, + eps=1e-8, weight_decay=0, amsbound=False): + if not 0.0 <= lr: + raise ValueError("Invalid learning rate: {}".format(lr)) + if not 0.0 <= eps: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) + if not 0.0 <= betas[1] < 1.0: + raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) + if not 0.0 <= final_lr: + raise ValueError("Invalid final learning rate: {}".format(final_lr)) + if not 0.0 <= gamma < 1.0: + raise ValueError("Invalid gamma parameter: {}".format(gamma)) + defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, + weight_decay=weight_decay, amsbound=amsbound) + super(AdaBound, self).__init__(params, defaults) + + self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) + + def __setstate__(self, state): + super(AdaBound, self).__setstate__(state) + for group in 
self.param_groups: + group.setdefault('amsbound', False) + + def step(self, closure=None): + """Performs a single optimization step. + Arguments: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + loss = closure() + + for group, base_lr in zip(self.param_groups, self.base_lrs): + base_lr = group['lr'] + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data + if grad.is_sparse: + raise RuntimeError( + 'Adam does not support sparse gradients, please consider SparseAdam instead') + amsbound = group['amsbound'] + + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p.data) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = torch.zeros_like(p.data) + if amsbound: + # Maintains max of all exp. moving avg. of sq. grad. values + state['max_exp_avg_sq'] = torch.zeros_like(p.data) + + exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] + if amsbound: + max_exp_avg_sq = state['max_exp_avg_sq'] + beta1, beta2 = group['betas'] + + state['step'] += 1 + + if group['weight_decay'] != 0: + grad = grad.add(group['weight_decay'], p.data) + + # Decay the first and second moment running average coefficient + exp_avg.mul_(beta1).add_(1 - beta1, grad) + exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) + if amsbound: + # Maintains the maximum of all 2nd moment running avg. till now + torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # Use the max. for normalizing running avg. 
of gradient + denom = max_exp_avg_sq.sqrt().add_(group['eps']) + else: + denom = exp_avg_sq.sqrt().add_(group['eps']) + + bias_correction1 = 1 - beta1 ** state['step'] + bias_correction2 = 1 - beta2 ** state['step'] + step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 + + # Applies bounds on actual learning rate + # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay + final_lr = group['final_lr'] * group['lr'] / base_lr + lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) + upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) + step_size = torch.full_like(denom, step_size) + step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) + + p.data.add_(-step_size) + + return loss + + +class AdaBoundW(Optimizer): + """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101) + It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. + Arguments: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional): Adam learning rate (default: 1e-3) + betas (Tuple[float, float], optional): coefficients used for computing + running averages of gradient and its square (default: (0.9, 0.999)) + final_lr (float, optional): final (SGD) learning rate (default: 0.1) + gamma (float, optional): convergence speed of the bound functions (default: 1e-3) + eps (float, optional): term added to the denominator to improve + numerical stability (default: 1e-8) + weight_decay (float, optional): decoupled weight decay, subtracted from the weights directly instead of being added to the gradient (default: 0) + amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm + .. 
Adaptive Gradient Methods with Dynamic Bound of Learning Rate: + https://openreview.net/forum?id=Bkg3g2R9FX + """ + + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, + eps=1e-8, weight_decay=0, amsbound=False): + if not 0.0 <= lr: + raise ValueError("Invalid learning rate: {}".format(lr)) + if not 0.0 <= eps: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) + if not 0.0 <= betas[1] < 1.0: + raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) + if not 0.0 <= final_lr: + raise ValueError("Invalid final learning rate: {}".format(final_lr)) + if not 0.0 <= gamma < 1.0: + raise ValueError("Invalid gamma parameter: {}".format(gamma)) + defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, + weight_decay=weight_decay, amsbound=amsbound) + super(AdaBoundW, self).__init__(params, defaults) + + self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) + + def __setstate__(self, state): + super(AdaBoundW, self).__setstate__(state) + for group in self.param_groups: + group.setdefault('amsbound', False) + + def step(self, closure=None): + """Performs a single optimization step. + Arguments: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. 
+ """ + loss = None + if closure is not None: + loss = closure() + + for group, base_lr in zip(self.param_groups, self.base_lrs): + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data + if grad.is_sparse: + raise RuntimeError( + 'Adam does not support sparse gradients, please consider SparseAdam instead') + amsbound = group['amsbound'] + + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p.data) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = torch.zeros_like(p.data) + if amsbound: + # Maintains max of all exp. moving avg. of sq. grad. values + state['max_exp_avg_sq'] = torch.zeros_like(p.data) + + exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] + if amsbound: + max_exp_avg_sq = state['max_exp_avg_sq'] + beta1, beta2 = group['betas'] + + state['step'] += 1 + + # Decay the first and second moment running average coefficient + exp_avg.mul_(beta1).add_(1 - beta1, grad) + exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) + if amsbound: + # Maintains the maximum of all 2nd moment running avg. till now + torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # Use the max. for normalizing running avg. 
of gradient + denom = max_exp_avg_sq.sqrt().add_(group['eps']) + else: + denom = exp_avg_sq.sqrt().add_(group['eps']) + + bias_correction1 = 1 - beta1 ** state['step'] + bias_correction2 = 1 - beta2 ** state['step'] + step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 + + # Applies bounds on actual learning rate + # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay + final_lr = group['final_lr'] * group['lr'] / base_lr + lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) + upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) + step_size = torch.full_like(denom, step_size) + step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) + + if group['weight_decay'] != 0: + decayed_weights = torch.mul(p.data, group['weight_decay']) + p.data.add_(-step_size) + p.data.sub_(decayed_weights) + else: + p.data.add_(-step_size) + + return loss + diff --git a/tests/dataset_test.py b/tests/dataset_test.py index 1cb3e50..e0b6b2b 100644 --- a/tests/dataset_test.py +++ b/tests/dataset_test.py @@ -5,10 +5,18 @@ """ import sys +from fastai.vision import * sys.path.append('.') from data import get_data_bunch from config import cfg if __name__ == '__main__': - data = get_data_bunch(cfg) \ No newline at end of file + # cfg.INPUT.SIZE_TRAIN = (384, 128) + data, label, num_q = get_data_bunch(cfg) + # def get_ex(): return open_image('datasets/beijingStation/query/000245_c10s2_1561732033722.000000.jpg') + # im = get_ex() + print(data.train_ds[0]) + print(data.test_ds[0]) + from ipdb import set_trace; set_trace() + # im.apply_tfms(crop_pad(size=(300, 300))) diff --git a/tools/test.py b/tools/test.py index 781a000..abb66c4 100644 --- a/tools/test.py +++ b/tools/test.py @@ -52,7 +52,7 @@ def main(): data_bunch, test_labels, num_query = get_data_bunch(cfg) model = build_model(cfg, data_bunch.c) state_dict = torch.load(cfg.TEST.WEIGHT) - model.load_state_dict(state_dict['model']) + 
model.load_params_wo_fc(state_dict['model']) model.cuda() # model = torch.jit.load("/export/home/lxy/reid_baseline/pcb_model_v0.2.pt") diff --git a/tools/train.py b/tools/train.py index e5f1461..cc0082c 100644 --- a/tools/train.py +++ b/tools/train.py @@ -5,20 +5,22 @@ """ import argparse -import sys import os +import sys from bisect import bisect_right from torch.backends import cudnn -sys.path.append('.') +import sys +sys.path.append(".") from config import cfg from data import get_data_bunch from engine.trainer import do_train +from fastai.vision import * from layers import make_loss from modeling import build_model +from solver import * from utils.logger import setup_logger -from fastai.vision import * def train(cfg): @@ -27,8 +29,6 @@ def train(cfg): # prepare model model = build_model(cfg, data_bunch.c) - # state_dict = torch.load("logs/beijing/market_duke_softmax_triplet_256_128_bs256/models/model_149.pth") - # model.load_params_wo_fc(state_dict['model']) opt_func = partial(torch.optim.Adam) @@ -42,7 +42,8 @@ def train(cfg): warmup_factor = cfg.SOLVER.WARMUP_FACTOR * (1 - alpha) + alpha return start * warmup_factor * gamma ** bisect_right(milestones, pct) - lr = cfg.SOLVER.BASE_LR * (cfg.SOLVER.IMS_PER_BATCH // 64) + # lr = cfg.SOLVER.BASE_LR * (cfg.SOLVER.IMS_PER_BATCH // 64) + lr = cfg.SOLVER.BASE_LR lr_sched = Scheduler(lr, cfg.SOLVER.MAX_EPOCHS, warmup_multistep) loss_func = make_loss(cfg)