mirror of https://github.com/JDAI-CV/fast-reid.git
parent 710d40aec0
commit 872e370142

@@ -2,7 +2,6 @@
__pycache__
.DS_Store
.vscode
datasets
csrc/eval_cylib/build/
logs/
*.ipynb
.ipynb_checkpoints

@@ -30,14 +30,20 @@ _C.INPUT.SIZE_TRAIN = [256, 128]
# Size of the image during test
_C.INPUT.SIZE_TEST = [256, 128]
# Random probability for image horizontal flip
_C.INPUT.PROB = 0.5
_C.INPUT.DO_FLIP = True
_C.INPUT.FLIP_PROB = 0.5
# Values to be used for image normalization
_C.INPUT.PIXEL_MEAN = [0.485, 0.456, 0.406]
# Values to be used for image normalization
_C.INPUT.PIXEL_STD = [0.229, 0.224, 0.225]
# Value of padding size
_C.INPUT.DO_PAD = True
_C.INPUT.PADDING_MODE = 'zeros'
_C.INPUT.PADDING = 10

# Random lightning and contrast change
_C.INPUT.DO_LIGHTING = True
_C.INPUT.MAX_LIGHTING = 0.2
_C.INPUT.P_LIGHTING = 0.75
# -----------------------------------------------------------------------------
# Dataset
# -----------------------------------------------------------------------------

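Note (not part of the commit): a hedged sketch of overriding the new INPUT flags at runtime, assuming the yacs-style CfgNode that defines _C above; merge_from_list is the standard yacs call, and the option names are the ones added in this hunk.

    from config import cfg  # defaults defined above

    cfg.merge_from_list([
        'INPUT.DO_FLIP', True,         # random horizontal flip
        'INPUT.FLIP_PROB', 0.5,
        'INPUT.DO_LIGHTING', True,     # brightness/contrast jitter
        'INPUT.MAX_LIGHTING', 0.2,
        'INPUT.P_LIGHTING', 0.75,
    ])
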
@@ -5,7 +5,7 @@ MODEL:
INPUT:
  SIZE_TRAIN: [256, 128]
  SIZE_TEST: [256, 128]
  PROB: 0.5 # random horizontal flip
  FLIP_PROB: 0.5 # random horizontal flip
  PADDING: 10

DATASETS:

@@ -9,19 +9,14 @@ import os
import re

from fastai.vision import *
from .transforms import RandomErasing
from .samplers import RandomIdentitySampler

from .datasets import CUHK03
from .samplers import RandomIdentitySampler
from .transforms import build_transforms


def get_data_bunch(cfg):
    ds_tfms = (
        [flip_lr(p=0.5),
         *rand_pad(padding=cfg.INPUT.PADDING, size=cfg.INPUT.SIZE_TRAIN, mode='zeros'),
         RandomErasing()
         ],
        None
    )
    ds_tfms = build_transforms(cfg)

    def _process_dir(dir_path, recursive=False):
        img_paths = []

@@ -67,6 +62,8 @@ def get_data_bunch(cfg):

    query_names = _process_dir(bj_query_path)
    gallery_names = _process_dir(bj_gallery_path, True)
    # query_names = _process_dir(market_query_path)
    # gallery_names = _process_dir(marker_gallery_path)

    test_fnames = []
    test_labels = []

@@ -97,3 +94,4 @@ def get_data_bunch(cfg):
    data_bunch.normalize(imagenet_stats)

    return data_bunch, test_labels, len(query_names)

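Note (not part of the commit): a minimal sketch of the new three-value return signature, matching how tools/test.py and tools/test_data.py later in this commit unpack it.

    data_bunch, test_labels, num_query = get_data_bunch(cfg)
    print(len(test_labels), num_query)
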
@@ -0,0 +1,64 @@
# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""


import os
import sys

import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch.backends import cudnn
from torch.utils.data import Dataset


def eval_roc(distmat, q_pids, g_pids, q_cmaids, g_camids, t_start=0.1, t_end=0.9):
    # sort cosine dist from large to small
    indices = np.argsort(distmat, axis=1)[:, ::-1]
    # query id and gallery id match
    matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32)

    new_dist = []
    new_matches = []
    # Remove the same identity in the same camera.
    num_q = distmat.shape[0]
    for q_idx in range(num_q):
        q_pid = q_pids[q_idx]
        q_camid = q_cmaids[q_idx]

        order = indices[q_idx]
        remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)
        keep = np.invert(remove)
        new_matches.extend(matches[q_idx][keep].tolist())
        new_dist.extend(distmat[q_idx][indices[q_idx]][keep].tolist())

    fpr = []
    tpr = []
    fps = []
    tps = []
    thresholds = np.arange(t_start, t_end, 0.02)

    # get number of positive and negative examples in the dataset
    p = sum(new_matches)
    n = len(new_matches) - p

    # iteration through all thresholds and determine fraction of true positives
    # and false positives found at this threshold
    for t in thresholds:
        fp = 0
        tp = 0
        for i in range(len(new_dist)):
            if new_dist[i] > t:
                if new_matches[i] == 1:
                    tp += 1
                else:
                    fp += 1
        fpr.append(fp / float(n))
        tpr.append(tp / float(p))
        fps.append(fp)
        tps.append(tp)
    return fpr, tpr, fps, tps, p, n, thresholds

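Note (not part of the commit): a small self-contained sanity check for eval_roc as defined above, using a toy 2x3 similarity matrix; all ids and values are made up for illustration.

    import numpy as np

    distmat = np.array([[0.9, 0.2, 0.4],
                        [0.3, 0.8, 0.1]])      # query x gallery cosine similarity
    q_pids = np.array([1, 2])
    q_camids = np.array([0, 0])
    g_pids = np.array([1, 2, 3])
    g_camids = np.array([1, 1, 1])

    fpr, tpr, fps, tps, p, n, thresholds = eval_roc(distmat, q_pids, g_pids,
                                                    q_camids, g_camids, 0.1, 0.9)
    for t, f, tp in zip(thresholds, fpr, tpr):
        print(f"thresh {t:.2f}  fpr {f:.2f}  tpr {tp:.2f}")
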
@@ -4,4 +4,5 @@
@contact: sherlockliao01@gmail.com
"""

from .transforms import *
from .build import build_transforms

@@ -0,0 +1,25 @@
# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""
from .transforms import *
from fastai.vision.transform import *


def build_transforms(cfg):
    # do_flip:bool=True, max_rotate:float=10., max_zoom:float=1.1,
    # max_lighting:float=0.2, max_warp:float=0.2, p_affine:float=0.75,
    # p_lighting:float=0.75, xtra_tfms:Optional[Collection[Transform]]=None):
    "Utility func to easily create a list of flip, rotate, `zoom`, warp, lighting transforms."
    res = []
    if cfg.INPUT.DO_FLIP: res.append(flip_lr(p=cfg.INPUT.FLIP_PROB))
    if cfg.INPUT.DO_PAD: res.extend(rand_pad(padding=cfg.INPUT.PADDING,
                                             size=cfg.INPUT.SIZE_TRAIN,
                                             mode=cfg.INPUT.PADDING_MODE))
    if cfg.INPUT.DO_LIGHTING:
        res.append(brightness(change=(0.5*(1-cfg.INPUT.MAX_LIGHTING), 0.5*(1+cfg.INPUT.MAX_LIGHTING)), p=cfg.INPUT.P_LIGHTING))
        res.append(contrast(scale=(1-cfg.INPUT.MAX_LIGHTING, 1/(1-cfg.INPUT.MAX_LIGHTING)), p=cfg.INPUT.P_LIGHTING))
    res.append(RandomErasing())
    # train , valid
    return (res, [crop_pad()])

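Note (not part of the commit): a hedged sketch of how the (train, valid) transform tuple might be consumed. The real get_data_bunch in this commit assembles its own DataBunch, so the from_folder call and the dataset path below are only illustrative.

    from fastai.vision import ImageDataBunch, imagenet_stats

    ds_tfms = build_transforms(cfg)                 # ([train tfms], [crop_pad()])
    data = ImageDataBunch.from_folder('datasets/market1501',   # placeholder path
                                      ds_tfms=ds_tfms,
                                      size=cfg.INPUT.SIZE_TRAIN,
                                      bs=cfg.SOLVER.IMS_PER_BATCH)
    data.normalize(imagenet_stats)
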
@@ -6,9 +6,11 @@


import numpy as np

from fastai.vision import *
from fastai.vision.image import *

__all__ = ['RandomErasing']

def _random_erasing(x, probability=0.5, sl=0.02, sh=0.4, r1=0.3,
                    mean=(np.array(imagenet_stats[1]) + 1) * imagenet_stats[0]):

@@ -6,6 +6,7 @@
import logging

import torch
import numpy as np
import torch.nn.functional as F
from data.datasets.eval_reid import evaluate
from data.datasets.eval_threshold import eval_roc

@@ -62,7 +63,7 @@ def inference(

    # Compute ROC and AUC
    logger.info("Compute ROC Curve...")
    fpr, tpr, fps, tps, p, n, thresholds = eval_roc(distmat, q_pids, g_pids, q_camids, g_camids)
    fpr, tpr, fps, tps, p, n, thresholds = eval_roc(distmat, q_pids, g_pids, q_camids, g_camids, 0.1, 0.5)
    logger.info("positive samples: {}, negative samples: {}".format(p, n))
    for i, thresh in enumerate(thresholds):
        logger.info("threshold: {:.2f}, FP: {:.0f}({:.3f}), TP: {:.0f}({:.3f})".

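Note (not part of the commit): the fpr/tpr lists logged above can also be turned into a saved ROC curve with plain matplotlib, for example:

    import matplotlib.pyplot as plt

    plt.figure()
    plt.plot(fpr, tpr, marker='.')
    plt.xlabel('false positive rate')
    plt.ylabel('true positive rate')
    plt.title('ReID verification ROC')
    plt.savefig('roc.jpg')
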
@@ -10,60 +10,126 @@ import matplotlib.pyplot as plt
from fastai.train import *
from fastai.torch_core import *
from fastai.basic_data import *
from fastai.basic_train import Learner
from fastai.vision import *


class ReidInterpretation(Interpretation):
class ReidInterpretation():
    "Interpretation methods for reid models."
    def __init__(self, learn, preds, y_true, losses, ds_type=DatasetType.Valid):
        super().__init__(learn, preds, y_true, losses, ds_type=ds_type)
    def __init__(self, learn, test_labels, num_q):
        self.test_labels,self.num_q = test_labels,num_q
        self.test_dl = learn.data.test_dl
        self.model = learn.model

        self.get_distmat()

    def get_distmat(self, test_labels, num_query):
    def get_distmat(self):
        self.model.eval()
        feats = []
        pids = []
        camids = []
        for p, c in test_labels:
        for p,c in self.test_labels:
            pids.append(p)
            camids.append(c)
        self.q_pids = np.asarray(pids[:num_query])
        self.g_pids = np.asarray(pids[num_query:])
        self.q_camids = np.asarray(camids[:num_query])
        self.g_camids = np.asarray(camids[num_query:])
        self.q_pids = np.asarray(pids[:self.num_q])
        self.g_pids = np.asarray(pids[self.num_q:])
        self.q_camids = np.asarray(camids[:self.num_q])
        self.g_camids = np.asarray(camids[self.num_q:])

        qf = self.preds[:num_query]
        gf = self.preds[num_query:]
        for imgs, _ in self.test_dl:
            with torch.no_grad():
                feat = self.model(imgs)
            feats.append(feat)
        feats = torch.cat(feats, dim=0)
        feats = F.normalize(feats)
        qf = feats[:self.num_q]
        gf = feats[self.num_q:]
        m, n = qf.shape[0], gf.shape[0]
        self.num_q=num_query

        # Cosine distance
        distmat = torch.mm(F.normalize(qf), F.normalize(gf).t())
        distmat = torch.mm(qf, gf.t())
        self.distmat = to_np(distmat)

        self.indices = np.argsort(self.distmat, axis=1)[:, ::-1]
        self.indices = np.argsort(-self.distmat, axis=1)
        self.matches = (self.g_pids[self.indices] == self.q_pids[:, np.newaxis]).astype(np.int32)

    def plot_rank_result(self, q_idx, top=5, title="Rank result"):
        q_pid = self.q_pids[q_idx]
        q_camid = self.q_camids[q_idx]
    def get_matched_result(self, q_index):
        q_pid = self.q_pids[q_index]
        q_camid = self.q_camids[q_index]

        order = self.indices[q_idx]
        order = self.indices[q_index]
        remove = (self.g_pids[order] == q_pid) & (self.g_camids[order] == q_camid)
        keep = np.invert(remove)
        cmc = self.matches[q_index][keep]
        matched_idx = order[keep]
        return cmc, matched_idx

    def plot_rank_result(self, q_idx, top=5, title="Rank result"):
        cmc,matched_idx = self.get_matched_result(q_idx)

        raw_cmc = self.matches[q_idx][keep]
        matched_idx = self.indices[q_idx][keep]

        fig,axes = plt.subplots(1, top+1, figsize=(12,5))
        fig.suptitle('query/sim/true(false)')
        query_im,cl=self.learn.data.dl(DatasetType.Test).dataset[q_idx]
        query_im.show(ax=axes.flat[0],title='query')
        fig,axes = plt.subplots(1, top+1, figsize=(15, 5))
        fig.suptitle('query similarity/true(false)')
        query_im,cl=self.test_dl.dataset[q_idx]
        query_im.show(ax=axes.flat[0], title='query')
        for i in range(top):
            if raw_cmc[i] == 1:
            g_idx = self.num_q + matched_idx[i]
            im,cl = self.test_dl.dataset[g_idx]
            if cmc[i] == 1:
                label='true'
                axes.flat[i+1].add_patch(plt.Rectangle(xy=(0, 0), width=im.size[1]-1, height=im.size[0]-1,
                                                       edgecolor=(1, 0, 0), fill=False, linewidth=5))
            else:
                label='false'
                im_idx=self.num_q+matched_idx[i]+1
                im,cl = self.learn.data.dl(DatasetType.Test).dataset[im_idx]
                im.show(ax=axes.flat[i+1],title=f'{self.distmat[q_idx, im_idx]:.3f} / {label}')
                axes.flat[i+1].add_patch(plt.Rectangle(xy=(0, 0), width=im.size[1]-1, height=im.size[0]-1,
                                                       edgecolor=(0, 0, 1), fill=False, linewidth=5))
            im.show(ax=axes.flat[i+1], title=f'{self.distmat[q_idx, matched_idx[i]]:.3f} / {label}')
        return fig

    def get_top_error(self):
        # Iteration over query ids and store query gallery similarity
        similarity_score = namedtuple('similarityScore', 'query gallery sim cmc')
        storeCorrect = []
        storeWrong = []
        for q_index in range(self.num_q):
            cmc,matched_idx = self.get_matched_result(q_index)
            single_item = similarity_score(query=q_index, gallery=[self.num_q + matched_idx[i] for i in range(5)],
                                           sim=[self.distmat[q_index, matched_idx[i]] for i in range(5)],
                                           cmc=cmc[:5])
            if cmc[0] == 1:
                storeCorrect.append(single_item)
            else:
                storeWrong.append(single_item)
        storeCorrect.sort(key=lambda x: x.sim[0])
        storeWrong.sort(key=lambda x: x.sim[0], reverse=True)

        self.storeCorrect = storeCorrect
        self.storeWrong = storeWrong

    def plot_top_error(self, topK=5, positive=True):
        if not hasattr(self, 'storeCorrect'):
            self.get_top_error()

        if positive:
            img_list = self.storeCorrect
        else:
            img_list = self.storeWrong
        # Rank top error results, which means negative sample with largest similarity
        # and positive sample with smallest similarity
        fig,axes = plt.subplots(topK, 6, figsize=(15, 4*topK))
        fig.suptitle('query similarity/true(false)')
        for i in range(topK):
            q_idx,g_idxs,sim,cmc = img_list[i]
            query_im,cl = self.test_dl.dataset[q_idx]
            query_im.show(ax=axes[i, 0], title='query')
            for j,g_idx in enumerate(g_idxs):
                im,cl = self.test_dl.dataset[g_idx]
                if cmc[j] == 1:
                    label='true'
                    axes[i,j+1].add_patch(plt.Rectangle(xy=(0, 0), width=im.size[1]-1, height=im.size[0]-1,
                                                        edgecolor=(1, 0, 0), fill=False, linewidth=5))
                else:
                    label='false'
                    axes[i, j+1].add_patch(plt.Rectangle(xy=(0, 0), width=im.size[1]-1, height=im.size[0]-1,
                                                         edgecolor=(0, 0, 1), fill=False, linewidth=5))
                im.show(ax=axes[i,j+1], title=f'{sim[j]:.3f} / {label}')

        return fig

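Note (not part of the commit): a minimal usage sketch of the reworked interpreter, assuming a trained fastai Learner plus the test_labels/num_query returned by get_data_bunch, as in tools/test.py.

    interp = ReidInterpretation(learn, test_labels, num_query)
    fig = interp.plot_rank_result(q_idx=0, top=5)
    fig.savefig('rank_result.jpg')
    fig = interp.plot_top_error(topK=5, positive=False)   # hardest wrong matches
    fig.savefig('top_errors.jpg')
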
@@ -4,14 +4,15 @@
@contact: sherlockliao01@gmail.com
"""

import os
import logging
import os

import matplotlib.pyplot as plt
import torch.nn.functional as F

from data.datasets.eval_reid import evaluate
from fastai.basic_data import DatasetType
from fastai.vision import *
import torch.nn.functional as F
import matplotlib.pyplot as plt
from fastai.basic_train import Learner


@dataclass

@@ -20,8 +21,8 @@ class TrackValue(Callback):
    total_iter: int

    # def on_batch_end(self, num_batch, last_loss, **kwargs):
    #     if (num_batch+1) % (self.total_iter//3) == 0:
    #         self.logger.info('Iter [{}/{}], loss: {:.4f}'.format(num_batch, self.total_iter, last_loss.item()))
    # if (num_batch+1) % (self.total_iter//3) == 0:
    #     self.logger.info('Iter [{}/{}], loss: {:.4f}'.format(num_batch, self.total_iter, last_loss.item()))

    def on_epoch_end(self, epoch, smooth_loss, **kwargs):
        self.logger.info('Epoch {}[Iter {}], loss: {:.4f}'.format(epoch, self.total_iter, smooth_loss.item()))

@@ -116,8 +117,8 @@ def do_train(
                                  partial(TestModel, test_labels=test_labels, eval_period=eval_period, num_query=num_query, logger=logger)],
                    callbacks=[TrackValue(logger, total_iter)])

    learn.fit(epochs, wd=cfg.SOLVER.WEIGHT_DECAY)
    learn.fit(epochs, lr=cfg.SOLVER.BASE_LR, wd=cfg.SOLVER.WEIGHT_DECAY)
    learn.recorder.plot_losses()
    plt.savefig(os.path.join(output_dir, "loss.jpg"))
    learn.recorder.plot_lr()
    plt.savefig(os.path.join(output_dir, "lr.jpg"))
    plt.savefig(os.path.join(output_dir, "lr.jpg"))

@@ -1,7 +1,7 @@
gpu=1
gpu=0

CUDA_VISIBLE_DEVICES=$gpu python tools/test.py -cfg='configs/softmax_triplet.yml' \
INPUT.SIZE_TRAIN '(256, 128)' \
DATASETS.NAMES '("market1501","duke","beijing")' \
OUTPUT_DIR 'logs/test' \
TEST.WEIGHT 'logs/beijing/market+duke+bj/models/model_149.pth'
TEST.WEIGHT 'logs/beijing/market_duke_cuhk03_beijing_revise_bs64/models/model_149.pth'

@@ -2,4 +2,4 @@ gpu=3

CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \
DATASETS.NAMES '("market1501",)' \
OUTPUT_DIR 'logs/market/softmax_triplet_256_128_bs512'
OUTPUT_DIR 'logs/market/adabound_256_128_bs64'

@@ -1,6 +1,6 @@
gpu=2
gpu=0

CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \
DATASETS.NAMES '("market1501","duke","cuhk03","beijing")' \
SOLVER.IMS_PER_BATCH '64' \
OUTPUT_DIR 'logs/beijing/market_duke_cuhk03_beijing_bs64'
OUTPUT_DIR 'logs/beijing/market_duke_cuhk03_beijing_revise_bs64_light'

@@ -4,4 +4,4 @@
@contact: sherlockliao01@gmail.com
"""

from .build import *
from .adabound import *

@@ -0,0 +1,239 @@
import math
import torch
from torch.optim import Optimizer


__all__ = ['AdaBound', 'AdaBoundW']

class AdaBound(Optimizer):
    """Implements AdaBound algorithm.
    It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): Adam learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        final_lr (float, optional): final (SGD) learning rate (default: 0.1)
        gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
    .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
        https://openreview.net/forum?id=Bkg3g2R9FX
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
                 eps=1e-8, weight_decay=0, amsbound=False):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        if not 0.0 <= final_lr:
            raise ValueError("Invalid final learning rate: {}".format(final_lr))
        if not 0.0 <= gamma < 1.0:
            raise ValueError("Invalid gamma parameter: {}".format(gamma))
        defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
                        weight_decay=weight_decay, amsbound=amsbound)
        super(AdaBound, self).__init__(params, defaults)

        self.base_lrs = list(map(lambda group: group['lr'], self.param_groups))

    def __setstate__(self, state):
        super(AdaBound, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsbound', False)

    def step(self, closure=None):
        """Performs a single optimization step.
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group, base_lr in zip(self.param_groups, self.base_lrs):
            base_lr = group['lr']
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError(
                        'Adam does not support sparse gradients, please consider SparseAdam instead')
                amsbound = group['amsbound']

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)
                    if amsbound:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                if amsbound:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                if group['weight_decay'] != 0:
                    grad = grad.add(group['weight_decay'], p.data)

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(1 - beta1, grad)
                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                if amsbound:
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing running avg. of gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

                # Applies bounds on actual learning rate
                # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
                final_lr = group['final_lr'] * group['lr'] / base_lr
                lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
                upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
                step_size = torch.full_like(denom, step_size)
                step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)

                p.data.add_(-step_size)

        return loss


class AdaBoundW(Optimizer):
    """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101)
    It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): Adam learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        final_lr (float, optional): final (SGD) learning rate (default: 0.1)
        gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
    .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
        https://openreview.net/forum?id=Bkg3g2R9FX
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
                 eps=1e-8, weight_decay=0, amsbound=False):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        if not 0.0 <= final_lr:
            raise ValueError("Invalid final learning rate: {}".format(final_lr))
        if not 0.0 <= gamma < 1.0:
            raise ValueError("Invalid gamma parameter: {}".format(gamma))
        defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
                        weight_decay=weight_decay, amsbound=amsbound)
        super(AdaBoundW, self).__init__(params, defaults)

        self.base_lrs = list(map(lambda group: group['lr'], self.param_groups))

    def __setstate__(self, state):
        super(AdaBoundW, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsbound', False)

    def step(self, closure=None):
        """Performs a single optimization step.
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group, base_lr in zip(self.param_groups, self.base_lrs):
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError(
                        'Adam does not support sparse gradients, please consider SparseAdam instead')
                amsbound = group['amsbound']

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)
                    if amsbound:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                if amsbound:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(1 - beta1, grad)
                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                if amsbound:
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing running avg. of gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

                # Applies bounds on actual learning rate
                # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
                final_lr = group['final_lr'] * group['lr'] / base_lr
                lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
                upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
                step_size = torch.full_like(denom, step_size)
                step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)

                if group['weight_decay'] != 0:
                    decayed_weights = torch.mul(p.data, group['weight_decay'])
                    p.data.add_(-step_size)
                    p.data.sub_(decayed_weights)
                else:
                    p.data.add_(-step_size)

        return loss

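Note (not part of the commit): a hedged sketch of plugging this optimizer into the fastai Learner built in tools/train.py, which currently uses partial(torch.optim.Adam) as opt_func; the hyperparameters shown are just the defaults documented above.

    from functools import partial

    opt_func = partial(AdaBound, lr=1e-3, final_lr=0.1, gamma=1e-3)
    # learn = Learner(data_bunch, model, opt_func=opt_func, loss_func=loss_func, ...)
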
@@ -5,10 +5,18 @@
"""

import sys
from fastai.vision import *
sys.path.append('.')
from data import get_data_bunch
from config import cfg


if __name__ == '__main__':
    data = get_data_bunch(cfg)
    # cfg.INPUT.SIZE_TRAIN = (384, 128)
    data, label, num_q = get_data_bunch(cfg)
    # def get_ex(): return open_image('datasets/beijingStation/query/000245_c10s2_1561732033722.000000.jpg')
    # im = get_ex()
    print(data.train_ds[0])
    print(data.test_ds[0])
    from ipdb import set_trace; set_trace()
    # im.apply_tfms(crop_pad(size=(300, 300)))

@@ -52,7 +52,7 @@ def main():
    data_bunch, test_labels, num_query = get_data_bunch(cfg)
    model = build_model(cfg, data_bunch.c)
    state_dict = torch.load(cfg.TEST.WEIGHT)
    model.load_state_dict(state_dict['model'])
    model.load_params_wo_fc(state_dict['model'])
    model.cuda()
    # model = torch.jit.load("/export/home/lxy/reid_baseline/pcb_model_v0.2.pt")

|
@ -5,20 +5,22 @@
|
|||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import os
|
||||
import sys
|
||||
from bisect import bisect_right
|
||||
|
||||
from torch.backends import cudnn
|
||||
|
||||
sys.path.append('.')
|
||||
import sys
|
||||
sys.path.append(".")
|
||||
from config import cfg
|
||||
from data import get_data_bunch
|
||||
from engine.trainer import do_train
|
||||
from fastai.vision import *
|
||||
from layers import make_loss
|
||||
from modeling import build_model
|
||||
from solver import *
|
||||
from utils.logger import setup_logger
|
||||
from fastai.vision import *
|
||||
|
||||
|
||||
def train(cfg):
|
||||
|
@@ -27,8 +29,6 @@ def train(cfg):

    # prepare model
    model = build_model(cfg, data_bunch.c)
    # state_dict = torch.load("logs/beijing/market_duke_softmax_triplet_256_128_bs256/models/model_149.pth")
    # model.load_params_wo_fc(state_dict['model'])

    opt_func = partial(torch.optim.Adam)

@@ -42,7 +42,8 @@ def train(cfg):
        warmup_factor = cfg.SOLVER.WARMUP_FACTOR * (1 - alpha) + alpha
        return start * warmup_factor * gamma ** bisect_right(milestones, pct)

    lr = cfg.SOLVER.BASE_LR * (cfg.SOLVER.IMS_PER_BATCH // 64)
    # lr = cfg.SOLVER.BASE_LR * (cfg.SOLVER.IMS_PER_BATCH // 64)
    lr = cfg.SOLVER.BASE_LR
    lr_sched = Scheduler(lr, cfg.SOLVER.MAX_EPOCHS, warmup_multistep)

    loss_func = make_loss(cfg)