mirror of https://github.com/JDAI-CV/fast-reid.git

commit 5fd586492f
parent 88dfd71a76

    release ready
@@ -5,3 +5,4 @@ __pycache__
csrc/eval_cylib/build/
logs/
.ipynb_checkpoints
tests/
@@ -44,9 +44,7 @@ _C.INPUT.PADDING = 10
_C.INPUT.DO_LIGHTING = False
_C.INPUT.MAX_LIGHTING = 0.2
_C.INPUT.P_LIGHTING=0.75
# Mixup
_C.INPUT.MIXUP = False
_C.INPUT.MIXUP_ALPHA = 0.4

# -----------------------------------------------------------------------------
# Dataset
# -----------------------------------------------------------------------------
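The new INPUT.MIXUP keys above are ordinary yacs defaults; the training scripts further down override keys like these from the command line. A minimal, self-contained sketch of that override pattern (the override values here are hypothetical, not from the commit):

from yacs.config import CfgNode as CN

_C = CN()
_C.INPUT = CN()
_C.INPUT.MIXUP = False        # same defaults as in the config hunk above
_C.INPUT.MIXUP_ALPHA = 0.4

cfg = _C.clone()
# merge_from_list takes alternating key/value strings, the form passed on the train.py command line
cfg.merge_from_list(['INPUT.MIXUP', 'True', 'INPUT.MIXUP_ALPHA', '0.2'])
print(cfg.INPUT.MIXUP, cfg.INPUT.MIXUP_ALPHA)   # -> True 0.2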
@@ -18,14 +18,9 @@ from .transforms import build_transforms
def get_data_bunch(cfg):
ds_tfms = build_transforms(cfg)

def _process_dir(dir_path, recursive=False):
def _process_dir(dir_path):
img_paths = []
if recursive:
id_dirs = os.listdir(dir_path)
for d in id_dirs:
img_paths.extend(glob.glob(os.path.join(dir_path, d, '*.jpg')))
else:
img_paths = glob.glob(os.path.join(dir_path, '*.jpg'))
img_paths = glob.glob(os.path.join(dir_path, '*.jpg'))
pattern = re.compile(r'([-\d]+)_c(\d*)')
v_paths = []
for img_path in img_paths:
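For context, the pattern above pulls the person id and camera id out of Market-1501-style file names; a tiny standalone check (the file name here is a made-up example):

import re

pattern = re.compile(r'([-\d]+)_c(\d*)')
# '0002' -> person id 2, 'c1' -> camera id 1
pid, camid = map(int, pattern.search('0002_c1s1_000451_03.jpg').groups())
print(pid, camid)   # -> 2 1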
@@ -38,12 +33,9 @@ def get_data_bunch(cfg):
market_train_path = 'datasets/Market-1501-v15.09.15/bounding_box_train'
duke_train_path = 'datasets/DukeMTMC-reID/bounding_box_train'
cuhk03_train_path = 'datasets/cuhk03/'
bjStation_train_path = 'datasets/beijingStation/20190720/train'

market_query_path = 'datasets/Market-1501-v15.09.15/query'
marker_gallery_path = 'datasets/Market-1501-v15.09.15/bounding_box_test'
bj_query_path = 'datasets/beijingStation/query'
bj_gallery_path = 'datasets/beijingStation/test'

train_img_names = list()
for d in cfg.DATASETS.NAMES:
@@ -51,21 +43,16 @@ def get_data_bunch(cfg):
train_img_names.extend(_process_dir(market_train_path))
elif d == 'duke':
train_img_names.extend(_process_dir(duke_train_path))
elif d == 'beijing':
train_img_names.extend(_process_dir(bjStation_train_path, True))
elif d == 'cuhk03':
train_img_names.extend(CUHK03().train)
else:
raise NameError("{} is not available".format(d))
raise NameError(f'{d} is not available')

train_names = [i[0] for i in train_img_names]

if cfg.DATASETS.TEST_NAMES == "market1501":
query_names = _process_dir(market_query_path)
gallery_names = _process_dir(marker_gallery_path)
elif cfg.DATASETS.TEST_NAMES == "bj":
query_names = _process_dir(bj_query_path)
gallery_names = _process_dir(bj_gallery_path, True)
else:
print(f"not support {cfg.DATASETS.TEST_NAMES} test set")
@@ -98,4 +85,3 @@ def get_data_bunch(cfg):
data_bunch.normalize(imagenet_stats)

return data_bunch, test_labels, len(query_names)
@@ -1,64 +0,0 @@
# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""


import os
import sys

import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch.backends import cudnn
from torch.utils.data import Dataset


def eval_roc(distmat, q_pids, g_pids, q_cmaids, g_camids, t_start=0.1, t_end=0.9):
# sort cosine dist from large to small
indices = np.argsort(distmat, axis=1)[:, ::-1]
# query id and gallery id match
matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32)

new_dist = []
new_matches = []
# Remove the same identity in the same camera.
num_q = distmat.shape[0]
for q_idx in range(num_q):
q_pid = q_pids[q_idx]
q_camid = q_cmaids[q_idx]

order = indices[q_idx]
remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)
keep = np.invert(remove)
new_matches.extend(matches[q_idx][keep].tolist())
new_dist.extend(distmat[q_idx][indices[q_idx]][keep].tolist())

fpr = []
tpr = []
fps = []
tps = []
thresholds = np.arange(t_start, t_end, 0.02)

# get number of positive and negative examples in the dataset
p = sum(new_matches)
n = len(new_matches) - p

# iteration through all thresholds and determine fraction of true positives
# and false positives found at this threshold
for t in thresholds:
fp = 0
tp = 0
for i in range(len(new_dist)):
if new_dist[i] > t:
if new_matches[i] == 1:
tp += 1
else:
fp += 1
fpr.append(fp / float(n))
tpr.append(tp / float(p))
fps.append(fp)
tps.append(tp)
return fpr, tpr, fps, tps, p, n, thresholds
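For illustration only (the commit deletes this file), eval_roc as defined above could be driven with toy arrays like the following; all of the data here is made up:

import numpy as np

# two queries, six gallery images, two identities, two cameras
q_pids   = np.array([0, 1])
g_pids   = np.array([0, 0, 1, 1, 0, 1])
q_camids = np.array([0, 1])
g_camids = np.array([1, 0, 0, 1, 1, 0])
distmat  = np.random.rand(2, 6)   # stand-in cosine-similarity matrix

fpr, tpr, fps, tps, p, n, thresholds = eval_roc(distmat, q_pids, g_pids, q_camids, g_camids)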
demo.py
@@ -5,7 +5,6 @@
"""



import argparse
import json
import os
@@ -27,19 +26,19 @@ class Reid(object):

# self.cfg = self.prepare('config/softmax_triplet.yml')
# self.num_classes = 413
self.model = Baseline(10, 1)
state_dict = torch.load('logs/beijing/market+duke+bj_bs64/models/model_149.pth')
self.model.load_params_wo_fc(state_dict['model'])
self.model.cuda()
self.model.eval()
# self.model = torch.jit.load("reid_model_v0.1.pt")
# self.model = Baseline('resnet50_ibn', 100, 1)
# state_dict = torch.load('/export/home/lxy/reid_baseline/logs/2019.8.12/bj/ibn_lighting/models/model_119.pth')
# self.model.load_params_wo_fc(state_dict['model'])
# self.model.cuda()
# self.model.eval()
self.model = torch.jit.load("reid_model.pt")
# self.model.eval()
# self.model.cuda()

# example = torch.rand(1, 3, 384, 128)
# example = torch.rand(1, 3, 256, 128)
# example = example.cuda()
# traced_script_module = torch.jit.trace(self.model, example)
# traced_script_module.save("reid_model_v0.1.pt")
# traced_script_module.save("reid_model.pt")

def demo(self, img_path):
img = cv2.imread(img_path)
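The commented-out lines above sketch the usual TorchScript export/reload cycle that produces reid_model.pt. A self-contained version of the same idea, with a torchvision backbone standing in for the repo's Baseline model (the model choice and file name are assumptions for illustration):

import torch
import torchvision

model = torchvision.models.resnet50().eval()   # stand-in for the re-id Baseline
example = torch.rand(1, 3, 256, 128)           # person-crop-sized dummy input

traced = torch.jit.trace(model, example)        # record the forward pass
traced.save("reid_model.pt")                    # what demo.py later loads with torch.jit.load

loaded = torch.jit.load("reid_model.pt")
print(loaded(example).shape)                    # -> torch.Size([1, 1000])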
@@ -4,14 +4,9 @@
@contact: sherlockliao01@gmail.com
"""

import matplotlib.pyplot as plt
import numpy as np
import torch
import matplotlib.pyplot as plt
from fastai.train import *
from fastai.torch_core import *
from fastai.basic_data import *
from fastai.basic_train import Learner
from fastai.vision import *


class ReidInterpretation():
@@ -60,18 +55,18 @@ class ReidInterpretation():
remove = (self.g_pids[order] == q_pid) & (self.g_camids[order] == q_camid)
keep = np.invert(remove)
cmc = self.matches[q_index][keep]
matched_idx = order[keep]
return cmc, matched_idx
sort_idx = order[keep]
return cmc, sort_idx

def plot_rank_result(self, q_idx, top=5, title="Rank result"):
cmc,matched_idx = self.get_matched_result(q_idx)
cmc, sort_idx = self.get_matched_result(q_idx)

fig,axes = plt.subplots(1, top+1, figsize=(15, 5))
fig.suptitle('query similarity/true(false)')
query_im,cl=self.test_dl.dataset[q_idx]
query_im.show(ax=axes.flat[0], title='query')
for i in range(top):
g_idx = self.num_q + matched_idx[i]
g_idx = self.num_q + sort_idx[i]
im,cl = self.test_dl.dataset[g_idx]
if cmc[i] == 1:
label='true'
@@ -81,7 +76,7 @@ class ReidInterpretation():
label='false'
axes.flat[i+1].add_patch(plt.Rectangle(xy=(0, 0), width=im.size[1]-1, height=im.size[0]-1,
edgecolor=(0, 0, 1), fill=False, linewidth=5))
im.show(ax=axes.flat[i+1], title=f'{self.distmat[q_idx, matched_idx[i]]:.3f} / {label}')
im.show(ax=axes.flat[i+1], title=f'{self.distmat[q_idx, sort_idx[i]]:.3f} / {label}')
return fig

def get_top_error(self):
@@ -90,9 +85,9 @@ class ReidInterpretation():
storeCorrect = []
storeWrong = []
for q_index in range(self.num_q):
cmc,matched_idx = self.get_matched_result(q_index)
single_item = similarity_score(query=q_index, gallery=[self.num_q + matched_idx[i] for i in range(5)],
sim=[self.distmat[q_index, matched_idx[i]] for i in range(5)],
cmc, sort_idx = self.get_matched_result(q_index)
single_item = similarity_score(query=q_index, gallery=[self.num_q + sort_idx[i] for i in range(5)],
sim=[self.distmat[q_index, sort_idx[i]] for i in range(5)],
cmc=cmc[:5])
if cmc[0] == 1:
storeCorrect.append(single_item)
@@ -125,11 +120,48 @@ class ReidInterpretation():
if cmc[j] == 1:
label='true'
axes[i,j+1].add_patch(plt.Rectangle(xy=(0, 0), width=im.size[1]-1, height=im.size[0]-1,
edgecolor=(1, 0, 0), fill=False, linewidth=5))
edgecolor=(1, 0, 0), fill=False, linewidth=5))
else:
label='false'
axes[i, j+1].add_patch(plt.Rectangle(xy=(0, 0), width=im.size[1]-1, height=im.size[0]-1,
edgecolor=(0, 0, 1), fill=False, linewidth=5))
edgecolor=(0, 0, 1), fill=False, linewidth=5))
im.show(ax=axes[i,j+1], title=f'{sim[j]:.3f} / {label}')

return fig
return fig

def plot_positve_negative_dist(self):
pos_sim, neg_sim = [], []
for i, q in enumerate(self.q_pids):
cmc, sort_idx = self.get_matched_result(i) # remove same id in same camera
for j in range(len(cmc)):
if cmc[j] == 1:
pos_sim.append(self.distmat[i,sort_idx[j]])
else:
neg_sim.append(self.distmat[i,sort_idx[j]])
fig = plt.figure(figsize=(10,5))
plt.hist(pos_sim, bins=80, alpha=0.7, density=True, color='red', label='positive')
plt.hist(neg_sim, bins=80, alpha=0.5, density=True, color='blue', label='negative')
plt.xticks(np.arange(-0.3, 0.8, 0.1))
plt.title('posivie and negative pair distribution')
return pos_sim, neg_sim

def plot_same_cam_diff_cam_dist(self):
same_cam, diff_cam = [], []
for i, q in enumerate(self.q_pids):
q_camid = self.q_camids[i]

order = self.indices[i]
same = (self.g_pids[order] == q) & (self.g_camids[order] == q_camid)
diff = (self.g_pids[order] == q) & (self.g_camids[order] != q_camid)
sameCam_idx = order[same]
diffCam_idx = order[diff]

same_cam.extend(self.distmat[i, sameCam_idx])
diff_cam.extend(self.distmat[i, diffCam_idx])

fig = plt.figure(figsize=(10,5))
plt.hist(same_cam, bins=80, alpha=0.7, density=True, color='red', label='same camera')
plt.hist(diff_cam, bins=80, alpha=0.5, density=True, color='blue', label='diff camera')
plt.xticks(np.arange(0.1, 1.0, 0.1))
plt.title('posivie and negative pair distribution')
return fig
@@ -19,10 +19,6 @@ class TrackValue(Callback):
logger: logging.Logger
total_iter: int

# def on_batch_end(self, num_batch, last_loss, **kwargs):
# if (num_batch+1) % (self.total_iter//3) == 0:
# self.logger.info('Iter [{}/{}], loss: {:.4f}'.format(num_batch, self.total_iter, last_loss.item()))

def on_epoch_end(self, epoch, smooth_loss, **kwargs):
self.logger.info('Epoch {}[Iter {}], loss: {:.4f}'.format(epoch, self.total_iter, smooth_loss.item()))
@@ -87,71 +83,6 @@ class TestModel(LearnerCallback):
self.learn.save("model_{}".format(epoch))


class MixUpCallback(LearnerCallback):
"Callback that creates the mixed-up input and target."
def __init__(self, learn:Learner, alpha:float=0.4, stack_x:bool=False, stack_y:bool=True):
super().__init__(learn)
self.alpha,self.stack_x,self.stack_y = alpha,stack_x,stack_y

def on_train_begin(self, **kwargs):
if self.stack_y: self.learn.loss_func = MixUpLoss(self.learn.loss_func)

def on_batch_begin(self, last_input, last_target, train, **kwargs):
"Applies mixup to `last_input` and `last_target` if `train`."
if not train: return
from ipdb import set_trace; set_trace()
lambd = np.random.beta(self.alpha, self.alpha, last_target.size(0))
lambd = np.concatenate([lambd[:,None], 1-lambd[:,None]], 1).max(1)
lambd = last_input.new(lambd)
shuffle = torch.randperm(last_target.size(0)).to(last_input.device)
x1, y1 = last_input[shuffle], last_target[shuffle]
if self.stack_x:
new_input = [last_input, last_input[shuffle], lambd]
else:
out_shape = [lambd.size(0)] + [1 for _ in range(len(x1.shape) - 1)]
new_input = (last_input * lambd.view(out_shape) + x1 * (1-lambd).view(out_shape))
if self.stack_y:
new_target = torch.cat([last_target[:,None].float(), y1[:,None].float(), lambd[:,None].float()], 1)
else:
if len(last_target.shape) == 2:
lambd = lambd.unsqueeze(1).float()
new_target = last_target.float() * lambd + y1.float() * (1-lambd)
return {'last_input': new_input, 'last_target': new_target}

def on_train_end(self, **kwargs):
if self.stack_y: self.learn.loss_func = self.learn.loss_func.get_old()


class MixUpLoss(Module):
"Adapt the loss function `crit` to go with mixup."

def __init__(self, crit, reduction='mean'):
super().__init__()
if hasattr(crit, 'reduction'):
self.crit = crit
self.old_red = crit.reduction
setattr(self.crit, 'reduction', 'none')
else:
self.crit = partial(crit, reduction='none')
self.old_crit = crit
self.reduction = reduction

def forward(self, output, target):
if len(target.size()) == 2:
loss1, loss2 = self.crit(output,target[:,0].long()), self.crit(output,target[:,1].long())
d = (loss1 * target[:,2] + loss2 * (1-target[:,2])).mean()
else: d = self.crit(output, target)
if self.reduction == 'mean': return d.mean()
elif self.reduction == 'sum': return d.sum()
return d

def get_old(self):
if hasattr(self, 'old_crit'): return self.old_crit
elif hasattr(self, 'old_red'):
setattr(self.crit, 'reduction', self.old_red)
return self.crit

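The MixUpCallback/MixUpLoss pair above implements standard mixup: inputs are blended pairwise and the loss is split between the two labels. A minimal standalone sketch of the blend it performs, with made-up batch shapes:

import numpy as np
import torch

alpha = 0.4
x = torch.randn(8, 3, 256, 128)            # hypothetical image batch
y = torch.randint(0, 10, (8,))              # hypothetical integer labels

lambd = np.random.beta(alpha, alpha, 8)
lambd = np.concatenate([lambd[:, None], 1 - lambd[:, None]], 1).max(1)  # keep lambda >= 0.5
lambd = torch.as_tensor(lambd, dtype=x.dtype)

shuffle = torch.randperm(8)
x_mix = x * lambd.view(8, 1, 1, 1) + x[shuffle] * (1 - lambd).view(8, 1, 1, 1)
# stacked target of (original label, shuffled label, lambda), the form MixUpLoss consumes
y_mix = torch.stack([y.float(), y[shuffle].float(), lambd], dim=1)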
def do_train(
cfg,
model,
@@ -160,7 +91,7 @@ def do_train(
opt_func,
lr_sched,
loss_func,
num_query
num_query,
):
eval_period = cfg.SOLVER.EVAL_PERIOD
output_dir = Path(cfg.OUTPUT_DIR)
@@ -174,9 +105,6 @@ def do_train(
partial(LRScheduler, lr_sched=lr_sched),
partial(TestModel, test_labels=test_labels, eval_period=eval_period, num_query=num_query, logger=logger),
]
if cfg.INPUT.MIXUP:
cb_fns.append(
partial(MixUpCallback, alpha=cfg.INPUT.MIXUP_ALPHA))

learn = Learner(
data_bunch,
@@ -189,7 +117,3 @@ def do_train(
callbacks=[TrackValue(logger, total_iter)])

learn.fit(epochs, lr=cfg.SOLVER.BASE_LR, wd=cfg.SOLVER.WEIGHT_DECAY)
# learn.recorder.plot_losses()
# plt.savefig(os.path.join(output_dir, "loss.jpg"))
# learn.recorder.plot_lr()
# plt.savefig(os.path.join(output_dir, "lr.jpg"))
@@ -54,10 +54,11 @@ class Bottleneck(nn.Module):
class ResNet(nn.Module):
def __init__(self, last_stride=2, block=Bottleneck, layers=[3, 4, 6, 3]):
self.inplanes = 64
super().__init__()
super(ResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
@@ -85,6 +86,7 @@ class ResNet(nn.Module):
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)

x = self.layer1(x)
@@ -18,7 +18,8 @@ model_urls = {
class IBN(nn.Module):
def __init__(self, planes):
super(IBN, self).__init__()
half1 = int(planes/2)
# half1 = int(planes/2)
half1 = int(planes/8)
self.half = half1
half2 = planes - half1
self.IN = nn.InstanceNorm2d(half1, affine=True)
@@ -27,7 +28,8 @@ class IBN(nn.Module):
def forward(self, x):
split = torch.split(x, self.half, 1)
out1 = self.IN(split[0].contiguous())
out2 = self.BN(split[1].contiguous())
out2 = self.BN(torch.cat(split[1:], dim=1).contiguous())
# out2 = self.BN(split[1].contiguous())
out = torch.cat((out1, out2), 1)
return out
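These two hunks shrink the instance-normalized slice of IBN from half of the channels to an eighth, and batch-normalize everything else. A standalone sketch of the resulting behaviour, assuming planes=64 (the shapes here are illustrative, not from the repo):

import torch
import torch.nn as nn

planes = 64
half = int(planes / 8)                       # 8 channels through IN, 56 through BN
IN = nn.InstanceNorm2d(half, affine=True)
BN = nn.BatchNorm2d(planes - half)

x = torch.randn(2, planes, 16, 8)
split = torch.split(x, half, dim=1)                     # eight chunks of 8 channels each
out1 = IN(split[0].contiguous())                        # instance-normalize the first chunk
out2 = BN(torch.cat(split[1:], dim=1).contiguous())     # batch-normalize the remaining 56 channels
out = torch.cat((out1, out2), dim=1)                    # back to the original 64 channels
print(out.shape)   # -> torch.Size([2, 64, 16, 8])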
@@ -140,7 +142,8 @@ class ResNet_IBN(nn.Module):
if 'fc' in i:
continue
j = '.'.join(i.split('.')[1:]) # remove 'module' in state_dict
self.state_dict()[j].copy_(param_dict[i])
if self.state_dict()[j].shape == param_dict[i].shape:
self.state_dict()[j].copy_(param_dict[i])


def resnet50_ibn_a(last_stride, **kwargs):
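The added shape check lets a pretrained checkpoint load even when some tensors (for example the now smaller InstanceNorm weights) no longer match. A small, self-contained sketch of the same idea; the helper name and the toy modules are assumptions for illustration, not code from this repo:

import torch
import torch.nn as nn

def load_matching_params(model, param_dict):
    own = model.state_dict()
    for name, tensor in param_dict.items():
        if 'fc' in name:
            continue                               # skip the classifier head
        key = name[len('module.'):] if name.startswith('module.') else name
        if key in own and own[key].shape == tensor.shape:
            own[key].copy_(tensor)                 # only copy shape-compatible tensors

src = nn.Linear(4, 2)
dst = nn.Linear(4, 2)
load_matching_params(dst, src.state_dict())
print(torch.equal(dst.weight, src.weight))         # -> True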
@@ -1,5 +1,21 @@
gpu=3
gpu=1

CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \
DATASETS.NAMES '("beijing",)' \
OUTPUT_DIR 'logs/beijing/market_duke_finetune_bs64'
DATASETS.NAMES '("market1501","duke","cuhk03","beijing")' \
DATASETS.TEST_NAMES 'bj' \
INPUT.DO_LIGHTING 'False' \
OUTPUT_DIR 'logs/2019.8.14/bj/baseline'

# CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \
# DATASETS.NAMES '("market1501","duke","cuhk03","beijing")' \
# DATASETS.TEST_NAMES 'bj' \
# INPUT.DO_LIGHTING 'True' \
# OUTPUT_DIR 'logs/2019.8.9/bj/lighting'

# CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \
# DATASETS.NAMES '("market1501","duke","cuhk03","beijing")' \
# DATASETS.TEST_NAMES 'bj' \
# MODEL.BACKBONE 'resnet50_ibn' \
# MODEL.PRETRAIN_PATH '/export/home/lxy/.cache/torch/checkpoints/resnet50_ibn_a.pth.tar' \
# INPUT.DO_LIGHTING 'True' \
# OUTPUT_DIR 'logs/2019.8.14/bj/lighting_ibn7_1'
@@ -1,18 +1,20 @@
gpu=0

CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \
DATASETS.NAMES '("market1501",)' \
DATASETS.TEST_NAMES 'market1501' \
INPUT.DO_LIGHTING 'True' \
OUTPUT_DIR 'logs/2019.8.9/market/lighting'

CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \
DATASETS.NAMES '("market1501",)' \
DATASETS.TEST_NAMES 'market1501' \
MODEL.BACKBONE 'resnet50_ibn' \
MODEL.PRETRAIN_PATH '/export/home/lxy/.cache/torch/checkpoints/resnet50_ibn_a.pth.tar' \
INPUT.DO_LIGHTING 'True' \
OUTPUT_DIR 'logs/2019.8.9/market/ibn_lighting'
OUTPUT_DIR 'logs/2019.8.14/market/lighting_ibn_7_1'

# CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \
# DATASETS.NAMES '("market1501",)' \
# DATASETS.TEST_NAMES 'market1501' \
# MODEL.BACKBONE 'resnet50_ibn' \
# MODEL.PRETRAIN_PATH '/export/home/lxy/.cache/torch/checkpoints/resnet50_ibn_a.pth.tar' \
# INPUT.DO_LIGHTING 'False' \
# OUTPUT_DIR 'logs/2019.8.13/market/ibn7_1'

# CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \
# DATASETS.NAMES '("market1501",)' \
@@ -1,5 +0,0 @@
gpu=2

CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \
DATASETS.NAMES '("market1501","duke")' \
OUTPUT_DIR 'logs/beijing/market_duke_softmax_triplet_256_128_bs512'
@@ -1,20 +0,0 @@
gpu=1

# CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \
# DATASETS.NAMES '("market1501","duke","cuhk03","beijing")' \
# DATASETS.TEST_NAMES 'bj' \
# OUTPUT_DIR 'logs/2019.8.9/bj/baseline'

# CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \
# DATASETS.NAMES '("market1501","duke","cuhk03","beijing")' \
# DATASETS.TEST_NAMES 'bj' \
# INPUT.DO_LIGHTING 'True' \
# OUTPUT_DIR 'logs/2019.8.9/bj/lighting'

CUDA_VISIBLE_DEVICES=$gpu python tools/train.py -cfg='configs/softmax_triplet.yml' \
DATASETS.NAMES '("market1501","duke","cuhk03","beijing")' \
MODEL.BACKBONE 'resnet50_ibn' \
MODEL.PRETRAIN_PATH '/export/home/lxy/.cache/torch/checkpoints/resnet50_ibn_a.pth.tar' \
DATASETS.TEST_NAMES 'bj' \
INPUT.DO_LIGHTING 'True' \
OUTPUT_DIR 'logs/2019.8.12/bj/ibn_lighting'
@@ -1,239 +0,0 @@
import math
import torch
from torch.optim import Optimizer


__all__ = ['AdaBound', 'AdaBoundW']

class AdaBound(Optimizer):
"""Implements AdaBound algorithm.
It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
Arguments:
params (iterable): iterable of parameters to optimize or dicts defining
parameter groups
lr (float, optional): Adam learning rate (default: 1e-3)
betas (Tuple[float, float], optional): coefficients used for computing
running averages of gradient and its square (default: (0.9, 0.999))
final_lr (float, optional): final (SGD) learning rate (default: 0.1)
gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
eps (float, optional): term added to the denominator to improve
numerical stability (default: 1e-8)
weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
.. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
https://openreview.net/forum?id=Bkg3g2R9FX
"""

def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
eps=1e-8, weight_decay=0, amsbound=False):
if not 0.0 <= lr:
raise ValueError("Invalid learning rate: {}".format(lr))
if not 0.0 <= eps:
raise ValueError("Invalid epsilon value: {}".format(eps))
if not 0.0 <= betas[0] < 1.0:
raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
if not 0.0 <= betas[1] < 1.0:
raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
if not 0.0 <= final_lr:
raise ValueError("Invalid final learning rate: {}".format(final_lr))
if not 0.0 <= gamma < 1.0:
raise ValueError("Invalid gamma parameter: {}".format(gamma))
defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
weight_decay=weight_decay, amsbound=amsbound)
super(AdaBound, self).__init__(params, defaults)

self.base_lrs = list(map(lambda group: group['lr'], self.param_groups))

def __setstate__(self, state):
super(AdaBound, self).__setstate__(state)
for group in self.param_groups:
group.setdefault('amsbound', False)

def step(self, closure=None):
"""Performs a single optimization step.
Arguments:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
"""
loss = None
if closure is not None:
loss = closure()

for group, base_lr in zip(self.param_groups, self.base_lrs):
base_lr = group['lr']
for p in group['params']:
if p.grad is None:
continue
grad = p.grad.data
if grad.is_sparse:
raise RuntimeError(
'Adam does not support sparse gradients, please consider SparseAdam instead')
amsbound = group['amsbound']

state = self.state[p]

# State initialization
if len(state) == 0:
state['step'] = 0
# Exponential moving average of gradient values
state['exp_avg'] = torch.zeros_like(p.data)
# Exponential moving average of squared gradient values
state['exp_avg_sq'] = torch.zeros_like(p.data)
if amsbound:
# Maintains max of all exp. moving avg. of sq. grad. values
state['max_exp_avg_sq'] = torch.zeros_like(p.data)

exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
if amsbound:
max_exp_avg_sq = state['max_exp_avg_sq']
beta1, beta2 = group['betas']

state['step'] += 1

if group['weight_decay'] != 0:
grad = grad.add(group['weight_decay'], p.data)

# Decay the first and second moment running average coefficient
exp_avg.mul_(beta1).add_(1 - beta1, grad)
exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
if amsbound:
# Maintains the maximum of all 2nd moment running avg. till now
torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
# Use the max. for normalizing running avg. of gradient
denom = max_exp_avg_sq.sqrt().add_(group['eps'])
else:
denom = exp_avg_sq.sqrt().add_(group['eps'])

bias_correction1 = 1 - beta1 ** state['step']
bias_correction2 = 1 - beta2 ** state['step']
step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

# Applies bounds on actual learning rate
# lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
final_lr = group['final_lr'] * group['lr'] / base_lr
lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
step_size = torch.full_like(denom, step_size)
step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)

p.data.add_(-step_size)

return loss


class AdaBoundW(Optimizer):
"""Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101)
It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
Arguments:
params (iterable): iterable of parameters to optimize or dicts defining
parameter groups
lr (float, optional): Adam learning rate (default: 1e-3)
betas (Tuple[float, float], optional): coefficients used for computing
running averages of gradient and its square (default: (0.9, 0.999))
final_lr (float, optional): final (SGD) learning rate (default: 0.1)
gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
eps (float, optional): term added to the denominator to improve
numerical stability (default: 1e-8)
weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
.. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
https://openreview.net/forum?id=Bkg3g2R9FX
"""

def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
eps=1e-8, weight_decay=0, amsbound=False):
if not 0.0 <= lr:
raise ValueError("Invalid learning rate: {}".format(lr))
if not 0.0 <= eps:
raise ValueError("Invalid epsilon value: {}".format(eps))
if not 0.0 <= betas[0] < 1.0:
raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
if not 0.0 <= betas[1] < 1.0:
raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
if not 0.0 <= final_lr:
raise ValueError("Invalid final learning rate: {}".format(final_lr))
if not 0.0 <= gamma < 1.0:
raise ValueError("Invalid gamma parameter: {}".format(gamma))
defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
weight_decay=weight_decay, amsbound=amsbound)
super(AdaBoundW, self).__init__(params, defaults)

self.base_lrs = list(map(lambda group: group['lr'], self.param_groups))

def __setstate__(self, state):
super(AdaBoundW, self).__setstate__(state)
for group in self.param_groups:
group.setdefault('amsbound', False)

def step(self, closure=None):
"""Performs a single optimization step.
Arguments:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
"""
loss = None
if closure is not None:
loss = closure()

for group, base_lr in zip(self.param_groups, self.base_lrs):
for p in group['params']:
if p.grad is None:
continue
grad = p.grad.data
if grad.is_sparse:
raise RuntimeError(
'Adam does not support sparse gradients, please consider SparseAdam instead')
amsbound = group['amsbound']

state = self.state[p]

# State initialization
if len(state) == 0:
state['step'] = 0
# Exponential moving average of gradient values
state['exp_avg'] = torch.zeros_like(p.data)
# Exponential moving average of squared gradient values
state['exp_avg_sq'] = torch.zeros_like(p.data)
if amsbound:
# Maintains max of all exp. moving avg. of sq. grad. values
state['max_exp_avg_sq'] = torch.zeros_like(p.data)

exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
if amsbound:
max_exp_avg_sq = state['max_exp_avg_sq']
beta1, beta2 = group['betas']

state['step'] += 1

# Decay the first and second moment running average coefficient
exp_avg.mul_(beta1).add_(1 - beta1, grad)
exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
if amsbound:
# Maintains the maximum of all 2nd moment running avg. till now
torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
# Use the max. for normalizing running avg. of gradient
denom = max_exp_avg_sq.sqrt().add_(group['eps'])
else:
denom = exp_avg_sq.sqrt().add_(group['eps'])

bias_correction1 = 1 - beta1 ** state['step']
bias_correction2 = 1 - beta2 ** state['step']
step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

# Applies bounds on actual learning rate
# lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
final_lr = group['final_lr'] * group['lr'] / base_lr
lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
step_size = torch.full_like(denom, step_size)
step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)

if group['weight_decay'] != 0:
decayed_weights = torch.mul(p.data, group['weight_decay'])
p.data.add_(-step_size)
p.data.sub_(decayed_weights)
else:
p.data.add_(-step_size)

return loss
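For illustration only (the commit removes this optimizer file), AdaBound as defined above was a drop-in replacement for other torch.optim optimizers; a minimal hypothetical training step:

import torch
import torch.nn as nn

model = nn.Linear(10, 2)
optimizer = AdaBound(model.parameters(), lr=1e-3, final_lr=0.1)

x, y = torch.randn(4, 10), torch.randint(0, 2, (4,))
loss = nn.functional.cross_entropy(model(x), y)
optimizer.zero_grad()
loss.backward()
optimizer.step()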
@@ -6,9 +6,7 @@

import argparse
import os
import sys
from bisect import bisect_right

from torch.backends import cudnn

import sys
@@ -23,6 +21,7 @@ from solver import *
from utils.logger import setup_logger



def train(cfg):
# prepare dataset
data_bunch, test_labels, num_query = get_data_bunch(cfg)
@@ -56,7 +55,7 @@ def train(cfg):
opt_func,
lr_sched,
loss_func,
num_query
num_query,
)
vis_data.ipynb
File diff suppressed because one or more lines are too long