master
Liao 2017-06-19 16:57:31 +10:00
commit cff0edc108
4 changed files with 396 additions and 0 deletions

26
README.md 100644
View File

@ -0,0 +1,26 @@
------------------------------------------------------------------------------------------
Readme for the SCDA method package
version Dec. 18, 2016
------------------------------------------------------------------------------------------
The package includes the MATLAB code of the SCDA method for fine-grained image retrieval.
The main / key parts of SCDA are included in this package, i.e., selection and aggregation.
You will find a demo 'imdb.mat' file for the CUB200 dataset. But you need to download the
pre-trained VGG-16 model by yourself via http://www.vlfeat.org/matconvnet/pretrained/.
References:
X.-S. Wei, J.-H. Luo, J. Wu and Z.-H. Zhou. Selective Convolutional Descriptor Aggregation
for Fine-Grained Image Retrieval. IEEE Transactions on Image Processing, 2016, in press.
ATTN: This packages are free for academic usage. You can run them at your own risk. For other
purposes, please contact Prof. Jianxin Wu (wujx2001@gmail.com).
Requirement: To use this package, MatConvNet (http://www.vlfeat.org/matconvnet/) must be available.
ATTN2: This packages were developed by Mr. Xiu-Shen Wei (weixs.gm@gmail.com).
For any problem concerning the code, please feel free to contact Mr. Wei.
------------------------------------------------------------------------------------------

165
SCDA_Aggregation.m 100644
View File

@ -0,0 +1,165 @@
% The codes are written by Xiu-Shen Wei (weixs.gm@gmail.com). For any problem concerning the code, please feel free to contact Mr. Wei.
% This packages are free for academic usage. You can run them at your own risk. For other purposes, please contact Prof. Jianxin Wu (wujx2001@gmail.com).
% The codes are corresponding to the deep descriptors *aggregation* procedure
% of the proposed SCDA method.
% Load the extracted SCDA features.
% The file is expected to provide cnnFeat_tr_L31, cnnFeat_te_L31,
% cnnFeat_tr_L28, cnnFeat_te_L28, train_label and test_label.
% NOTE(review): SCDA_Selection.m writes this file under
% './feats/<dataset>/'; here it is read from the current folder, so the
% script presumably has to be run from that folder -- confirm.
load('SCDA_flip_plus.mat');
%% Global-avg-pool
% Each cell holds the descriptors selected for one image, stacked along the
% second dimension (1-by-N-by-D). Column 1 of every cell array is the
% original image, column 2 its horizontal flip. Every image is reduced to
% one L2-normalised D-dimensional row; images without any selected
% descriptor keep an all-zero row.
avg_sets = {cnnFeat_tr_L31, cnnFeat_te_L31, cnnFeat_tr_L28, cnnFeat_te_L28};
avg_out = cell(numel(avg_sets), 2);
for s = 1 : numel(avg_sets)
    for v = 1 : 2
        descs = avg_sets{s}(:, v);
        filled = find(~cellfun(@isempty, descs));
        % Preallocate using the first non-empty entry, so the column count
        % does not depend on entry 1 being non-empty.
        if isempty(filled)
            pooled = zeros(numel(descs), 0);
        else
            ref = descs{filled(1)};
            pooled = zeros(numel(descs), size(ref, 3), class(ref));
        end
        for k = 1 : numel(filled)
            i = filled(k);
            % Explicit N-by-D layout. squeeze() would turn a single
            % descriptor (1-by-1-by-D) into a D-by-1 column, and mean()
            % would then collapse it to a scalar.
            X = reshape(descs{i}, [], size(descs{i}, 3));
            pooled(i,:) = mean(X, 1);
            nrm = norm(pooled(i,:));
            if nrm > 0   % leave an all-zero row as is instead of producing NaN
                pooled(i,:) = pooled(i,:) ./ nrm;
            end
        end
        avg_out{s, v} = pooled;
    end
end
train_data_L31a = avg_out{1,1};
train_data_L31b = avg_out{1,2};
test_data_L31a = avg_out{2,1};
test_data_L31b = avg_out{2,2};
train_data_L28a = avg_out{3,1};
train_data_L28b = avg_out{3,2};
test_data_L28a = avg_out{4,1};
test_data_L28b = avg_out{4,2};
save('SCDA_avgPool.mat','train_data_L31a','test_data_L31a','train_data_L28a','test_data_L28a'...
    ,'train_data_L31b','test_data_L31b','train_data_L28b','test_data_L28b'...
    ,'train_label','test_label','-v7.3');
disp('SCDA avgPool is done ...');
%% Global-max-pool
% Same layout as the avg-pool section: each cell of cnnFeat_* holds the
% selected descriptors of one image as 1-by-N-by-D (column 1: original
% image, column 2: horizontal flip). Every image is reduced to the
% channel-wise maximum over its descriptors, then L2-normalised; images
% without any selected descriptor keep an all-zero row.
max_sets = {cnnFeat_tr_L31, cnnFeat_te_L31, cnnFeat_tr_L28, cnnFeat_te_L28};
max_out = cell(numel(max_sets), 2);
for s = 1 : numel(max_sets)
    for v = 1 : 2
        descs = max_sets{s}(:, v);
        filled = find(~cellfun(@isempty, descs));
        % Preallocate using the first non-empty entry, so the column count
        % does not depend on entry 1 being non-empty.
        if isempty(filled)
            pooled = zeros(numel(descs), 0);
        else
            ref = descs{filled(1)};
            pooled = zeros(numel(descs), size(ref, 3), class(ref));
        end
        for k = 1 : numel(filled)
            i = filled(k);
            % Explicit N-by-D layout and explicit dimension. With
            % squeeze(), a single descriptor (1-by-1-by-D) becomes a
            % D-by-1 column and max() would return one scalar.
            X = reshape(descs{i}, [], size(descs{i}, 3));
            pooled(i,:) = max(X, [], 1);
            nrm = norm(pooled(i,:));
            if nrm > 0   % leave an all-zero row as is instead of producing NaN
                pooled(i,:) = pooled(i,:) ./ nrm;
            end
        end
        max_out{s, v} = pooled;
    end
end
train_data_L31a = max_out{1,1};
train_data_L31b = max_out{1,2};
test_data_L31a = max_out{2,1};
test_data_L31b = max_out{2,2};
train_data_L28a = max_out{3,1};
train_data_L28b = max_out{3,2};
test_data_L28a = max_out{4,1};
test_data_L28b = max_out{4,2};
save('SCDA_maxPool.mat','train_data_L31a','test_data_L31a','train_data_L28a','test_data_L28a'...
    ,'train_data_L31b','test_data_L31b','train_data_L28b','test_data_L28b'...
    ,'train_label','test_label','-v7.3');
disp('SCDA maxPool is done ...');
% Concatenation
% Build the final feature per image by placing side by side, first for the
% original image ('a') and then for its horizontal flip ('b'):
%   [avg L31, max L31, ratio * avg L28, ratio * max L28]
% The result stays in the workspace as train_data / test_data.
ratio = 0.5;   % weight applied to the L28 parts
avg = load('SCDA_avgPool.mat');
maxi = load('SCDA_maxPool.mat');
train_label = avg.train_label;
test_label = avg.test_label;
train_data = [];
test_data = [];
views = {'a', 'b'};
for v = 1 : numel(views)
    f31 = ['_data_L31' views{v}];
    f28 = ['_data_L28' views{v}];
    train_data = [train_data, ...
        avg.(['train' f31]), maxi.(['train' f31]), ...
        ratio .* avg.(['train' f28]), ratio .* maxi.(['train' f28])];
    test_data = [test_data, ...
        avg.(['test' f31]), maxi.(['test' f31]), ...
        ratio .* avg.(['test' f28]), ratio .* maxi.(['test' f28])];
end
disp('The final SCDA_flip_plus feature is done ...');

205
SCDA_Selection.m 100644
View File

@ -0,0 +1,205 @@
% The codes are written by Xiu-Shen Wei (weixs.gm@gmail.com). For any problem concerning the code, please feel free to contact Mr. Wei.
% This packages are free for academic usage. You can run them at your own risk. For other purposes, please contact Prof. Jianxin Wu (wujx2001@gmail.com).
% The codes are corresponding to the deep descriptors *selection* procedure
% of the proposed SCDA method.
% Setting the GPU device
opt.gpu = 1;
g = gpuDevice(opt.gpu);
reset(g);
% Fine-grained datasets
opt.dataset = 'CUB200';
% CUB200
% Dogs
% Flowers
% Moth
% Pets
% Airplane
% Car
% The pre-trained model--VGG-16
opt.model = 'imagenet-vgg-verydeep-16';
% Our selective threshold
opt.thr = 'mean';
% load CNN model
run('../toolbox/matconvnet-1.0-beta23/matlab/vl_setupnn.m');
net = load(['../model/' opt.model '.mat']);
net.layers(end-5:end)=[]; % Removing the fully connected layers
net = vl_simplenn_move(net, 'gpu') ;
disp('CNN model is ready ...');
% load imdb
% NOTE(review): the variable name 'path' shadows MATLAB's built-in path()
% in this workspace; it is kept because later parts of the script use it.
path = './feats/';
imdb = load([path opt.dataset '/imdb.mat']);
% Using the RGB average values obtained from ImageNet:
% a constant 224x224x3 image whose channel k is filled with the k-th mean
% value stored in the model's meta data.
net.normalization.averageImage = ones(224,224,3);
for ch = 1 : 3
    net.normalization.averageImage(:,:,ch) = net.meta.normalization.averageImage(1,ch);
end
imdb.averageImage = net.normalization.averageImage;
% Number of training (set == 1) and test (set == 3) images. nnz counts the
% matches regardless of whether imdb.images.set is a row or a column.
num_tr = nnz(imdb.images.set == 1);
num_te = nnz(imdb.images.set == 3);
% One row per image; column 1 = original image, column 2 = horizontal flip.
cnnFeat_tr_L31 = cell(num_tr,2);
cnnFeat_te_L31 = cell(num_te,2);
cnnFeat_tr_L28 = cell(num_tr,2);
cnnFeat_te_L28 = cell(num_te,2);
count_tr = 1;
count_te = 1;
ex_time = [];
% Descriptor selection for every training / test image.
% For each image, and for its horizontal flip, the network is run once and
%   * pool5 (res(32)): positions whose channel-summed activation exceeds
%     the map mean are kept, restricted to the largest connected component;
%   * relu5_2 (res(29)): positions above that map's mean are kept if they
%     also fall inside the pool5 component resized to the relu5_2 map.
% The kept descriptors are stored as 1-by-N-by-D arrays ([] when none is
% kept), in row-major order of the feature map.
num_images = size(imdb.images.name,2);
ex_time = zeros(num_images,1);
% (H x W x D) feature map -> (H*W) x D matrix with rows in row-major order.
rows_of = @(fm) reshape(permute(fm, [2 1 3]), [], size(fm, 3));
for i = 1 : num_images
    tic
    set_id = imdb.images.set(1,i);
    is_train = (set_id == 1);
    if ~is_train && set_id ~= 3
        % Neither training (1) nor test (3): the output cells and labels
        % are sized for sets 1 and 3 only, so such images are skipped.
        ex_time(i,1) = toc;
        continue;
    end
    im = imread([imdb.imageDir '/' imdb.images.name{1,i} '.jpg']);
    for view = 1 : 2   % 1: original image, 2: horizontal flip
        if view == 2
            im = fliplr(im);
        end
        im_ = single(im);
        [h,w,~] = size(im_);
        if min(h,w) > 700
            % Limit the shorter side to 700 pixels.
            im_ = imresize(im_, [h*(700/min(h,w)) w*(700/min(h,w))]);
        end
        [h,w,~] = size(im_);
        % bsxfun also expands single-channel images to three channels.
        im_ = bsxfun(@minus,im_,imresize(imdb.averageImage,[h,w])) ;
        res = vl_simplenn(net, gpuArray(im_)) ;
        % res = vl_simplenn(net, im_);
        pool5 = gather(res(32).x);
        relu5_2 = gather(res(29).x);
        % Pool5: threshold at the mean, keep the biggest component.
        act_sum = sum(pool5, 3);
        above = act_sum > mean(mean(act_sum));
        cc = bwconncomp(above);
        mask_L31 = false(size(above));
        if cc.NumObjects > 0   % no position above the mean -> empty mask
            [~,conn_idx] = max(cellfun(@numel,cc.PixelIdxList));
            mask_L31(cc.PixelIdxList{conn_idx}) = true;
        end
        % Relu5_2: threshold at the mean, intersect with the pool5 mask.
        act_sum = sum(relu5_2, 3);
        above = act_sum > mean(mean(act_sum));
        mask_L28 = above & imresize(double(mask_L31), size(above), 'nearest');
        % Gather the selected descriptors of both layers.
        maps = {pool5, relu5_2};
        masks = {mask_L31, mask_L28};
        sel = cell(1,2);
        for l = 1 : 2
            m = masks{l}.';          % transpose -> row-major linear order
            picked = rows_of(maps{l});
            picked = picked(m(:), :);   % N x D
            if isempty(picked)
                sel{l} = [];
            else
                sel{l} = reshape(picked, 1, size(picked,1), size(picked,2));
            end
        end
        if is_train
            % train data
            cnnFeat_tr_L31{count_tr,view} = sel{1};
            cnnFeat_tr_L28{count_tr,view} = sel{2};
        else
            % test data
            cnnFeat_te_L31{count_te,view} = sel{1};
            cnnFeat_te_L28{count_te,view} = sel{2};
        end
    end
    if is_train
        count_tr = count_tr + 1;
    else
        count_te = count_te + 1;
    end
    ex_time(i,1) = toc;
    disp(['Extracing ' opt.dataset ': ' num2str(i) 'th image (' num2str(i*100/size(imdb.images.name,2)) '%) used ' num2str(ex_time(i,1)) 's ...']);
end
% Class labels of the training (set == 1) and test (set == 3) images,
% transposed, then everything is written to the dataset's feature folder.
in_train = (imdb.images.set == 1);
in_test = (imdb.images.set == 3);
train_label = imdb.images.class(in_train)';
test_label = imdb.images.class(in_test)';
out_file = [path opt.dataset '/SCDA_flip_plus.mat'];
save(out_file, ...
    'cnnFeat_tr_L31','cnnFeat_te_L31','cnnFeat_tr_L28','cnnFeat_te_L28', ...
    'train_label','test_label','ex_time','-v7.3');
disp(['Feature extracting of ' opt.dataset ' is finished ...']);

Binary file not shown.