basic

2017-06-19 16:57:31 +10:00 · 2017-06-19 16:57:31 +10:00 · cff0edc108
commit cff0edc108
4 changed files with 396 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,26 @@
+------------------------------------------------------------------------------------------
+	                   Readme for the SCDA method package
+								version Dec. 18, 2016
+------------------------------------------------------------------------------------------
+
+The package includes the MATLAB code of the SCDA method for fine-grained image retrieval.
+
+The main / key parts of SCDA are included in this package, i.e., selection and aggregation.
+
+You will find a demo 'imdb.mat' file for the CUB200 dataset. But you need to download the 
+pre-trained VGG-16 model by yourself via http://www.vlfeat.org/matconvnet/pretrained/.
+
+References: 
+X.-S. Wei, J.-H. Luo, J. Wu and Z.-H. Zhou. Selective Convolutional Descriptor Aggregation
+for Fine-Grained Image Retrieval. IEEE Transactions on Image Processing, 2016, in press.
+
+ATTN: This package is free for academic use. You can run it at your own risk. For other
+purposes, please contact Prof. Jianxin Wu (wujx2001@gmail.com).
+
+Requirement: To use this package, MatConvNet (http://www.vlfeat.org/matconvnet/) must be available.
+
+ATTN2: This package was developed by Mr. Xiu-Shen Wei (weixs.gm@gmail.com).
+For any problem concerning the code, please feel free to contact Mr. Wei.
+
+
+------------------------------------------------------------------------------------------
--- a/SCDA_Aggregation.m
+++ b/SCDA_Aggregation.m
@ -0,0 +1,165 @@
+% The codes are written by Xiu-Shen Wei (weixs.gm@gmail.com). For any problem concerning the code, please feel free to contact Mr. Wei.
+% This package is free for academic use. You can run it at your own risk. For other purposes, please contact Prof. Jianxin Wu (wujx2001@gmail.com).
+
+% The codes are corresponding to the deep descriptors *aggregation* procedure
+% of the proposed SCDA method.
+
+% Load the extracted SCDA features
+% The file is looked up in the current folder / MATLAB path first (original
+% behaviour). SCDA_Selection.m saves it as ./feats/<dataset>/SCDA_flip_plus.mat,
+% so that location is used as a fallback for the demo dataset when the plain
+% file name cannot be resolved.
+scda_feat_file = 'SCDA_flip_plus.mat';
+if exist(scda_feat_file, 'file') ~= 2
+    scda_feat_file = './feats/CUB200/SCDA_flip_plus.mat';
+end
+load(scda_feat_file);
+
+%% Global-avg-pool
+% Every cell of cnnFeat_* holds the descriptors selected for one image, stacked
+% along the 2nd dimension (1 x N x D, as concatenated in SCDA_Selection.m).
+% Column 1 is the original image, column 2 the horizontally flipped one.
+%
+% The stack is reshaped to N x D before pooling. squeeze() would turn a single
+% selected descriptor (1 x 1 x D) into a D x 1 column, and mean() would then
+% collapse it to one scalar that gets broadcast over the whole output row.
+scda_rows = @(f) reshape(f, [], size(f,3));
+% l2-normalisation; the eps floor avoids 0/0 = NaN for an all-zero vector.
+scda_l2 = @(v) v ./ max(norm(v), eps);
+% Average over the descriptors (dim 1), convert to double (the class of the
+% output matrices), then normalise.
+scda_avg = @(f) scda_l2(double(mean(scda_rows(f), 1)));
+
+% Images without any selected descriptor get an all-zero row.
+train_data_L31a = [];
+train_data_L31b = [];
+for i = 1 : size(cnnFeat_tr_L31,1)
+    if isempty(cnnFeat_tr_L31{i,1})
+        train_data_L31a(i,:) = 0;
+    else
+        train_data_L31a(i,:) = scda_avg(cnnFeat_tr_L31{i,1});
+    end
+    if isempty(cnnFeat_tr_L31{i,2})
+        train_data_L31b(i,:) = 0;
+    else
+        train_data_L31b(i,:) = scda_avg(cnnFeat_tr_L31{i,2});
+    end
+end
+test_data_L31a = [];
+test_data_L31b = [];
+for i = 1 : size(cnnFeat_te_L31,1)
+    if isempty(cnnFeat_te_L31{i,1})
+        test_data_L31a(i,:) = 0;
+    else
+        test_data_L31a(i,:) = scda_avg(cnnFeat_te_L31{i,1});
+    end
+    if isempty(cnnFeat_te_L31{i,2})
+        test_data_L31b(i,:) = 0;
+    else
+        test_data_L31b(i,:) = scda_avg(cnnFeat_te_L31{i,2});
+    end
+end
+train_data_L28a = [];
+train_data_L28b = [];
+for i = 1 : size(cnnFeat_tr_L28,1)
+    if isempty(cnnFeat_tr_L28{i,1})
+        train_data_L28a(i,:) = 0;
+    else
+        train_data_L28a(i,:) = scda_avg(cnnFeat_tr_L28{i,1});
+    end
+    if isempty(cnnFeat_tr_L28{i,2})
+        train_data_L28b(i,:) = 0;
+    else
+        train_data_L28b(i,:) = scda_avg(cnnFeat_tr_L28{i,2});
+    end
+end
+test_data_L28a = [];
+test_data_L28b = [];
+for i = 1 : size(cnnFeat_te_L28,1)
+    if isempty(cnnFeat_te_L28{i,1})
+        test_data_L28a(i,:) = 0;
+    else
+        test_data_L28a(i,:) = scda_avg(cnnFeat_te_L28{i,1});
+    end
+    if isempty(cnnFeat_te_L28{i,2})
+        test_data_L28b(i,:) = 0;
+    else
+        test_data_L28b(i,:) = scda_avg(cnnFeat_te_L28{i,2});
+    end
+end
+
+save('SCDA_avgPool.mat','train_data_L31a','test_data_L31a','train_data_L28a','test_data_L28a'...
+    ,'train_data_L31b','test_data_L31b','train_data_L28b','test_data_L28b'...
+    ,'train_label','test_label','-v7.3');
+
+disp('SCDA avgPool is done ...');
+
+%% Global-max-pool
+% Every cell of cnnFeat_* holds the descriptors selected for one image, stacked
+% along the 2nd dimension (1 x N x D, as concatenated in SCDA_Selection.m).
+% Column 1 is the original image, column 2 the horizontally flipped one.
+%
+% The stack is reshaped to N x D before pooling. squeeze() would turn a single
+% selected descriptor (1 x 1 x D) into a D x 1 column, and max() would then
+% collapse it to one scalar that gets broadcast over the whole output row.
+scda_rows = @(f) reshape(f, [], size(f,3));
+% l2-normalisation; the eps floor avoids 0/0 = NaN for an all-zero vector.
+scda_l2 = @(v) v ./ max(norm(v), eps);
+% Maximum over the descriptors (dim 1), convert to double (the class of the
+% output matrices), then normalise.
+scda_max = @(f) scda_l2(double(max(scda_rows(f), [], 1)));
+
+% Images without any selected descriptor get an all-zero row.
+train_data_L31a = [];
+train_data_L31b = [];
+for i = 1 : size(cnnFeat_tr_L31,1)
+    if isempty(cnnFeat_tr_L31{i,1})
+        train_data_L31a(i,:) = 0;
+    else
+        train_data_L31a(i,:) = scda_max(cnnFeat_tr_L31{i,1});
+    end
+    if isempty(cnnFeat_tr_L31{i,2})
+        train_data_L31b(i,:) = 0;
+    else
+        train_data_L31b(i,:) = scda_max(cnnFeat_tr_L31{i,2});
+    end
+end
+test_data_L31a = [];
+test_data_L31b = [];
+for i = 1 : size(cnnFeat_te_L31,1)
+    if isempty(cnnFeat_te_L31{i,1})
+        test_data_L31a(i,:) = 0;
+    else
+        test_data_L31a(i,:) = scda_max(cnnFeat_te_L31{i,1});
+    end
+    if isempty(cnnFeat_te_L31{i,2})
+        test_data_L31b(i,:) = 0;
+    else
+        test_data_L31b(i,:) = scda_max(cnnFeat_te_L31{i,2});
+    end
+end
+train_data_L28a = [];
+train_data_L28b = [];
+for i = 1 : size(cnnFeat_tr_L28,1)
+    if isempty(cnnFeat_tr_L28{i,1})
+        train_data_L28a(i,:) = 0;
+    else
+        train_data_L28a(i,:) = scda_max(cnnFeat_tr_L28{i,1});
+    end
+    if isempty(cnnFeat_tr_L28{i,2})
+        train_data_L28b(i,:) = 0;
+    else
+        train_data_L28b(i,:) = scda_max(cnnFeat_tr_L28{i,2});
+    end
+end
+test_data_L28a = [];
+test_data_L28b = [];
+for i = 1 : size(cnnFeat_te_L28,1)
+    if isempty(cnnFeat_te_L28{i,1})
+        test_data_L28a(i,:) = 0;
+    else
+        test_data_L28a(i,:) = scda_max(cnnFeat_te_L28{i,1});
+    end
+    if isempty(cnnFeat_te_L28{i,2})
+        test_data_L28b(i,:) = 0;
+    else
+        test_data_L28b(i,:) = scda_max(cnnFeat_te_L28{i,2});
+    end
+end
+
+save('SCDA_maxPool.mat','train_data_L31a','test_data_L31a','train_data_L28a','test_data_L28a'...
+    ,'train_data_L31b','test_data_L31b','train_data_L28b','test_data_L28b'...
+    ,'train_label','test_label','-v7.3');
+
+disp('SCDA maxPool is done ...');
+
+% Concatenation
+% Read back both pooled feature sets; the labels are taken from the avg-pool file.
+avg = load('SCDA_avgPool.mat');
+maxi = load('SCDA_maxPool.mat');
+test_label = avg.test_label;
+train_label = avg.train_label;
+
+% Weight applied to the L28 parts before concatenation.
+ratio = 0.5;
+% Column layout: [avg L31a, max L31a, avg L28a, max L28a, avg L31b, max L31b, avg L28b, max L28b]
+train_data = horzcat( ...
+    avg.train_data_L31a, maxi.train_data_L31a, ...
+    ratio .* avg.train_data_L28a, ratio .* maxi.train_data_L28a, ...
+    avg.train_data_L31b, maxi.train_data_L31b, ...
+    ratio .* avg.train_data_L28b, ratio .* maxi.train_data_L28b);
+test_data = horzcat( ...
+    avg.test_data_L31a, maxi.test_data_L31a, ...
+    ratio .* avg.test_data_L28a, ratio .* maxi.test_data_L28a, ...
+    avg.test_data_L31b, maxi.test_data_L31b, ...
+    ratio .* avg.test_data_L28b, ratio .* maxi.test_data_L28b);
+disp('The final SCDA_flip_plus feature is done ...');
--- a/SCDA_Selection.m
+++ b/SCDA_Selection.m
@ -0,0 +1,205 @@
+% The codes are written by Xiu-Shen Wei (weixs.gm@gmail.com). For any problem concerning the code, please feel free to contact Mr. Wei.
+% This package is free for academic use. You can run it at your own risk. For other purposes, please contact Prof. Jianxin Wu (wujx2001@gmail.com).
+
+% The codes are corresponding to the deep descriptors *selection* procedure
+% of the proposed SCDA method.
+
+% ---- Options ----
+% Index of the GPU device to use
+opt.gpu = 1;
+
+% Fine-grained dataset to process. Names listed by the authors:
+%   CUB200 / Dogs / Flowers / Moth / Pets / Airplane / Car
+opt.dataset = 'CUB200';
+
+% The pre-trained model--VGG-16
+opt.model = 'imagenet-vgg-verydeep-16';
+
+% Our selective threshold
+% (NOTE(review): opt.thr is not read anywhere in the visible part of this script;
+% the extraction loop applies the mean threshold directly.)
+opt.thr = 'mean';
+
+% ---- GPU ----
+% Select the device and reset it
+g = gpuDevice(opt.gpu);
+reset(g);
+
+% ---- CNN model ----
+run('../toolbox/matconvnet-1.0-beta23/matlab/vl_setupnn.m');
+net = load(['../model/' opt.model '.mat']);
+% Removing the fully connected layers (the last six entries of net.layers)
+net.layers(end-5:end)=[];
+net = vl_simplenn_move(net, 'gpu') ;
+disp('CNN model is ready ...');
+
+% ---- imdb ----
+path = './feats/';
+imdb = load([path opt.dataset '/imdb.mat']);
+
+% ---- Mean image ----
+% Using the RGB average values obtained from ImageNet: build a 224 x 224 x 3
+% image in which every channel is constant and equal to the model's
+% per-channel average.
+net.normalization.averageImage = ones(224,224,3);
+for ch = 1 : 3
+    net.normalization.averageImage(:,:,ch) = net.normalization.averageImage(:,:,ch) .* net.meta.normalization.averageImage(1,ch);
+end
+imdb.averageImage = net.normalization.averageImage;
+
+% ---- Output containers ----
+% One row per image; column 1 = original image, column 2 = horizontal flip.
+num_tr = size(find(imdb.images.set==1),2);
+num_te = size(find(imdb.images.set==3),2);
+cnnFeat_tr_L31 = cell(num_tr,2);
+cnnFeat_tr_L28 = cell(num_tr,2);
+cnnFeat_te_L31 = cell(num_te,2);
+cnnFeat_te_L28 = cell(num_te,2);
+% Next free row in the train / test containers
+count_tr = 1;
+count_te = 1;
+% Per-image extraction time in seconds
+ex_time = [];
+
+% Extract the selected descriptors of every train / test image, once for the
+% original image and once for its horizontal flip.
+for i = 1 : size(imdb.images.name,2)
+    % Only set==1 (train) and set==3 (test) have rows in the containers and
+    % entries in train_label / test_label, so images of any other set are
+    % skipped instead of being stored as test data.
+    is_train = (imdb.images.set(1,i) == 1);
+    is_test = (imdb.images.set(1,i) == 3);
+    if ~is_train && ~is_test
+        ex_time(i,1) = 0;
+        continue;
+    end
+
+    tic
+    im = imread([imdb.imageDir '/' imdb.images.name{1,i} '.jpg']);
+
+    % pass 1: original image, pass 2: horizontal flip
+    for pass = 1 : 2
+        if pass == 2
+            im = fliplr(im);
+        end
+        im_ = single(im);
+        [h,w,~] = size(im_);
+        % Shrink large images so that the shorter side becomes 700 pixels
+        if min(h,w) > 700
+            im_ = imresize(im_, [h*(700/min(h,w)) w*(700/min(h,w))]);
+        end
+        % Subtract the mean image; bsxfun expands images with fewer than
+        % three channels against the 3-channel mean.
+        [h,w,c] = size(im_);
+        if  c > 2
+            im_ = im_ - imresize(imdb.averageImage,[h,w]) ;
+        else
+            im_ = bsxfun(@minus,im_,imresize(imdb.averageImage,[h,w])) ;
+        end
+
+        res = vl_simplenn(net, gpuArray(im_)) ;
+%         res = vl_simplenn(net, im_);
+        tmp_1 = gather(res(32).x);
+        tmp_2 = gather(res(29).x);
+
+        % Pool5
+        % Mark the locations whose channel-summed activation exceeds the mean.
+        tmp_featmap = tmp_1;
+        tmp_featmap_sum = sum(tmp_featmap, 3);
+        tmp_mean = mean(mean(tmp_featmap_sum));
+        highlight = double(tmp_featmap_sum > tmp_mean);
+
+        % The biggest component. When no location exceeds the mean there is no
+        % component at all; the mask then stays empty instead of raising an
+        % indexing error.
+        cc = bwconncomp(highlight);
+        highlight_conn_L31 = zeros(size(highlight));
+        if cc.NumObjects > 0
+            numPixel = cellfun(@numel,cc.PixelIdxList);
+            [~,conn_idx] = max(numPixel);
+            highlight_conn_L31(cc.PixelIdxList{conn_idx}) = 1;
+        end
+
+        % Collect the descriptors of the kept locations (stacked along dim 2)
+        tmp_sel_feat_L31 = [];
+        for sel_i = 1 : size(tmp_featmap,1)
+            for sel_j = 1 : size(tmp_featmap,2)
+                if highlight_conn_L31(sel_i,sel_j)
+                    tmp_sel_feat_L31 = [tmp_sel_feat_L31, tmp_featmap(sel_i,sel_j,:)];
+                end
+            end
+        end
+
+        % Relu5_2
+        % Same thresholding, then intersect with the Pool5 mask resized to
+        % this layer's resolution.
+        tmp_featmap = tmp_2;
+        tmp_featmap_sum = sum(tmp_featmap, 3);
+        tmp_mean = mean(mean(tmp_featmap_sum));
+        highlight28 = double(tmp_featmap_sum > tmp_mean);
+        highlight = highlight28 & imresize(highlight_conn_L31, size(highlight28), 'nearest');
+        tmp_sel_feat_L28 = [];
+        for sel_i = 1 : size(tmp_featmap,1)
+            for sel_j = 1 : size(tmp_featmap,2)
+                if highlight(sel_i,sel_j)
+                    tmp_sel_feat_L28 = [tmp_sel_feat_L28, tmp_featmap(sel_i,sel_j,:)];
+                end
+            end
+        end
+
+        % Column index == pass: 1 for the original image, 2 for the flip
+        if is_train
+            % train data
+            cnnFeat_tr_L31{count_tr,pass} = tmp_sel_feat_L31;
+            cnnFeat_tr_L28{count_tr,pass} = tmp_sel_feat_L28;
+        else
+            % test data
+            cnnFeat_te_L31{count_te,pass} = tmp_sel_feat_L31;
+            cnnFeat_te_L28{count_te,pass} = tmp_sel_feat_L28;
+        end
+    end
+
+    % Advance the row counter once both passes of this image are stored
+    if is_train
+        count_tr = count_tr + 1;
+    else
+        count_te = count_te + 1;
+    end
+
+    ex_time(i,1) = toc;
+    disp(['Extracing ' opt.dataset ': ' num2str(i) 'th image (' num2str(i*100/size(imdb.images.name,2)) '%) used ' num2str(ex_time(i,1)) 's ...']);
+end
+% Class labels of the train (set==1) and test (set==3) images, transposed
+is_train_img = (imdb.images.set == 1);
+is_test_img = (imdb.images.set == 3);
+train_label = imdb.images.class(is_train_img)';
+test_label = imdb.images.class(is_test_img)';
+
+% Store the selected descriptors, the labels and the per-image timings
+out_file = [path opt.dataset '/SCDA_flip_plus.mat'];
+save(out_file,'cnnFeat_tr_L31','cnnFeat_te_L31','cnnFeat_tr_L28','cnnFeat_te_L28',...
+    'train_label','test_label','ex_time','-v7.3');
+
+disp(['Feature extracting of ' opt.dataset ' is finished ...']);
--- a/feats/CUB200/imdb.mat
+++ b/feats/CUB200/imdb.mat