322 lines
11 KiB
Matlab
322 lines
11 KiB
Matlab
function descriptors = LOMO(images, options)
%% function descriptors = LOMO(images, options)
% Function for the Local Maximal Occurrence (LOMO) feature extraction
%
% Input:
%   <images>: a set of n RGB color images. Size: [h, w, 3, n]
%   [options]: optional parameters. A structure containing any of the
%   following fields:
%       numScales: number of pyramid scales in feature extraction. Default: 3
%       blockSize: size of the sub-window for histogram counting. Default: 10
%       blockStep: sliding step for the sub-windows. Default: 5
%       hsvBins: number of bins for HSV channels. Default: [8,8,8]
%       tau: the tau parameter in SILTP. Default: 0.3
%       R: the radius parameter in SILTP. Specify multiple values for
%           multiscale SILTP. Default: [3, 5]
%       numPoints: number of neighborhood points for SILTP encoding. Default: 4
%   A field that is missing, empty, of the wrong type/shape, or not strictly
%   positive is silently ignored and the default is kept. Every accepted
%   override is echoed to the command window.
%   The above default parameters are good for 128x48 and 160x60 person
%   images. You may need to adjust the numScales, blockSize, and R parameters
%   for other smaller or higher resolutions.
%
% Output:
%   descriptors: the extracted LOMO descriptors. Size: [d, n]
%       Rows are the joint-HSV features followed by one SILTP feature
%       group per radius in R (in the order given).
%
% Example:
%     I = imread('../images/000_45_a.bmp');
%     descriptor = LOMO(I);
%
% Reference:
%   Shengcai Liao, Yang Hu, Xiangyu Zhu, and Stan Z. Li. Person
%   re-identification by local maximal occurrence representation and metric
%   learning. In IEEE Conference on Computer Vision and Pattern Recognition, 2015.
%
% Version: 1.0
% Date: 2015-04-29
%
% Author: Shengcai Liao
% Institute: National Laboratory of Pattern Recognition,
%   Institute of Automation, Chinese Academy of Sciences
% Email: scliao@nlpr.ia.ac.cn

%% set parameters
numScales = 3;
blockSize = 10;
blockStep = 5;

hsvBins = [8,8,8];
tau = 0.3;
R = [3, 5];
numPoints = 4;

if nargin >= 2
    % Shared acceptance test for every scalar option.
    isPositiveScalar = @(v) ~isempty(v) && isscalar(v) && isnumeric(v) && v > 0;

    if isfield(options,'numScales') && isPositiveScalar(options.numScales)
        numScales = options.numScales;
        fprintf('numScales = %d.\n', numScales);
    end
    if isfield(options,'blockSize') && isPositiveScalar(options.blockSize)
        blockSize = options.blockSize;
        fprintf('blockSize = %d.\n', blockSize);
    end
    if isfield(options,'blockStep') && isPositiveScalar(options.blockStep)
        blockStep = options.blockStep;
        fprintf('blockStep = %d.\n', blockStep);
    end
    % The shape test must look at hsvBins itself; testing another field
    % raises an error whenever that field is absent from the struct.
    if isfield(options,'hsvBins') && ~isempty(options.hsvBins) && isvector(options.hsvBins) && isnumeric(options.hsvBins) && length(options.hsvBins) == 3 && all(options.hsvBins > 0)
        hsvBins = options.hsvBins;
        fprintf('hsvBins = [%d, %d, %d].\n', hsvBins);
    end
    if isfield(options,'tau') && isPositiveScalar(options.tau)
        tau = options.tau;
        fprintf('tau = %g.\n', tau);
    end
    % R(:) keeps the all() result scalar even if R is passed as a matrix,
    % so the short-circuit && below cannot fail on a non-scalar operand.
    if isfield(options,'R') && ~isempty(options.R) && isnumeric(options.R) && all(options.R(:) > 0)
        R = options.R;
        % fprintf recycles the format: one 'R = ...' line per radius.
        fprintf('R = %d.\n', R);
    end
    if isfield(options,'numPoints') && isPositiveScalar(options.numPoints)
        numPoints = options.numPoints;
        fprintf('numPoints = %d.\n', numPoints);
    end
end

t0 = tic;

%% extract Joint HSV based LOMO descriptors
fea1 = PyramidMaxJointHist( images, numScales, blockSize, blockStep, hsvBins );

%% extract SILTP based LOMO descriptors
% One feature group per radius, collected in a cell and stacked once
% instead of growing a matrix inside the loop.
numRadii = numel(R);
siltpFea = cell(numRadii, 1);

for i = 1 : numRadii
    siltpFea{i} = PyramidMaxSILTPHist( images, numScales, blockSize, blockStep, tau, R(i), numPoints );
end

%% finishing
descriptors = [fea1; vertcat(siltpFea{:})];

% Timing is only consumed by the (disabled) progress message below.
feaTime = toc(t0);
meanTime = feaTime / size(images, 4); %#ok<NASGU>
% fprintf('LOMO feature extraction finished. Running time: %.3f seconds in total, %.3f seconds per image.\n', feaTime, meanTime);
end
|
|
|
|
|
|
function descriptors = PyramidMaxJointHist( oriImgs, numScales, blockSize, blockStep, colorBins )
%% PyramidMaxJointHist: HSV based LOMO representation
%
% Input:
%   oriImgs:   RGB images stacked along the 4th dimension.
%   numScales: number of pyramid levels to scan.
%   blockSize: side length of the square histogram window.
%   blockStep: stride between neighbouring windows.
%   colorBins: number of quantisation bins for [H, S, V].
%   When only oriImgs is supplied, the defaults 3, 10, 5 and [8,8,8] are used.
%
% Output:
%   descriptors: one column per image. For every pyramid level and every
%       row of windows, the joint HSV histogram is computed per window and
%       the bin-wise maximum over all windows in that row is kept. The
%       stacked result is passed through log(x + 1) and then normc.
%
% Retinex and normc are not defined in this file.

if nargin == 1
    numScales = 3;
    blockSize = 10;
    blockStep = 5;
    colorBins = [8,8,8];
end

numImgs = size(oriImgs, 4);
totalBins = prod(colorBins);

% color transformation: Retinex -> HSV -> per-channel bin index
images = zeros(size(oriImgs));

for n = 1 : numImgs
    hsvImg = rgb2hsv( Retinex( oriImgs(:,:,:,n) ) );

    for c = 1 : 3
        % A channel value of exactly 1 would land in bin colorBins(c);
        % the min() folds it into the last valid bin.
        hsvImg(:,:,c) = min( floor( hsvImg(:,:,c) * colorBins(c) ), colorBins(c)-1 );
    end

    images(:,:,:,n) = hsvImg; % HSV
end

firstRow = 1;
firstCol = 1;
descriptors = [];

% Scan multi-scale blocks and compute histograms
for scale = 1 : numScales
    % Joint bin index: V * binsS * binsH + S * binsH + H
    jointCodes = images(:,:,3,:) * colorBins(2) * colorBins(1) + images(:,:,2,:)*colorBins(1) + images(:,:,1,:); % HSV
    jointCodes = reshape(jointCodes, [], numImgs);

    height = size(images, 1);
    width = size(images, 2);

    % top-left positions of every window that fits inside the image
    rowStarts = firstRow : blockStep : height - blockSize + 1;
    colStarts = firstCol : blockStep : width - blockSize + 1;
    [cols, rows] = meshgrid(colStarts, rowStarts);
    cols = cols(:);
    rows = rows(:);
    numBlocks = length(cols);
    numBlocksCol = length(colStarts);

    if numBlocks == 0
        % The image is smaller than one window; deeper levels only shrink.
        break;
    end

    % Linear-index offsets of the pixels of one window relative to its
    % top-left pixel. blockSize-by-blockSize
    inBlockOffset = bsxfun(@plus, (0 : blockSize-1)', (0 : blockSize-1) * height);
    topLeft = sub2ind([height, width], rows, cols);
    pixelIdx = bsxfun(@plus, inBlockOffset(:), topLeft'); % (blockSize*blockSize)-by-numBlocks

    blockCodes = jointCodes(pixelIdx(:), :); % (blockSize * blockSize * numBlocks)-by-numImgs
    blockCodes = reshape(blockCodes, [], numBlocks * numImgs); % (blockSize * blockSize)-by-(numBlocks * numImgs)

    counts = hist(blockCodes, 0 : totalBins-1); % totalBins-by-(numBlocks * numImgs)
    % Windows are ordered row-first within each column of windows, so
    % dimension 3 below enumerates the windows of one horizontal strip.
    counts = reshape(counts, [totalBins, numBlocks / numBlocksCol, numBlocksCol, numImgs]);
    counts = max(counts, [], 3);
    counts = reshape(counts, [], numImgs);

    descriptors = [descriptors; counts]; %#ok<AGROW>

    % Halve the resolution for the next level (not needed after the last).
    if scale < numScales
        images = ColorPooling(images, 'average');
    end
end

descriptors = normc( log(descriptors + 1) );
end
|
|
|
|
|
|
function outImages = ColorPooling(images, method)
% ColorPooling: 2x2 non-overlapping pooling of a stack of multi-channel images.
%
% Input:
%   images: array indexed as (row, column, channel, image).
%   method: 'average' (mean of each 2x2 cell, rounded down with floor)
%           or 'max' (largest value of each 2x2 cell).
%
% Output:
%   outImages: array with half the rows and half the columns of the input.
%       A trailing odd row and/or column is discarded before pooling.
%
% Errors:
%   'Over scaled image: ...' when no complete 2x2 cell remains.
%   'Error pooling method: ...' when method is neither 'average' nor 'max'.

[numRows, numCols, numChannels, numImgs] = size(images);

% Round both extents down to the nearest even number.
numRows = numRows - (mod(numRows, 2) == 1);
numCols = numCols - (mod(numCols, 2) == 1);

if numRows == 0 || numCols == 0
    error('Over scaled image: height=%d, width=%d.', numRows, numCols);
end

cropped = images(1:numRows, 1:numCols, :, :);

halfRows = numRows / 2;
halfCols = numCols / 2;

% Split rows and columns into (position inside cell, cell index), then move
% the two in-cell positions to the back and merge them into one dimension
% holding the four samples of every cell.
cells = reshape(cropped, 2, halfRows, 2, halfCols, numChannels, numImgs);
cells = permute(cells, [2, 4, 5, 6, 1, 3]);
cells = reshape(cells, halfRows, halfCols, numChannels, numImgs, 2*2);

if strcmp(method, 'average')
    outImages = floor(mean(cells, 5));
elseif strcmp(method, 'max')
    outImages = max(cells, [], 5);
else
    error('Error pooling method: %s.', method);
end
end
|
|
|
|
function descriptors = PyramidMaxSILTPHist( oriImgs, numScales, blockSize, blockStep, tau, R, numPoints )
%% PyramidMaxSILTPHist: SILTP based LOMO representation
%
% Input:
%   oriImgs:   RGB images stacked along the 4th dimension.
%   numScales: number of pyramid levels to scan.
%   blockSize: side length of the square histogram window.
%   blockStep: stride between neighbouring windows.
%   tau, R, numPoints: forwarded unchanged to SILTP (not defined in this
%       file); R is a single radius here.
%   When only oriImgs is supplied, the defaults 3, 10, 5, 0.3, 5 and 4 are used.
%
% Output:
%   descriptors: one column per image. For every pyramid level and every
%       row of windows, a 3^numPoints-bin histogram of SILTP codes is
%       computed per window and the bin-wise maximum over all windows in
%       that row is kept. The stacked result is passed through log(x + 1)
%       and then normc.
%
% The scan stops at the first level whose width is below 2*R + 1 or that
% cannot hold a single window; a 'Skip scale' message is printed once in
% the former case.

if nargin == 1
    numScales = 3;
    blockSize = 10;
    blockStep = 5;
    tau = 0.3;
    R = 5;
    numPoints = 4;
end

totalBins = 3^numPoints;

[imgHeight, imgWidth, ~, numImgs] = size(oriImgs);
images = zeros(imgHeight,imgWidth, numImgs);

% Convert gray images
for i = 1 : numImgs
    I = oriImgs(:,:,:,i);
    I = rgb2gray(I);
    % NOTE(review): dividing by 255 presumes 8-bit intensities - confirm
    % against callers that pass floating-point images.
    images(:,:,i) = double(I) / 255;
end

minRow = 1;
minCol = 1;
descriptors = [];

% Scan multi-scale blocks and compute histograms
for i = 1 : numScales
    height = size(images, 1);
    width = size(images, 2);

    if width < R * 2 + 1
        fprintf('Skip scale R = %d, width = %d.\n', R, width);
        % Stop instead of continuing: the pooling step at the bottom of
        % the loop would be skipped, so every later iteration would see
        % the very same image, fail this test again and repeat the message.
        break;
    end

    % presumably one integer code in 0..totalBins-1 per pixel, with the
    % same height and width as the input - TODO confirm against SILTP
    patterns = SILTP(images, tau, R, numPoints);
    patterns = reshape(patterns, [], numImgs);

    maxRow = height - blockSize + 1;
    maxCol = width - blockSize + 1;

    [cols,rows] = meshgrid(minCol:blockStep:maxCol, minRow:blockStep:maxRow); % top-left positions
    cols = cols(:);
    rows = rows(:);
    numBlocks = length(cols);
    numBlocksCol = length(minCol:blockStep:maxCol);

    if numBlocks == 0
        % The image is smaller than one window; deeper levels only shrink.
        break;
    end

    offset = bsxfun(@plus, (0 : blockSize-1)', (0 : blockSize-1) * height); % offset to the top-left positions. blockSize-by-blockSize
    index = sub2ind([height, width], rows, cols);
    index = bsxfun(@plus, offset(:), index'); % (blockSize*blockSize)-by-numBlocks
    patches = patterns(index(:), :); % (blockSize * blockSize * numBlocks)-by-numImgs
    patches = reshape(patches, [], numBlocks * numImgs); % (blockSize * blockSize)-by-(numBlocks * numImgs)

    fea = hist(patches, 0:totalBins-1); % totalBins-by-(numBlocks * numImgs)
    % Windows are ordered row-first within each column of windows, so
    % dimension 3 below enumerates the windows of one horizontal strip.
    fea = reshape(fea, [totalBins, numBlocks / numBlocksCol, numBlocksCol, numImgs]);
    fea = max(fea, [], 3);
    fea = reshape(fea, [], numImgs);

    descriptors = [descriptors; fea]; %#ok<AGROW>

    % Halve the resolution for the next level (not needed after the last).
    if i < numScales
        images = Pooling(images, 'average');
    end
end

descriptors = log(descriptors + 1);
descriptors = normc(descriptors);
end
|
|
|
|
|
|
function outImages = Pooling(images, method)
% Pooling: 2x2 non-overlapping pooling of a stack of single-channel images.
%
% Input:
%   images: array indexed as (row, column, image).
%   method: 'average' (mean of each 2x2 cell) or 'max' (largest value of
%           each 2x2 cell).
%
% Output:
%   outImages: array with half the rows and half the columns of the input.
%       A trailing odd row and/or column is discarded before pooling.
%
% Errors:
%   'Over scaled image: ...' when no complete 2x2 cell remains.
%   'Error pooling method: ...' when method is neither 'average' nor 'max'.

[numRows, numCols, numImgs] = size(images);

% Round both extents down to the nearest even number.
numRows = numRows - (mod(numRows, 2) == 1);
numCols = numCols - (mod(numCols, 2) == 1);

if numRows == 0 || numCols == 0
    error('Over scaled image: height=%d, width=%d.', numRows, numCols);
end

cropped = images(1:numRows, 1:numCols, :);

halfRows = numRows / 2;
halfCols = numCols / 2;

% Split rows and columns into (position inside cell, cell index), then move
% the two in-cell positions to the back and merge them into one dimension
% holding the four samples of every cell.
cells = reshape(cropped, 2, halfRows, 2, halfCols, numImgs);
cells = permute(cells, [2, 4, 5, 1, 3]);
cells = reshape(cells, halfRows, halfCols, numImgs, 2*2);

if strcmp(method, 'average')
    outImages = mean(cells, 4);
elseif strcmp(method, 'max')
    outImages = max(cells, [], 4);
else
    error('Error pooling method: %s.', method);
end
end
|