Mirror of https://github.com/open-mmlab/mmclassification.git (synced 2025-06-03 21:53:55 +08:00)

commit c07e60dd7d (parent 194ab7efda)

[Feature] Support PASCAL VOC 2007 dataset for multilabel task (#134)

* support voc
* minor change
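In broad strokes, the commit adds a reusable MultiLabelDataset base class (with mAP/CP/CR/CF1/OP/OR/OF1 evaluation) and a VOC dataset built on top of it, registered under the name 'VOC'. As a hedged sketch of how the registered class would typically be picked up from a config, where the field names follow the usual mmcls dataset-config style and the paths are placeholders rather than anything this commit prescribes:

    # hypothetical config fragment, not part of this commit
    train = dict(
        type='VOC',  # resolved through the DATASETS registry
        data_prefix='data/VOCdevkit/VOC2007',
        ann_file='data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt',
        pipeline=train_pipeline)  # train_pipeline is a placeholder pipeline list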
mmcls/datasets/__init__.py
@@ -5,11 +5,13 @@ from .dataset_wrappers import (ClassBalancedDataset, ConcatDataset,
                                RepeatDataset)
 from .imagenet import ImageNet
 from .mnist import MNIST, FashionMNIST
+from .multi_label import MultiLabelDataset
 from .samplers import DistributedSampler
+from .voc import VOC
 
 __all__ = [
     'BaseDataset', 'ImageNet', 'CIFAR10', 'CIFAR100', 'MNIST', 'FashionMNIST',
-    'build_dataloader', 'build_dataset', 'Compose', 'DistributedSampler',
-    'ConcatDataset', 'RepeatDataset', 'ClassBalancedDataset', 'DATASETS',
-    'PIPELINES'
+    'VOC', 'MultiLabelDataset', 'build_dataloader', 'build_dataset', 'Compose',
+    'DistributedSampler', 'ConcatDataset', 'RepeatDataset',
+    'ClassBalancedDataset', 'DATASETS', 'PIPELINES'
 ]
mmcls/datasets/base_dataset.py
@@ -36,8 +36,8 @@ class BaseDataset(Dataset, metaclass=ABCMeta):
         self.data_prefix = data_prefix
         self.test_mode = test_mode
         self.pipeline = Compose(pipeline)
-        self.data_infos = self.load_annotations()
         self.CLASSES = self.get_classes(classes)
+        self.data_infos = self.load_annotations()
 
     @abstractmethod
     def load_annotations(self):
@@ -133,13 +133,13 @@ class BaseDataset(Dataset, metaclass=ABCMeta):
             metrics = metric
         allowed_metrics = ['accuracy', 'precision', 'recall', 'f1_score']
         eval_results = {}
-        for metric in metrics:
-            if metric not in allowed_metrics:
-                raise KeyError(f'metric {metric} is not supported.')
         results = np.vstack(results)
         gt_labels = self.get_gt_labels()
         num_imgs = len(results)
         assert len(gt_labels) == num_imgs
+        for metric in metrics:
+            if metric not in allowed_metrics:
+                raise KeyError(f'metric {metric} is not supported.')
             if metric == 'accuracy':
                 topk = metric_options.get('topk')
                 acc = accuracy(results, gt_labels, topk)
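The reordering in __init__ matters because a subclass's load_annotations may read the resolved class list; the new VOC dataset does exactly that via self.class_to_idx. A minimal sketch of the dependency (ToyDataset is hypothetical, not part of the commit):

    from mmcls.datasets import BaseDataset

    class ToyDataset(BaseDataset):
        def load_annotations(self):
            # self.class_to_idx is derived from self.CLASSES, so
            # get_classes() must already have run when __init__ calls this
            # hook; hence CLASSES is now assigned before load_annotations
            return [dict(gt_label=self.class_to_idx['car'])]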
mmcls/datasets/multi_label.py (new file, +65 lines)
@@ -0,0 +1,65 @@
+import numpy as np
+
+from mmcls.core import average_performance, mAP
+from .base_dataset import BaseDataset
+
+
+class MultiLabelDataset(BaseDataset):
+    """Multi-label Dataset.
+    """
+
+    def get_cat_ids(self, idx):
+        """Get category ids by index.
+
+        Args:
+            idx (int): Index of data.
+
+        Returns:
+            np.ndarray: Image categories of specified index.
+        """
+        gt_labels = self.data_infos[idx]['gt_label']
+        cat_ids = np.where(gt_labels == 1)[0]
+        return cat_ids
+
+    def evaluate(self, results, metric='mAP', logger=None, **eval_kwargs):
+        """Evaluate the dataset.
+
+        Args:
+            results (list): Testing results of the dataset.
+            metric (str | list[str]): Metrics to be evaluated.
+                Default value is 'mAP'. Options are 'mAP', 'CP', 'CR', 'CF1',
+                'OP', 'OR' and 'OF1'.
+            logger (logging.Logger | None | str): Logger used for printing
+                related information during evaluation. Default: None.
+
+        Returns:
+            dict: evaluation results
+        """
+        if isinstance(metric, str):
+            metrics = [metric]
+        else:
+            metrics = metric
+        allowed_metrics = ['mAP', 'CP', 'CR', 'CF1', 'OP', 'OR', 'OF1']
+        eval_results = {}
+        results = np.vstack(results)
+        gt_labels = self.get_gt_labels()
+        num_imgs = len(results)
+        assert len(gt_labels) == num_imgs, 'dataset testing results should '\
+            'be of the same length as gt_labels.'
+
+        invalid_metrics = set(metrics) - set(allowed_metrics)
+        if len(invalid_metrics) != 0:
+            raise KeyError(f'metric {invalid_metrics} is not supported.')
+
+        if 'mAP' in metrics:
+            mAP_value = mAP(results, gt_labels)
+            eval_results['mAP'] = mAP_value
+            metrics.remove('mAP')
+        if len(metrics) != 0:
+            performance_keys = ['CP', 'CR', 'CF1', 'OP', 'OR', 'OF1']
+            performance_values = average_performance(results, gt_labels,
+                                                     **eval_kwargs)
+            for k, v in zip(performance_keys, performance_values):
+                if k in metrics:
+                    eval_results[k] = v
+
+        return eval_results
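To see what the new hook returns, a small usage sketch; it mirrors the unit test added below, and patching away the abstract load_annotations is the same trick the tests use to instantiate the class directly:

    from unittest.mock import patch

    import numpy as np

    from mmcls.datasets import MultiLabelDataset

    with patch.multiple(MultiLabelDataset, __abstractmethods__=set()):
        dataset = MultiLabelDataset(data_prefix='', pipeline=[], test_mode=True)
        # gt_label convention: 1 = positive, 0 = negative, -1 = difficult
        dataset.data_infos = [
            dict(gt_label=[1, 1, 0, -1]),
            dict(gt_label=[0, -1, 1, -1]),
        ]
        scores = np.array([[0.9, 0.8, 0.3, 0.2],
                           [0.7, 0.5, 0.9, 0.3]])
        # returns a dict such as {'mAP': ..., 'CF1': ...}
        print(dataset.evaluate(scores, metric=['mAP', 'CF1']))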
mmcls/datasets/voc.py (new file, +69 lines)
@@ -0,0 +1,69 @@
+import os.path as osp
+import xml.etree.ElementTree as ET
+
+import mmcv
+import numpy as np
+
+from .builder import DATASETS
+from .multi_label import MultiLabelDataset
+
+
+@DATASETS.register_module()
+class VOC(MultiLabelDataset):
+    """`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Dataset.
+    """
+
+    CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
+               'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
+               'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
+               'tvmonitor')
+
+    def __init__(self, **kwargs):
+        super(VOC, self).__init__(**kwargs)
+        if 'VOC2007' in self.data_prefix:
+            self.year = 2007
+        else:
+            raise ValueError('Cannot infer dataset year from img_prefix.')
+
+    def load_annotations(self):
+        """Load annotations.
+
+        Returns:
+            list[dict]: Annotation info from XML file.
+        """
+        data_infos = []
+        img_ids = mmcv.list_from_file(self.ann_file)
+        for img_id in img_ids:
+            filename = f'JPEGImages/{img_id}.jpg'
+            xml_path = osp.join(self.data_prefix, 'Annotations',
+                                f'{img_id}.xml')
+            tree = ET.parse(xml_path)
+            root = tree.getroot()
+            labels = []
+            labels_difficult = []
+            for obj in root.findall('object'):
+                label_name = obj.find('name').text
+                # in case a customized dataset has wrong labels
+                # or CLASSES has been overridden.
+                if label_name not in self.CLASSES:
+                    continue
+                label = self.class_to_idx[label_name]
+                difficult = int(obj.find('difficult').text)
+                if difficult:
+                    labels_difficult.append(label)
+                else:
+                    labels.append(label)
+
+            gt_label = np.zeros(len(self.CLASSES))
+            # The order cannot be swapped for the case where multiple objects
+            # of the same kind exist and some are difficult.
+            gt_label[labels_difficult] = -1
+            gt_label[labels] = 1
+
+            info = dict(
+                img_prefix=self.data_prefix,
+                img_info=dict(filename=filename),
+                gt_label=gt_label.astype(np.int8))
+            data_infos.append(info)
+
+        return data_infos
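For context, a minimal sketch of driving the new class directly; the VOCdevkit layout and the trainval.txt split path are assumptions about a standard VOC2007 download, not something the diff prescribes:

    from mmcls.datasets import VOC

    # data_prefix must contain 'VOC2007', otherwise __init__ raises ValueError
    dataset = VOC(
        data_prefix='data/VOCdevkit/VOC2007',
        ann_file='data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt',
        pipeline=[])
    # one entry per listed image id; gt_label is a 20-way vector with 1 for
    # present classes and -1 for difficult-only classes
    print(len(dataset.data_infos), dataset.data_infos[0]['gt_label'])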
tests/test_datasets.py
@@ -10,28 +10,49 @@ import numpy as np
 import pytest
 
 from mmcls.datasets import (DATASETS, BaseDataset, ClassBalancedDataset,
-                            ConcatDataset, RepeatDataset)
+                            ConcatDataset, MultiLabelDataset, RepeatDataset)
 from mmcls.datasets.utils import check_integrity, rm_suffix
 
 
 @pytest.mark.parametrize(
     'dataset_name',
-    ['MNIST', 'FashionMNIST', 'CIFAR10', 'CIFAR100', 'ImageNet'])
+    ['MNIST', 'FashionMNIST', 'CIFAR10', 'CIFAR100', 'ImageNet', 'VOC'])
 def test_datasets_override_default(dataset_name):
     dataset_class = DATASETS.get(dataset_name)
     dataset_class.load_annotations = MagicMock()
 
     original_classes = dataset_class.CLASSES
 
+    # Test VOC year
+    if dataset_name == 'VOC':
+        dataset = dataset_class(
+            data_prefix='VOC2007',
+            pipeline=[],
+            classes=('bus', 'car'),
+            test_mode=True)
+        assert dataset.year == 2007
+        with pytest.raises(ValueError):
+            dataset = dataset_class(
+                data_prefix='VOC',
+                pipeline=[],
+                classes=('bus', 'car'),
+                test_mode=True)
+
     # Test setting classes as a tuple
     dataset = dataset_class(
-        data_prefix='', pipeline=[], classes=('bus', 'car'), test_mode=True)
+        data_prefix='VOC2007' if dataset_name == 'VOC' else '',
+        pipeline=[],
+        classes=('bus', 'car'),
+        test_mode=True)
     assert dataset.CLASSES != original_classes
     assert dataset.CLASSES == ('bus', 'car')
 
     # Test setting classes as a list
     dataset = dataset_class(
-        data_prefix='', pipeline=[], classes=['bus', 'car'], test_mode=True)
+        data_prefix='VOC2007' if dataset_name == 'VOC' else '',
+        pipeline=[],
+        classes=['bus', 'car'],
+        test_mode=True)
     assert dataset.CLASSES != original_classes
     assert dataset.CLASSES == ['bus', 'car']
@@ -40,7 +61,10 @@ def test_datasets_override_default(dataset_name):
     with open(tmp_file.name, 'w') as f:
         f.write('bus\ncar\n')
     dataset = dataset_class(
-        data_prefix='', pipeline=[], classes=tmp_file.name, test_mode=True)
+        data_prefix='VOC2007' if dataset_name == 'VOC' else '',
+        pipeline=[],
+        classes=tmp_file.name,
+        test_mode=True)
     tmp_file.close()
 
     assert dataset.CLASSES != original_classes
@@ -48,21 +72,30 @@ def test_datasets_override_default(dataset_name):
 
     # Test overriding not a subset
     dataset = dataset_class(
-        data_prefix='', pipeline=[], classes=['foo'], test_mode=True)
+        data_prefix='VOC2007' if dataset_name == 'VOC' else '',
+        pipeline=[],
+        classes=['foo'],
+        test_mode=True)
     assert dataset.CLASSES != original_classes
     assert dataset.CLASSES == ['foo']
 
     # Test default behavior
-    dataset = dataset_class(data_prefix='', pipeline=[])
+    dataset = dataset_class(
+        data_prefix='VOC2007' if dataset_name == 'VOC' else '', pipeline=[])
 
-    assert dataset.data_prefix == ''
+    if dataset_name == 'VOC':
+        assert dataset.data_prefix == 'VOC2007'
+    else:
+        assert dataset.data_prefix == ''
     assert not dataset.test_mode
     assert dataset.ann_file is None
     assert dataset.CLASSES == original_classes
 
 
+@patch.multiple(MultiLabelDataset, __abstractmethods__=set())
 @patch.multiple(BaseDataset, __abstractmethods__=set())
 def test_dataset_evaluation():
+    # test multi-class single-label evaluation
     dataset = BaseDataset(data_prefix='', pipeline=[], test_mode=True)
     dataset.data_infos = [
         dict(gt_label=0),
@@ -83,6 +116,37 @@ def test_dataset_evaluation():
     assert eval_results['f1_score'] == pytest.approx(
         (4 / 5 + 2 / 3 + 1 / 2) / 3 * 100.0)
 
+    # test multi-label evaluation
+    dataset = MultiLabelDataset(data_prefix='', pipeline=[], test_mode=True)
+    dataset.data_infos = [
+        dict(gt_label=[1, 1, 0, -1]),
+        dict(gt_label=[1, 1, 0, -1]),
+        dict(gt_label=[0, -1, 1, -1]),
+        dict(gt_label=[0, 1, 0, -1]),
+        dict(gt_label=[0, 1, 0, -1]),
+    ]
+    fake_results = np.array([[0.9, 0.8, 0.3, 0.2], [0.1, 0.2, 0.2, 0.1],
+                             [0.7, 0.5, 0.9, 0.3], [0.8, 0.1, 0.1, 0.2],
+                             [0.8, 0.1, 0.1, 0.2]])
+
+    # the metric must be valid
+    with pytest.raises(KeyError):
+        metric = 'coverage'
+        dataset.evaluate(fake_results, metric=metric)
+    # only one metric
+    metric = 'mAP'
+    eval_results = dataset.evaluate(fake_results, metric=metric)
+    assert 'mAP' in eval_results.keys()
+    assert 'CP' not in eval_results.keys()
+
+    # multiple metrics
+    metric = ['mAP', 'CR', 'OF1']
+    eval_results = dataset.evaluate(fake_results, metric=metric)
+    assert 'mAP' in eval_results.keys()
+    assert 'CR' in eval_results.keys()
+    assert 'OF1' in eval_results.keys()
+    assert 'CF1' not in eval_results.keys()
+
 
 @patch.multiple(BaseDataset, __abstractmethods__=set())
 def test_dataset_wrapper():