[Feature] Support some downstream classification datasets. (#1467)

* feat: support some downstream classification datasets

* update sun397

* sum

* update sun397

* [CI] Add test mim CI. (#879)

* feat: support some downstream classification datasets

* update sun397

* sum

* update sun397

* rebase

* feat: support some downstream classification datasets

* update sun397

* update sun397

* update sun397

* update sun397

* fix unittest

* update docstring

* rm

* update

* update

* refactor names of datasets

* refactor some implements of datasets

* refactor some implements of datasets

* fix datasets unittest

* refactor cub and stanford cars

* refactor cub and cifar

* refactor cub and cifar

* refactor cub and cifar

* update downstream datasets and docs

* update docstring

---------

Co-authored-by: Ma Zerun <mzr1996@163.com>
Co-authored-by: Ezra-Yu <18586273+Ezra-Yu@users.noreply.github.com>
pull/1554/head
zzc98 2023-05-05 14:43:14 +08:00 committed by GitHub
parent a3fa328f09
commit 496e098b21
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 2360 additions and 103 deletions

View File

@ -23,8 +23,8 @@ train_dataloader = dict(
num_workers=2,
dataset=dict(
type=dataset_type,
data_prefix='data/cifar100',
test_mode=False,
data_root='data/cifar100',
split='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
)
@ -34,8 +34,8 @@ val_dataloader = dict(
num_workers=2,
dataset=dict(
type=dataset_type,
data_prefix='data/cifar100/',
test_mode=True,
data_root='data/cifar100/',
split='test',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)

View File

@ -23,8 +23,8 @@ train_dataloader = dict(
num_workers=2,
dataset=dict(
type=dataset_type,
data_prefix='data/cifar10',
test_mode=False,
data_root='data/cifar10',
split='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
)
@ -34,8 +34,8 @@ val_dataloader = dict(
num_workers=2,
dataset=dict(
type=dataset_type,
data_prefix='data/cifar10/',
test_mode=True,
data_root='data/cifar10/',
split='test',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)

View File

@ -30,7 +30,7 @@ train_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root='data/CUB_200_2011',
test_mode=False,
split='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
)
@ -41,7 +41,7 @@ val_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root='data/CUB_200_2011',
test_mode=True,
split='test',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)

View File

@ -29,7 +29,7 @@ train_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root='data/CUB_200_2011',
test_mode=False,
split='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
)
@ -40,7 +40,7 @@ val_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root='data/CUB_200_2011',
test_mode=True,
split='test',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)

View File

@ -66,6 +66,47 @@ Base classes
.. autoclass:: MultiLabelDataset
Caltech101
----------------
.. autoclass:: Caltech101
Food101
----------------
.. autoclass:: Food101
DTD
----------------
.. autoclass:: DTD
FGVCAircraft
----------------
.. autoclass:: FGVCAircraft
Flowers102
----------------
.. autoclass:: Flowers102
StanfordCars
----------------
.. autoclass:: StanfordCars
OxfordIIITPet
----------------
.. autoclass:: OxfordIIITPet
SUN397
----------------
.. autoclass:: SUN397
Dataset Wrappers
----------------

View File

@ -229,7 +229,7 @@ napoleon_custom_sections = [
# Disable docstring inheritance
autodoc_inherit_docstrings = False
# Mock some imports during generate API docs.
autodoc_mock_imports = ['rich', 'attr', 'einops']
autodoc_mock_imports = ['rich', 'attr', 'einops', 'mat4py']
# Disable displaying type annotations, these can be very verbose
autodoc_typehints = 'none'

View File

@ -233,7 +233,7 @@ napoleon_custom_sections = [
# Disable docstring inheritance
autodoc_inherit_docstrings = False
# Mock some imports during generate API docs.
autodoc_mock_imports = ['rich', 'attr', 'einops']
autodoc_mock_imports = ['rich', 'attr', 'einops', 'mat4py']
# Disable displaying type annotations, these can be very verbose
autodoc_typehints = 'none'

View File

@ -1,17 +1,25 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .base_dataset import BaseDataset
from .builder import build_dataset
from .caltech101 import Caltech101
from .cifar import CIFAR10, CIFAR100
from .cub import CUB
from .custom import CustomDataset
from .dataset_wrappers import KFoldDataset
from .dtd import DTD
from .fgvcaircraft import FGVCAircraft
from .flowers102 import Flowers102
from .food101 import Food101
from .imagenet import ImageNet, ImageNet21k
from .inshop import InShop
from .mnist import MNIST, FashionMNIST
from .multi_label import MultiLabelDataset
from .multi_task import MultiTaskDataset
from .oxfordiiitpet import OxfordIIITPet
from .places205 import Places205
from .samplers import * # noqa: F401,F403
from .stanfordcars import StanfordCars
from .sun397 import SUN397
from .transforms import * # noqa: F401,F403
from .voc import VOC
@ -19,5 +27,6 @@ __all__ = [
'BaseDataset', 'ImageNet', 'CIFAR10', 'CIFAR100', 'MNIST', 'FashionMNIST',
'VOC', 'build_dataset', 'ImageNet21k', 'KFoldDataset', 'CUB',
'CustomDataset', 'MultiLabelDataset', 'MultiTaskDataset', 'InShop',
'Places205'
'Places205', 'Flowers102', 'OxfordIIITPet', 'DTD', 'FGVCAircraft',
'StanfordCars', 'SUN397', 'Caltech101', 'Food101'
]

View File

@ -49,7 +49,9 @@ class BaseDataset(_BaseDataset):
when enabled, data loader workers can use shared RAM from master
process instead of making a copy. Defaults to True.
pipeline (Sequence): Processing pipeline. Defaults to an empty tuple.
test_mode (bool): ``test_mode=True`` means in test phase.
test_mode (bool, optional): ``test_mode=True`` means in test phase,
an error will be raised when getting an item fails, ``test_mode=False``
means in training phase, another item will be returned randomly.
Defaults to False.
lazy_init (bool): Whether to load annotation during instantiation.
In some cases, such as visualization, only the meta information of

View File

@ -0,0 +1,113 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List
from mmengine import get_file_backend, list_from_file
from mmpretrain.registry import DATASETS
from .base_dataset import BaseDataset
from .categories import CALTECH101_CATEGORIES
@DATASETS.register_module()
class Caltech101(BaseDataset):
    """The Caltech101 Dataset.

    Support the `Caltech101 <https://data.caltech.edu/records/mzrjq-6wc02>`_ Dataset.
    After downloading and decompression, the dataset directory structure is as follows.

    Caltech101 dataset directory: ::

        caltech-101
        ├── 101_ObjectCategories
        │   ├── class_x
        │   │   ├── xx1.jpg
        │   │   ├── xx2.jpg
        │   │   └── ...
        │   ├── class_y
        │   │   ├── yy1.jpg
        │   │   ├── yy2.jpg
        │   │   └── ...
        │   └── ...
        ├── Annotations
        │   ├── class_x
        │   │   ├── xx1.mat
        │   │   └── ...
        │   └── ...
        ├── meta
        │   ├── train.txt
        │   └── test.txt
        └── ....

    Please note that since there is no official splitting for training and
    test set, you can use the train.txt and test.txt provided by us or
    create your own annotation files. Here is the download
    `link <https://download.openmmlab.com/mmpretrain/datasets/caltech_meta.zip>`_
    for the annotations.

    Args:
        data_root (str): The root directory for the Caltech101 dataset.
        split (str, optional): The dataset split, supports "train" and "test".
            Defaults to "train".
        **kwargs: Other keyword arguments in :class:`BaseDataset`.

    Examples:
        >>> from mmpretrain.datasets import Caltech101
        >>> train_dataset = Caltech101(data_root='data/caltech-101', split='train')
        >>> train_dataset
        Dataset Caltech101
            Number of samples:  3060
            Number of categories:       102
            Root of dataset:    data/caltech-101
        >>> test_dataset = Caltech101(data_root='data/caltech-101', split='test')
        >>> test_dataset
        Dataset Caltech101
            Number of samples:  6728
            Number of categories:       102
            Root of dataset:    data/caltech-101
    """  # noqa: E501

    METAINFO = {'classes': CALTECH101_CATEGORIES}

    def __init__(self, data_root: str, split: str = 'train', **kwargs):
        splits = ['train', 'test']
        assert split in splits, \
            f"The split must be one of {splits}, but get '{split}'"
        self.split = split

        # The annotation file is selected by split; both live under `meta/`.
        self.backend = get_file_backend(data_root, enable_singleton=True)
        if split == 'train':
            ann_file = self.backend.join_path('meta', 'train.txt')
        else:
            ann_file = self.backend.join_path('meta', 'test.txt')

        # Images are stored under `101_ObjectCategories` inside data_root.
        data_prefix = '101_ObjectCategories'
        test_mode = split == 'test'

        super().__init__(
            ann_file=ann_file,
            data_root=data_root,
            data_prefix=data_prefix,
            test_mode=test_mode,
            **kwargs)

    def load_data_list(self):
        """Load images and ground truth labels.

        Each annotation line has the form ``<relative path> <label>``;
        paths are resolved against ``self.img_prefix``.
        """
        pairs = list_from_file(self.ann_file)
        data_list = []
        for pair in pairs:
            path, gt_label = pair.split()
            img_path = self.backend.join_path(self.img_prefix, path)
            info = dict(img_path=img_path, gt_label=int(gt_label))
            data_list.append(info)
        return data_list

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset."""
        body = [
            f'Root of dataset: \t{self.data_root}',
        ]
        return body

View File

@ -1141,3 +1141,289 @@ PLACES205_CATEGORIES = (
'valley', 'vegetable_garden', 'veranda', 'viaduct', 'volcano',
'waiting_room', 'water_tower', 'watering_hole', 'wheat_field', 'wind_farm',
'windmill', 'yard')
OxfordIIITPet_CATEGORIES = (
'Abyssinian', 'american_bulldog', 'american_pit_bull_terrier',
'basset_hound', 'beagle', 'Bengal', 'Birman', 'Bombay', 'boxer',
'British_Shorthair', 'chihuahua', 'Egyptian_Mau', 'english_cocker_spaniel',
'english_setter', 'german_shorthaired', 'great_pyrenees', 'havanese',
'japanese_chin', 'keeshond', 'leonberger', 'Maine_Coon',
'miniature_pinscher', 'newfoundland', 'Persian', 'pomeranian', 'pug',
'Ragdoll', 'Russian_Blue', 'saint_bernard', 'samoyed', 'scottish_terrier',
'shiba_inu', 'Siamese', 'Sphynx', 'staffordshire_bull_terrier',
'wheaten_terrier', 'yorkshire_terrier')
DTD_CATEGORIES = ('banded', 'blotchy', 'braided', 'bubbly', 'bumpy',
'chequered', 'cobwebbed', 'cracked', 'crosshatched',
'crystalline', 'dotted', 'fibrous', 'flecked', 'freckled',
'frilly', 'gauzy', 'grid', 'grooved', 'honeycombed',
'interlaced', 'knitted', 'lacelike', 'lined', 'marbled',
'matted', 'meshed', 'paisley', 'perforated', 'pitted',
'pleated', 'polka-dotted', 'porous', 'potholed', 'scaly',
'smeared', 'spiralled', 'sprinkled', 'stained', 'stratified',
'striped', 'studded', 'swirly', 'veined', 'waffled', 'woven',
'wrinkled', 'zigzagged')
FGVCAIRCRAFT_CATEGORIES = (
'707-320', '727-200', '737-200', '737-300', '737-400', '737-500',
'737-600', '737-700', '737-800', '737-900', '747-100', '747-200',
'747-300', '747-400', '757-200', '757-300', '767-200', '767-300',
'767-400', '777-200', '777-300', 'A300B4', 'A310', 'A318', 'A319', 'A320',
'A321', 'A330-200', 'A330-300', 'A340-200', 'A340-300', 'A340-500',
'A340-600', 'A380', 'ATR-42', 'ATR-72', 'An-12', 'BAE 146-200',
'BAE 146-300', 'BAE-125', 'Beechcraft 1900', 'Boeing 717', 'C-130', 'C-47',
'CRJ-200', 'CRJ-700', 'CRJ-900', 'Cessna 172', 'Cessna 208', 'Cessna 525',
'Cessna 560', 'Challenger 600', 'DC-10', 'DC-3', 'DC-6', 'DC-8', 'DC-9-30',
'DH-82', 'DHC-1', 'DHC-6', 'DHC-8-100', 'DHC-8-300', 'DR-400',
'Dornier 328', 'E-170', 'E-190', 'E-195', 'EMB-120', 'ERJ 135', 'ERJ 145',
'Embraer Legacy 600', 'Eurofighter Typhoon', 'F-16A/B', 'F/A-18',
'Falcon 2000', 'Falcon 900', 'Fokker 100', 'Fokker 50', 'Fokker 70',
'Global Express', 'Gulfstream IV', 'Gulfstream V', 'Hawk T1', 'Il-76',
'L-1011', 'MD-11', 'MD-80', 'MD-87', 'MD-90', 'Metroliner', 'Model B200',
'PA-28', 'SR-20', 'Saab 2000', 'Saab 340', 'Spitfire', 'Tornado', 'Tu-134',
'Tu-154', 'Yak-42')
STANFORDCARS_CATEGORIES = (
'AM General Hummer SUV 2000', 'Acura RL Sedan 2012', 'Acura TL Sedan 2012',
'Acura TL Type-S 2008', 'Acura TSX Sedan 2012',
'Acura Integra Type R 2001', 'Acura ZDX Hatchback 2012',
'Aston Martin V8 Vantage Convertible 2012',
'Aston Martin V8 Vantage Coupe 2012',
'Aston Martin Virage Convertible 2012', 'Aston Martin Virage Coupe 2012',
'Audi RS 4 Convertible 2008', 'Audi A5 Coupe 2012', 'Audi TTS Coupe 2012',
'Audi R8 Coupe 2012', 'Audi V8 Sedan 1994', 'Audi 100 Sedan 1994',
'Audi 100 Wagon 1994', 'Audi TT Hatchback 2011', 'Audi S6 Sedan 2011',
'Audi S5 Convertible 2012', 'Audi S5 Coupe 2012', 'Audi S4 Sedan 2012',
'Audi S4 Sedan 2007', 'Audi TT RS Coupe 2012',
'BMW ActiveHybrid 5 Sedan 2012', 'BMW 1 Series Convertible 2012',
'BMW 1 Series Coupe 2012', 'BMW 3 Series Sedan 2012',
'BMW 3 Series Wagon 2012', 'BMW 6 Series Convertible 2007',
'BMW X5 SUV 2007', 'BMW X6 SUV 2012', 'BMW M3 Coupe 2012',
'BMW M5 Sedan 2010', 'BMW M6 Convertible 2010', 'BMW X3 SUV 2012',
'BMW Z4 Convertible 2012',
'Bentley Continental Supersports Conv. Convertible 2012',
'Bentley Arnage Sedan 2009', 'Bentley Mulsanne Sedan 2011',
'Bentley Continental GT Coupe 2012', 'Bentley Continental GT Coupe 2007',
'Bentley Continental Flying Spur Sedan 2007',
'Bugatti Veyron 16.4 Convertible 2009', 'Bugatti Veyron 16.4 Coupe 2009',
'Buick Regal GS 2012', 'Buick Rainier SUV 2007', 'Buick Verano Sedan 2012',
'Buick Enclave SUV 2012', 'Cadillac CTS-V Sedan 2012',
'Cadillac SRX SUV 2012', 'Cadillac Escalade EXT Crew Cab 2007',
'Chevrolet Silverado 1500 Hybrid Crew Cab 2012',
'Chevrolet Corvette Convertible 2012', 'Chevrolet Corvette ZR1 2012',
'Chevrolet Corvette Ron Fellows Edition Z06 2007',
'Chevrolet Traverse SUV 2012', 'Chevrolet Camaro Convertible 2012',
'Chevrolet HHR SS 2010', 'Chevrolet Impala Sedan 2007',
'Chevrolet Tahoe Hybrid SUV 2012', 'Chevrolet Sonic Sedan 2012',
'Chevrolet Express Cargo Van 2007', 'Chevrolet Avalanche Crew Cab 2012',
'Chevrolet Cobalt SS 2010', 'Chevrolet Malibu Hybrid Sedan 2010',
'Chevrolet TrailBlazer SS 2009',
'Chevrolet Silverado 2500HD Regular Cab 2012',
'Chevrolet Silverado 1500 Classic Extended Cab 2007',
'Chevrolet Express Van 2007', 'Chevrolet Monte Carlo Coupe 2007',
'Chevrolet Malibu Sedan 2007',
'Chevrolet Silverado 1500 Extended Cab 2012',
'Chevrolet Silverado 1500 Regular Cab 2012', 'Chrysler Aspen SUV 2009',
'Chrysler Sebring Convertible 2010',
'Chrysler Town and Country Minivan 2012', 'Chrysler 300 SRT-8 2010',
'Chrysler Crossfire Convertible 2008',
'Chrysler PT Cruiser Convertible 2008', 'Daewoo Nubira Wagon 2002',
'Dodge Caliber Wagon 2012', 'Dodge Caliber Wagon 2007',
'Dodge Caravan Minivan 1997', 'Dodge Ram Pickup 3500 Crew Cab 2010',
'Dodge Ram Pickup 3500 Quad Cab 2009', 'Dodge Sprinter Cargo Van 2009',
'Dodge Journey SUV 2012', 'Dodge Dakota Crew Cab 2010',
'Dodge Dakota Club Cab 2007', 'Dodge Magnum Wagon 2008',
'Dodge Challenger SRT8 2011', 'Dodge Durango SUV 2012',
'Dodge Durango SUV 2007', 'Dodge Charger Sedan 2012',
'Dodge Charger SRT-8 2009', 'Eagle Talon Hatchback 1998',
'FIAT 500 Abarth 2012', 'FIAT 500 Convertible 2012',
'Ferrari FF Coupe 2012', 'Ferrari California Convertible 2012',
'Ferrari 458 Italia Convertible 2012', 'Ferrari 458 Italia Coupe 2012',
'Fisker Karma Sedan 2012', 'Ford F-450 Super Duty Crew Cab 2012',
'Ford Mustang Convertible 2007', 'Ford Freestar Minivan 2007',
'Ford Expedition EL SUV 2009', 'Ford Edge SUV 2012',
'Ford Ranger SuperCab 2011', 'Ford GT Coupe 2006',
'Ford F-150 Regular Cab 2012', 'Ford F-150 Regular Cab 2007',
'Ford Focus Sedan 2007', 'Ford E-Series Wagon Van 2012',
'Ford Fiesta Sedan 2012', 'GMC Terrain SUV 2012', 'GMC Savana Van 2012',
'GMC Yukon Hybrid SUV 2012', 'GMC Acadia SUV 2012',
'GMC Canyon Extended Cab 2012', 'Geo Metro Convertible 1993',
'HUMMER H3T Crew Cab 2010', 'HUMMER H2 SUT Crew Cab 2009',
'Honda Odyssey Minivan 2012', 'Honda Odyssey Minivan 2007',
'Honda Accord Coupe 2012', 'Honda Accord Sedan 2012',
'Hyundai Veloster Hatchback 2012', 'Hyundai Santa Fe SUV 2012',
'Hyundai Tucson SUV 2012', 'Hyundai Veracruz SUV 2012',
'Hyundai Sonata Hybrid Sedan 2012', 'Hyundai Elantra Sedan 2007',
'Hyundai Accent Sedan 2012', 'Hyundai Genesis Sedan 2012',
'Hyundai Sonata Sedan 2012', 'Hyundai Elantra Touring Hatchback 2012',
'Hyundai Azera Sedan 2012', 'Infiniti G Coupe IPL 2012',
'Infiniti QX56 SUV 2011', 'Isuzu Ascender SUV 2008', 'Jaguar XK XKR 2012',
'Jeep Patriot SUV 2012', 'Jeep Wrangler SUV 2012', 'Jeep Liberty SUV 2012',
'Jeep Grand Cherokee SUV 2012', 'Jeep Compass SUV 2012',
'Lamborghini Reventon Coupe 2008', 'Lamborghini Aventador Coupe 2012',
'Lamborghini Gallardo LP 570-4 Superleggera 2012',
'Lamborghini Diablo Coupe 2001', 'Land Rover Range Rover SUV 2012',
'Land Rover LR2 SUV 2012', 'Lincoln Town Car Sedan 2011',
'MINI Cooper Roadster Convertible 2012',
'Maybach Landaulet Convertible 2012', 'Mazda Tribute SUV 2011',
'McLaren MP4-12C Coupe 2012', 'Mercedes-Benz 300-Class Convertible 1993',
'Mercedes-Benz C-Class Sedan 2012', 'Mercedes-Benz SL-Class Coupe 2009',
'Mercedes-Benz E-Class Sedan 2012', 'Mercedes-Benz S-Class Sedan 2012',
'Mercedes-Benz Sprinter Van 2012', 'Mitsubishi Lancer Sedan 2012',
'Nissan Leaf Hatchback 2012', 'Nissan NV Passenger Van 2012',
'Nissan Juke Hatchback 2012', 'Nissan 240SX Coupe 1998',
'Plymouth Neon Coupe 1999', 'Porsche Panamera Sedan 2012',
'Ram C/V Cargo Van Minivan 2012',
'Rolls-Royce Phantom Drophead Coupe Convertible 2012',
'Rolls-Royce Ghost Sedan 2012', 'Rolls-Royce Phantom Sedan 2012',
'Scion xD Hatchback 2012', 'Spyker C8 Convertible 2009',
'Spyker C8 Coupe 2009', 'Suzuki Aerio Sedan 2007',
'Suzuki Kizashi Sedan 2012', 'Suzuki SX4 Hatchback 2012',
'Suzuki SX4 Sedan 2012', 'Tesla Model S Sedan 2012',
'Toyota Sequoia SUV 2012', 'Toyota Camry Sedan 2012',
'Toyota Corolla Sedan 2012', 'Toyota 4Runner SUV 2012',
'Volkswagen Golf Hatchback 2012', 'Volkswagen Golf Hatchback 1991',
'Volkswagen Beetle Hatchback 2012', 'Volvo C30 Hatchback 2012',
'Volvo 240 Sedan 1993', 'Volvo XC90 SUV 2007',
'smart fortwo Convertible 2012')
SUN397_CATEGORIES = (
'abbey', 'airplane_cabin', 'airport_terminal', 'alley', 'amphitheater',
'amusement_arcade', 'amusement_park', 'anechoic_chamber',
'apartment_building_outdoor', 'apse_indoor', 'aquarium', 'aqueduct',
'arch', 'archive', 'arrival_gate_outdoor', 'art_gallery', 'art_school',
'art_studio', 'assembly_line', 'athletic_field_outdoor', 'atrium_public',
'attic', 'auditorium', 'auto_factory', 'badlands',
'badminton_court_indoor', 'baggage_claim', 'bakery_shop',
'balcony_exterior', 'balcony_interior', 'ball_pit', 'ballroom',
'bamboo_forest', 'banquet_hall', 'bar', 'barn', 'barndoor',
'baseball_field', 'basement', 'basilica', 'basketball_court_outdoor',
'bathroom', 'batters_box', 'bayou', 'bazaar_indoor', 'bazaar_outdoor',
'beach', 'beauty_salon', 'bedroom', 'berth', 'biology_laboratory',
'bistro_indoor', 'boardwalk', 'boat_deck', 'boathouse', 'bookstore',
'booth_indoor', 'botanical_garden', 'bow_window_indoor',
'bow_window_outdoor', 'bowling_alley', 'boxing_ring', 'brewery_indoor',
'bridge', 'building_facade', 'bullring', 'burial_chamber', 'bus_interior',
'butchers_shop', 'butte', 'cabin_outdoor', 'cafeteria', 'campsite',
'campus', 'canal_natural', 'canal_urban', 'candy_store', 'canyon',
'car_interior_backseat', 'car_interior_frontseat', 'carrousel',
'casino_indoor', 'castle', 'catacomb', 'cathedral_indoor',
'cathedral_outdoor', 'cavern_indoor', 'cemetery', 'chalet',
'cheese_factory', 'chemistry_lab', 'chicken_coop_indoor',
'chicken_coop_outdoor', 'childs_room', 'church_indoor', 'church_outdoor',
'classroom', 'clean_room', 'cliff', 'cloister_indoor', 'closet',
'clothing_store', 'coast', 'cockpit', 'coffee_shop', 'computer_room',
'conference_center', 'conference_room', 'construction_site',
'control_room', 'control_tower_outdoor', 'corn_field', 'corral',
'corridor', 'cottage_garden', 'courthouse', 'courtroom', 'courtyard',
'covered_bridge_exterior', 'creek', 'crevasse', 'crosswalk',
'cubicle_office', 'dam', 'delicatessen', 'dentists_office', 'desert_sand',
'desert_vegetation', 'diner_indoor', 'diner_outdoor', 'dinette_home',
'dinette_vehicle', 'dining_car', 'dining_room', 'discotheque', 'dock',
'doorway_outdoor', 'dorm_room', 'driveway', 'driving_range_outdoor',
'drugstore', 'electrical_substation', 'elevator_door', 'elevator_interior',
'elevator_shaft', 'engine_room', 'escalator_indoor', 'excavation',
'factory_indoor', 'fairway', 'fastfood_restaurant', 'field_cultivated',
'field_wild', 'fire_escape', 'fire_station', 'firing_range_indoor',
'fishpond', 'florist_shop_indoor', 'food_court', 'forest_broadleaf',
'forest_needleleaf', 'forest_path', 'forest_road', 'formal_garden',
'fountain', 'galley', 'game_room', 'garage_indoor', 'garbage_dump',
'gas_station', 'gazebo_exterior', 'general_store_indoor',
'general_store_outdoor', 'gift_shop', 'golf_course', 'greenhouse_indoor',
'greenhouse_outdoor', 'gymnasium_indoor', 'hangar_indoor',
'hangar_outdoor', 'harbor', 'hayfield', 'heliport', 'herb_garden',
'highway', 'hill', 'home_office', 'hospital', 'hospital_room',
'hot_spring', 'hot_tub_outdoor', 'hotel_outdoor', 'hotel_room', 'house',
'hunting_lodge_outdoor', 'ice_cream_parlor', 'ice_floe', 'ice_shelf',
'ice_skating_rink_indoor', 'ice_skating_rink_outdoor', 'iceberg', 'igloo',
'industrial_area', 'inn_outdoor', 'islet', 'jacuzzi_indoor', 'jail_indoor',
'jail_cell', 'jewelry_shop', 'kasbah', 'kennel_indoor', 'kennel_outdoor',
'kindergarden_classroom', 'kitchen', 'kitchenette', 'labyrinth_outdoor',
'lake_natural', 'landfill', 'landing_deck', 'laundromat', 'lecture_room',
'library_indoor', 'library_outdoor', 'lido_deck_outdoor', 'lift_bridge',
'lighthouse', 'limousine_interior', 'living_room', 'lobby', 'lock_chamber',
'locker_room', 'mansion', 'manufactured_home', 'market_indoor',
'market_outdoor', 'marsh', 'martial_arts_gym', 'mausoleum', 'medina',
'moat_water', 'monastery_outdoor', 'mosque_indoor', 'mosque_outdoor',
'motel', 'mountain', 'mountain_snowy', 'movie_theater_indoor',
'museum_indoor', 'music_store', 'music_studio',
'nuclear_power_plant_outdoor', 'nursery', 'oast_house',
'observatory_outdoor', 'ocean', 'office', 'office_building',
'oil_refinery_outdoor', 'oilrig', 'operating_room', 'orchard',
'outhouse_outdoor', 'pagoda', 'palace', 'pantry', 'park',
'parking_garage_indoor', 'parking_garage_outdoor', 'parking_lot', 'parlor',
'pasture', 'patio', 'pavilion', 'pharmacy', 'phone_booth',
'physics_laboratory', 'picnic_area', 'pilothouse_indoor',
'planetarium_outdoor', 'playground', 'playroom', 'plaza', 'podium_indoor',
'podium_outdoor', 'pond', 'poolroom_establishment', 'poolroom_home',
'power_plant_outdoor', 'promenade_deck', 'pub_indoor', 'pulpit',
'putting_green', 'racecourse', 'raceway', 'raft', 'railroad_track',
'rainforest', 'reception', 'recreation_room', 'residential_neighborhood',
'restaurant', 'restaurant_kitchen', 'restaurant_patio', 'rice_paddy',
'riding_arena', 'river', 'rock_arch', 'rope_bridge', 'ruin', 'runway',
'sandbar', 'sandbox', 'sauna', 'schoolhouse', 'sea_cliff', 'server_room',
'shed', 'shoe_shop', 'shopfront', 'shopping_mall_indoor', 'shower',
'skatepark', 'ski_lodge', 'ski_resort', 'ski_slope', 'sky', 'skyscraper',
'slum', 'snowfield', 'squash_court', 'stable', 'stadium_baseball',
'stadium_football', 'stage_indoor', 'staircase', 'street',
'subway_interior', 'subway_station_platform', 'supermarket', 'sushi_bar',
'swamp', 'swimming_pool_indoor', 'swimming_pool_outdoor',
'synagogue_indoor', 'synagogue_outdoor', 'television_studio',
'temple_east_asia', 'temple_south_asia', 'tennis_court_indoor',
'tennis_court_outdoor', 'tent_outdoor', 'theater_indoor_procenium',
'theater_indoor_seats', 'thriftshop', 'throne_room', 'ticket_booth',
'toll_plaza', 'topiary_garden', 'tower', 'toyshop', 'track_outdoor',
'train_railway', 'train_station_platform', 'tree_farm', 'tree_house',
'trench', 'underwater_coral_reef', 'utility_room', 'valley',
'van_interior', 'vegetable_garden', 'veranda', 'veterinarians_office',
'viaduct', 'videostore', 'village', 'vineyard', 'volcano',
'volleyball_court_indoor', 'volleyball_court_outdoor', 'waiting_room',
'warehouse_indoor', 'water_tower', 'waterfall_block', 'waterfall_fan',
'waterfall_plunge', 'watering_hole', 'wave', 'wet_bar', 'wheat_field',
'wind_farm', 'windmill', 'wine_cellar_barrel_storage',
'wine_cellar_bottle_storage', 'wrestling_ring_indoor', 'yard',
'youth_hostel')
CALTECH101_CATEGORIES = (
'BACKGROUND_Google', 'Faces', 'Faces_easy', 'Leopards', 'Motorbikes',
'accordion', 'airplanes', 'anchor', 'ant', 'barrel', 'bass', 'beaver',
'binocular', 'bonsai', 'brain', 'brontosaurus', 'buddha', 'butterfly',
'camera', 'cannon', 'car_side', 'ceiling_fan', 'cellphone', 'chair',
'chandelier', 'cougar_body', 'cougar_face', 'crab', 'crayfish',
'crocodile', 'crocodile_head', 'cup', 'dalmatian', 'dollar_bill',
'dolphin', 'dragonfly', 'electric_guitar', 'elephant', 'emu', 'euphonium',
'ewer', 'ferry', 'flamingo', 'flamingo_head', 'garfield', 'gerenuk',
'gramophone', 'grand_piano', 'hawksbill', 'headphone', 'hedgehog',
'helicopter', 'ibis', 'inline_skate', 'joshua_tree', 'kangaroo', 'ketch',
'lamp', 'laptop', 'llama', 'lobster', 'lotus', 'mandolin', 'mayfly',
'menorah', 'metronome', 'minaret', 'nautilus', 'octopus', 'okapi',
'pagoda', 'panda', 'pigeon', 'pizza', 'platypus', 'pyramid', 'revolver',
'rhino', 'rooster', 'saxophone', 'schooner', 'scissors', 'scorpion',
'sea_horse', 'snoopy', 'soccer_ball', 'stapler', 'starfish', 'stegosaurus',
'stop_sign', 'strawberry', 'sunflower', 'tick', 'trilobite', 'umbrella',
'watch', 'water_lilly', 'wheelchair', 'wild_cat', 'windsor_chair',
'wrench', 'yin_yang')
FOOD101_CATEGORIES = (
'apple_pie', 'baby_back_ribs', 'baklava', 'beef_carpaccio', 'beef_tartare',
'beet_salad', 'beignets', 'bibimbap', 'bread_pudding', 'breakfast_burrito',
'bruschetta', 'caesar_salad', 'cannoli', 'caprese_salad', 'carrot_cake',
'ceviche', 'cheesecake', 'cheese_plate', 'chicken_curry',
'chicken_quesadilla', 'chicken_wings', 'chocolate_cake',
'chocolate_mousse', 'churros', 'clam_chowder', 'club_sandwich',
'crab_cakes', 'creme_brulee', 'croque_madame', 'cup_cakes', 'deviled_eggs',
'donuts', 'dumplings', 'edamame', 'eggs_benedict', 'escargots', 'falafel',
'filet_mignon', 'fish_and_chips', 'foie_gras', 'french_fries',
'french_onion_soup', 'french_toast', 'fried_calamari', 'fried_rice',
'frozen_yogurt', 'garlic_bread', 'gnocchi', 'greek_salad',
'grilled_cheese_sandwich', 'grilled_salmon', 'guacamole', 'gyoza',
'hamburger', 'hot_and_sour_soup', 'hot_dog', 'huevos_rancheros', 'hummus',
'ice_cream', 'lasagna', 'lobster_bisque', 'lobster_roll_sandwich',
'macaroni_and_cheese', 'macarons', 'miso_soup', 'mussels', 'nachos',
'omelette', 'onion_rings', 'oysters', 'pad_thai', 'paella', 'pancakes',
'panna_cotta', 'peking_duck', 'pho', 'pizza', 'pork_chop', 'poutine',
'prime_rib', 'pulled_pork_sandwich', 'ramen', 'ravioli', 'red_velvet_cake',
'risotto', 'samosa', 'sashimi', 'scallops', 'seaweed_salad',
'shrimp_and_grits', 'spaghetti_bolognese', 'spaghetti_carbonara',
'spring_rolls', 'steak', 'strawberry_shortcake', 'sushi', 'tacos',
'takoyaki', 'tiramisu', 'tuna_tartare', 'waffles')

View File

@ -6,6 +6,7 @@ import mmengine.dist as dist
import numpy as np
from mmengine.fileio import (LocalBackend, exists, get, get_file_backend,
join_path)
from mmengine.logging import MMLogger
from mmpretrain.registry import DATASETS
from .base_dataset import BaseDataset
@ -21,13 +22,11 @@ class CIFAR10(BaseDataset):
https://github.com/pytorch/vision/blob/master/torchvision/datasets/cifar.py
Args:
data_prefix (str): Prefix for data.
test_mode (bool): ``test_mode=True`` means in test phase.
It determines to use the training set or test set.
data_root (str): The root directory of the CIFAR Dataset.
split (str, optional): The dataset split, supports "train" and "test".
Defaults to "train".
metainfo (dict, optional): Meta information for dataset, such as
categories information. Defaults to None.
data_root (str): The root directory for ``data_prefix``.
Defaults to ''.
download (bool): Whether to download the dataset if not exists.
Defaults to True.
**kwargs: Other keyword arguments in :class:`BaseDataset`.
@ -56,12 +55,29 @@ class CIFAR10(BaseDataset):
METAINFO = {'classes': CIFAR10_CATEGORIES}
def __init__(self,
data_prefix: str,
test_mode: bool,
metainfo: Optional[dict] = None,
data_root: str = '',
split: str = 'train',
metainfo: Optional[dict] = None,
download: bool = True,
data_prefix: str = '',
test_mode: bool = False,
**kwargs):
splits = ['train', 'test']
assert split in splits, \
f"The split must be one of {splits}, but get '{split}'"
self.split = split
# To handle the BC-breaking
if split == 'train' and test_mode:
logger = MMLogger.get_current_instance()
logger.warning('split="train" but test_mode=True. '
'The training set will be used.')
if not data_root and not data_prefix:
raise RuntimeError('Please set ``data_root`` to'
'specify the dataset path')
self.download = download
super().__init__(
# The CIFAR dataset doesn't need specify annotation file
@ -96,7 +112,7 @@ class CIFAR10(BaseDataset):
'Download failed or shared storage is unavailable. Please ' \
f'download the dataset manually through {self.url}.'
if not self.test_mode:
if self.split == 'train':
downloaded_list = self.train_list
else:
downloaded_list = self.test_list
@ -165,13 +181,11 @@ class CIFAR100(CIFAR10):
"""`CIFAR100 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.
Args:
data_prefix (str): Prefix for data.
test_mode (bool): ``test_mode=True`` means in test phase.
It determines to use the training set or test set.
data_root (str): The root directory of the CIFAR Dataset.
split (str, optional): The dataset split, supports "train" and "test".
Defaults to "train".
metainfo (dict, optional): Meta information for dataset, such as
categories information. Defaults to None.
data_root (str): The root directory for ``data_prefix``.
Defaults to ''.
download (bool): Whether to download the dataset if not exists.
Defaults to True.
**kwargs: Other keyword arguments in :class:`BaseDataset`.

View File

@ -2,6 +2,7 @@
from typing import List
from mmengine import get_file_backend, list_from_file
from mmengine.logging import MMLogger
from mmpretrain.registry import DATASETS
from .base_dataset import BaseDataset
@ -19,8 +20,8 @@ class CUB(BaseDataset):
CUB dataset directory: ::
CUB-200-2011 (data_root)/
images (data_prefix)
CUB_200_2011
images
class_x
xx1.jpg
xx2.jpg
@ -30,54 +31,56 @@ class CUB(BaseDataset):
yy2.jpg
...
...
images.txt (ann_file)
image_class_labels.txt (image_class_labels_file)
train_test_split.txt (train_test_split_file)
images.txt
image_class_labels.txt
train_test_split.txt
....
Args:
data_root (str): The root directory for CUB-200-2011 dataset.
test_mode (bool): ``test_mode=True`` means in test phase. It determines
to use the training set or test set.
ann_file (str, optional): Annotation file path, path relative to
``data_root``. Defaults to 'images.txt'.
data_prefix (str): Prefix for images, path relative to
``data_root``. Defaults to 'images'.
image_class_labels_file (str, optional): The label file, path
relative to ``data_root``. Defaults to 'image_class_labels.txt'.
train_test_split_file (str, optional): The split file to split train
and test dataset, path relative to ``data_root``.
Defaults to 'train_test_split.txt'.
split (str, optional): The dataset split, supports "train" and "test".
Default to "train".
Examples:
>>> from mmpretrain.datasets import CUB
>>> cub_train_cfg = dict(data_root='data/CUB_200_2011', test_mode=True)
>>> cub_train = CUB(**cub_train_cfg)
>>> cub_train
>>> train_dataset = CUB(data_root='data/CUB_200_2011', split='train')
>>> train_dataset
Dataset CUB
Number of samples: 5994
Number of categories: 200
Root of dataset: data/CUB_200_2011
>>> cub_test_cfg = dict(data_root='data/CUB_200_2011', test_mode=True)
>>> cub_test = CUB(**cub_test_cfg)
>>> cub_test
Number of samples: 5994
Number of categories: 200
Root of dataset: data/CUB_200_2011
>>> test_dataset = CUB(data_root='data/CUB_200_2011', split='test')
>>> test_dataset
Dataset CUB
Number of samples: 5794
Number of categories: 200
Root of dataset: data/CUB_200_2011
Number of samples: 5794
Number of categories: 200
Root of dataset: data/CUB_200_2011
""" # noqa: E501
METAINFO = {'classes': CUB_CATEGORIES}
def __init__(self,
data_root: str,
test_mode: bool,
ann_file: str = 'images.txt',
data_prefix: str = 'images',
image_class_labels_file: str = 'image_class_labels.txt',
train_test_split_file: str = 'train_test_split.txt',
split: str = 'train',
test_mode: bool = False,
**kwargs):
splits = ['train', 'test']
assert split in splits, \
f"The split must be one of {splits}, but get '{split}'"
self.split = split
# To handle the BC-breaking
if split == 'train' and test_mode:
logger = MMLogger.get_current_instance()
logger.warning('split="train" but test_mode=True. '
'The training set will be used.')
ann_file = 'images.txt'
data_prefix = 'images'
image_class_labels_file = 'image_class_labels.txt'
train_test_split_file = 'train_test_split.txt'
self.backend = get_file_backend(data_root, enable_singleton=True)
self.image_class_labels_file = self.backend.join_path(
data_root, image_class_labels_file)
@ -116,11 +119,11 @@ class CUB(BaseDataset):
data_list = []
for sample_id in sample_dict.keys():
if split_dict[sample_id] == '1' and self.test_mode:
# skip train samples when test_mode=True
if split_dict[sample_id] == '1' and self.split == 'test':
# skip train samples when split='test'
continue
elif split_dict[sample_id] == '0' and not self.test_mode:
# skip test samples when test_mode=False
elif split_dict[sample_id] == '0' and self.split == 'train':
# skip test samples when split='train'
continue
img_path = self.backend.join_path(self.img_prefix,

View File

@ -0,0 +1,116 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List
import mat4py
from mmengine import get_file_backend
from mmpretrain.registry import DATASETS
from .base_dataset import BaseDataset
from .categories import DTD_CATEGORIES
@DATASETS.register_module()
class DTD(BaseDataset):
    """The Describable Texture Dataset (DTD).

    Support the `Describable Texture Dataset <https://www.robots.ox.ac.uk/~vgg/data/dtd/>`_ Dataset.
    After downloading and decompression, the dataset directory structure is as follows.

    DTD dataset directory: ::

        dtd
        ├── images
        │   ├── banded
        │   │   ├── banded_0002.jpg
        │   │   ├── banded_0004.jpg
        │   │   └── ...
        │   └── ...
        ├── imdb
        │   └── imdb.mat
        ├── labels
        │   ├── labels_joint_anno.txt
        │   ├── test1.txt
        │   ├── test2.txt
        │   └── ...
        └── ....

    Args:
        data_root (str): The root directory for Describable Texture dataset.
        split (str, optional): The dataset split, supports "train",
            "val", "trainval", and "test". Default to "trainval".

    Examples:
        >>> from mmpretrain.datasets import DTD
        >>> train_dataset = DTD(data_root='data/dtd', split='trainval')
        >>> train_dataset
        Dataset DTD
            Number of samples:  3760
            Number of categories:       47
            Root of dataset:    data/dtd
        >>> test_dataset = DTD(data_root='data/dtd', split='test')
        >>> test_dataset
        Dataset DTD
            Number of samples:  1880
            Number of categories:       47
            Root of dataset:    data/dtd
    """  # noqa: E501

    METAINFO = {'classes': DTD_CATEGORIES}

    def __init__(self, data_root: str, split: str = 'trainval', **kwargs):
        splits = ['train', 'val', 'trainval', 'test']
        assert split in splits, \
            f"The split must be one of {splits}, but get '{split}'"
        self.split = split

        self.backend = get_file_backend(data_root, enable_singleton=True)
        super(DTD, self).__init__(
            ann_file=self.backend.join_path('imdb', 'imdb.mat'),
            data_root=data_root,
            data_prefix='images',
            test_mode=(split == 'test'),
            **kwargs)

    def load_data_list(self):
        """Load images and ground truth labels."""
        data = mat4py.loadmat(self.ann_file)['images']
        names = data['name']
        labels = data['class']
        parts = data['set']
        assert len(names) == len(labels) == len(parts), 'get error ann file'

        # The .mat annotation marks each image with a set id:
        # 1 -> train, 2 -> val, 3 -> test.
        target_set = {
            'train': {1},
            'val': {2},
            'test': {3},
            'trainval': {1, 2},
        }[self.split]

        data_list = []
        for img_name, label, part in zip(names, labels, parts):
            if part not in target_set:
                continue
            img_path = self.backend.join_path(self.img_prefix, img_name)
            # Labels in the annotation file are 1-based.
            data_list.append(dict(img_path=img_path, gt_label=label - 1))
        return data_list

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset."""
        return [f'Root of dataset: \t{self.data_root}']

View File

@ -0,0 +1,98 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List
from mmengine import get_file_backend, list_from_file
from mmpretrain.registry import DATASETS
from .base_dataset import BaseDataset
from .categories import FGVCAIRCRAFT_CATEGORIES
@DATASETS.register_module()
class FGVCAircraft(BaseDataset):
    """The FGVC_Aircraft Dataset.

    Support the `FGVC_Aircraft Dataset <https://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/>`_ Dataset.
    After downloading and decompression, the dataset directory structure is as follows.

    FGVC_Aircraft dataset directory: ::

        fgvc-aircraft-2013b
        └── data
            ├── images
            │   ├── 1.jpg
            │   ├── 2.jpg
            │   └── ...
            ├── images_variant_train.txt
            ├── images_variant_test.txt
            ├── images_variant_trainval.txt
            ├── images_variant_val.txt
            ├── variants.txt
            └── ....

    Args:
        data_root (str): The root directory for FGVC_Aircraft dataset.
        split (str, optional): The dataset split, supports "train",
            "val", "trainval", and "test". Default to "trainval".

    Examples:
        >>> from mmpretrain.datasets import FGVCAircraft
        >>> train_dataset = FGVCAircraft(data_root='data/fgvc-aircraft-2013b', split='trainval')
        >>> train_dataset
        Dataset FGVCAircraft
            Number of samples:  6667
            Number of categories:       100
            Root of dataset:    data/fgvc-aircraft-2013b
        >>> test_dataset = FGVCAircraft(data_root='data/fgvc-aircraft-2013b', split='test')
        >>> test_dataset
        Dataset FGVCAircraft
            Number of samples:  3333
            Number of categories:       100
            Root of dataset:    data/fgvc-aircraft-2013b
    """  # noqa: E501

    METAINFO = {'classes': FGVCAIRCRAFT_CATEGORIES}

    def __init__(self, data_root: str, split: str = 'trainval', **kwargs):
        splits = ['train', 'val', 'trainval', 'test']
        assert split in splits, \
            f"The split must be one of {splits}, but get '{split}'"
        self.split = split

        self.backend = get_file_backend(data_root, enable_singleton=True)
        ann_file = self.backend.join_path('data',
                                          f'images_variant_{split}.txt')
        data_prefix = self.backend.join_path('data', 'images')
        test_mode = split == 'test'

        super(FGVCAircraft, self).__init__(
            ann_file=ann_file,
            data_root=data_root,
            test_mode=test_mode,
            data_prefix=data_prefix,
            **kwargs)

    def load_data_list(self):
        """Load images and ground truth labels."""
        pairs = list_from_file(self.ann_file)
        # Build the name -> index mapping once instead of calling
        # list.index (O(num_classes)) for every sample.
        class_to_idx = {
            name: i
            for i, name in enumerate(self.METAINFO['classes'])
        }
        data_list = []
        for pair in pairs:
            # Each line is '<image id> <variant name ...>'; the variant
            # name itself may contain spaces.
            pair = pair.split()
            img_name = pair[0]
            class_name = ' '.join(pair[1:])
            img_path = self.backend.join_path(self.img_prefix,
                                              f'{img_name}.jpg')
            gt_label = class_to_idx[class_name]
            info = dict(img_path=img_path, gt_label=gt_label)
            data_list.append(info)
        return data_list

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset."""
        body = [
            f'Root of dataset: \t{self.data_root}',
        ]
        return body

View File

@ -0,0 +1,104 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List
import mat4py
from mmengine import get_file_backend
from mmpretrain.registry import DATASETS
from .base_dataset import BaseDataset
@DATASETS.register_module()
class Flowers102(BaseDataset):
    """The Oxford 102 Flower Dataset.

    Support the `Oxford 102 Flowers Dataset <https://www.robots.ox.ac.uk/~vgg/data/flowers/102/>`_ Dataset.
    After downloading and decompression, the dataset directory structure is as follows.

    Flowers102 dataset directory: ::

        Flowers102
        ├── jpg
        │   ├── image_00001.jpg
        │   ├── image_00002.jpg
        │   └── ...
        ├── imagelabels.mat
        ├── setid.mat
        └── ...

    Args:
        data_root (str): The root directory for Oxford 102 Flowers dataset.
        split (str, optional): The dataset split, supports "train",
            "val", "trainval", and "test". Default to "trainval".

    Examples:
        >>> from mmpretrain.datasets import Flowers102
        >>> train_dataset = Flowers102(data_root='data/Flowers102', split='trainval')
        >>> train_dataset
        Dataset Flowers102
            Number of samples:  2040
            Root of dataset:    data/Flowers102
        >>> test_dataset = Flowers102(data_root='data/Flowers102', split='test')
        >>> test_dataset
        Dataset Flowers102
            Number of samples:  6149
            Root of dataset:    data/Flowers102
    """  # noqa: E501

    def __init__(self, data_root: str, split: str = 'trainval', **kwargs):
        splits = ['train', 'val', 'trainval', 'test']
        assert split in splits, \
            f"The split must be one of {splits}, but get '{split}'"
        self.split = split

        self.backend = get_file_backend(data_root, enable_singleton=True)
        # setid.mat stores the sample ids belonging to each split.
        self.train_test_split_file = self.backend.join_path(
            data_root, 'setid.mat')
        super(Flowers102, self).__init__(
            ann_file='imagelabels.mat',
            data_root=data_root,
            data_prefix='jpg',
            test_mode=(split == 'test'),
            **kwargs)

    def load_data_list(self):
        """Load images and ground truth labels."""
        label_dict = mat4py.loadmat(self.ann_file)['labels']
        split_ids = mat4py.loadmat(self.train_test_split_file)

        # Keys in setid.mat: 'trnid' (train), 'valid' (val), 'tstid' (test).
        if self.split == 'trainval':
            sample_ids = split_ids['trnid']
            sample_ids.extend(split_ids['valid'])
        elif self.split == 'train':
            sample_ids = split_ids['trnid']
        elif self.split == 'val':
            sample_ids = split_ids['valid']
        else:
            sample_ids = split_ids['tstid']

        data_list = []
        for sample_id in sample_ids:
            # Sample ids and labels in the .mat files are both 1-based.
            img_path = self.backend.join_path(self.img_prefix,
                                              'image_%05d.jpg' % (sample_id))
            gt_label = int(label_dict[sample_id - 1]) - 1
            data_list.append(dict(img_path=img_path, gt_label=gt_label))
        return data_list

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset."""
        return [f'Root of dataset: \t{self.data_root}']

View File

@ -0,0 +1,102 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List
from mmengine import get_file_backend, list_from_file
from mmpretrain.registry import DATASETS
from .base_dataset import BaseDataset
from .categories import FOOD101_CATEGORIES
@DATASETS.register_module()
class Food101(BaseDataset):
    """The Food101 Dataset.

    Support the `Food101 Dataset <https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/>`_ Dataset.
    After downloading and decompression, the dataset directory structure is as follows.

    Food101 dataset directory: ::

        food-101
        ├── images
        │   ├── class_x
        │   │   ├── xx1.jpg
        │   │   ├── xx2.jpg
        │   │   └── ...
        │   ├── class_y
        │   │   ├── yy1.jpg
        │   │   ├── yy2.jpg
        │   │   └── ...
        │   └── ...
        ├── meta
        │   ├── train.txt
        │   └── test.txt
        └── ....

    Args:
        data_root (str): The root directory for Food101 dataset.
        split (str, optional): The dataset split, supports "train" and "test".
            Default to "train".

    Examples:
        >>> from mmpretrain.datasets import Food101
        >>> train_dataset = Food101(data_root='data/food-101', split='train')
        >>> train_dataset
        Dataset Food101
            Number of samples:  75750
            Number of categories:       101
            Root of dataset:    data/food-101
        >>> test_dataset = Food101(data_root='data/food-101', split='test')
        >>> test_dataset
        Dataset Food101
            Number of samples:  25250
            Number of categories:       101
            Root of dataset:    data/food-101
    """  # noqa: E501

    METAINFO = {'classes': FOOD101_CATEGORIES}

    def __init__(self, data_root: str, split: str = 'train', **kwargs):
        splits = ['train', 'test']
        assert split in splits, \
            f"The split must be one of {splits}, but get '{split}'"
        self.split = split

        self.backend = get_file_backend(data_root, enable_singleton=True)
        if split == 'train':
            ann_file = self.backend.join_path('meta', 'train.txt')
        else:
            ann_file = self.backend.join_path('meta', 'test.txt')
        test_mode = split == 'test'
        data_prefix = 'images'

        super(Food101, self).__init__(
            ann_file=ann_file,
            data_root=data_root,
            test_mode=test_mode,
            data_prefix=data_prefix,
            **kwargs)

    def load_data_list(self):
        """Load images and ground truth labels."""
        pairs = list_from_file(self.ann_file)
        # Build the name -> index mapping once instead of calling
        # list.index (O(num_classes)) for each of the ~75k samples.
        class_to_idx = {
            name: i
            for i, name in enumerate(self.METAINFO['classes'])
        }
        data_list = []
        for pair in pairs:
            # Each line is '<class name>/<image stem>', without extension.
            class_name, img_name = pair.split('/')
            img_name = f'{img_name}.jpg'
            img_path = self.backend.join_path(self.img_prefix, class_name,
                                              img_name)
            gt_label = class_to_idx[class_name]
            info = dict(img_path=img_path, gt_label=gt_label)
            data_list.append(info)
        return data_list

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset."""
        body = [
            f'Root of dataset: \t{self.data_root}',
        ]
        return body

View File

@ -0,0 +1,97 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List
from mmengine import get_file_backend, list_from_file
from mmpretrain.registry import DATASETS
from .base_dataset import BaseDataset
from .categories import OxfordIIITPet_CATEGORIES
@DATASETS.register_module()
class OxfordIIITPet(BaseDataset):
    """The Oxford-IIIT Pets Dataset.

    Support the `Oxford-IIIT Pets Dataset <https://www.robots.ox.ac.uk/~vgg/data/pets/>`_ Dataset.
    After downloading and decompression, the dataset directory structure is as follows.

    Oxford-IIIT_Pets dataset directory: ::

        Oxford-IIIT_Pets
        ├── images
        │   ├── Abyssinian_1.jpg
        │   ├── Abyssinian_2.jpg
        │   └── ...
        ├── annotations
        │   ├── trainval.txt
        │   ├── test.txt
        │   ├── list.txt
        │   └── ...
        └── ....

    Args:
        data_root (str): The root directory for Oxford-IIIT Pets dataset.
        split (str, optional): The dataset split, supports "trainval" and "test".
            Default to "trainval".

    Examples:
        >>> from mmpretrain.datasets import OxfordIIITPet
        >>> train_dataset = OxfordIIITPet(data_root='data/Oxford-IIIT_Pets', split='trainval')
        >>> train_dataset
        Dataset OxfordIIITPet
            Number of samples:  3680
            Number of categories:       37
            Root of dataset:    data/Oxford-IIIT_Pets
        >>> test_dataset = OxfordIIITPet(data_root='data/Oxford-IIIT_Pets', split='test')
        >>> test_dataset
        Dataset OxfordIIITPet
            Number of samples:  3669
            Number of categories:       37
            Root of dataset:    data/Oxford-IIIT_Pets
    """  # noqa: E501

    METAINFO = {'classes': OxfordIIITPet_CATEGORIES}

    def __init__(self, data_root: str, split: str = 'trainval', **kwargs):
        splits = ['trainval', 'test']
        assert split in splits, \
            f"The split must be one of {splits}, but get '{split}'"
        self.split = split

        self.backend = get_file_backend(data_root, enable_singleton=True)
        ann_name = 'trainval.txt' if split == 'trainval' else 'test.txt'
        super(OxfordIIITPet, self).__init__(
            ann_file=self.backend.join_path('annotations', ann_name),
            data_root=data_root,
            data_prefix='images',
            test_mode=(split == 'test'),
            **kwargs)

    def load_data_list(self):
        """Load images and ground truth labels."""
        entries = list_from_file(self.ann_file)
        data_list = []
        for entry in entries:
            # Each line: '<image stem> <class id> <species id> <breed id>'.
            img_name, class_id, _, _ = entry.split()
            img_path = self.backend.join_path(self.img_prefix,
                                              f'{img_name}.jpg')
            # Class ids in the annotation file are 1-based.
            data_list.append(
                dict(img_path=img_path, gt_label=int(class_id) - 1))
        return data_list

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset."""
        return [f'Root of dataset: \t{self.data_root}']

View File

@ -0,0 +1,148 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List
import mat4py
from mmengine import get_file_backend
from mmpretrain.registry import DATASETS
from .base_dataset import BaseDataset
from .categories import STANFORDCARS_CATEGORIES
@DATASETS.register_module()
class StanfordCars(BaseDataset):
    """The Stanford Cars Dataset.

    Support the `Stanford Cars Dataset <https://ai.stanford.edu/~jkrause/cars/car_dataset.html>`_ Dataset.
    The official website provides two ways to organize the dataset.
    Therefore, after downloading and decompression, the dataset directory structure is as follows.

    Stanford Cars dataset directory: ::

        Stanford_Cars
        ├── car_ims
        │   ├── 00001.jpg
        │   ├── 00002.jpg
        │   └── ...
        └── cars_annos.mat

    or ::

        Stanford_Cars
        ├── cars_train
        │   ├── 00001.jpg
        │   ├── 00002.jpg
        │   └── ...
        ├── cars_test
        │   ├── 00001.jpg
        │   ├── 00002.jpg
        │   └── ...
        └── devkit
            ├── cars_meta.mat
            ├── cars_train_annos.mat
            ├── cars_test_annos.mat
            ├── cars_test_annos_withlabels.mat
            ├── eval_train.m
            └── train_perfect_preds.txt

    Args:
        data_root (str): The root directory for Stanford Cars dataset.
        split (str, optional): The dataset split, supports "train"
            and "test". Default to "train".

    Examples:
        >>> from mmpretrain.datasets import StanfordCars
        >>> train_dataset = StanfordCars(data_root='data/Stanford_Cars', split='train')
        >>> train_dataset
        Dataset StanfordCars
            Number of samples:  8144
            Number of categories:       196
            Root of dataset:    data/Stanford_Cars
        >>> test_dataset = StanfordCars(data_root='data/Stanford_Cars', split='test')
        >>> test_dataset
        Dataset StanfordCars
            Number of samples:  8041
            Number of categories:       196
            Root of dataset:    data/Stanford_Cars
    """  # noqa: E501

    METAINFO = {'classes': STANFORDCARS_CATEGORIES}

    def __init__(self, data_root: str, split: str = 'train', **kwargs):
        splits = ['train', 'test']
        assert split in splits, \
            f"The split must be one of {splits}, but get '{split}'"
        self.split = split
        test_mode = split == 'test'

        self.backend = get_file_backend(data_root, enable_singleton=True)
        # The dataset may be organized in either of two official layouts;
        # prefer the single-annotation-file layout when it is present.
        anno_file_path = self.backend.join_path(data_root, 'cars_annos.mat')
        if self.backend.exists(anno_file_path):
            ann_file = 'cars_annos.mat'
            data_prefix = ''
        else:
            if test_mode:
                ann_file = self.backend.join_path(
                    'devkit', 'cars_test_annos_withlabels.mat')
                data_prefix = 'cars_test'
            else:
                ann_file = self.backend.join_path('devkit',
                                                  'cars_train_annos.mat')
                data_prefix = 'cars_train'

            if not self.backend.exists(
                    self.backend.join_path(data_root, ann_file)):
                doc_url = 'https://mmpretrain.readthedocs.io/en/latest/api/datasets.html#stanfordcars'  # noqa: E501
                # Use explicit concatenation instead of a backslash
                # continuation, which would embed indentation spaces
                # into the user-facing message.
                raise RuntimeError(
                    'The dataset is incorrectly organized, please refer '
                    f'to {doc_url} and reorganize your folders.')

        super(StanfordCars, self).__init__(
            ann_file=ann_file,
            data_root=data_root,
            data_prefix=data_prefix,
            test_mode=test_mode,
            **kwargs)

    def load_data_list(self):
        """Load images and ground truth labels."""
        data = mat4py.loadmat(self.ann_file)['annotations']
        data_list = []
        if 'test' in data.keys():
            # First layout: one annotation file covering both splits,
            # with a 'test' flag (1 -> test sample, 0 -> train sample).
            img_paths, labels, test = data['relative_im_path'], data[
                'class'], data['test']
            num = len(img_paths)
            assert num == len(labels) == len(test), 'get error ann file'
            for i in range(num):
                if not self.test_mode and test[i] == 1:
                    continue
                if self.test_mode and test[i] == 0:
                    continue
                img_path = self.backend.join_path(self.img_prefix,
                                                  img_paths[i])
                # Labels in the annotation file are 1-based.
                gt_label = labels[i] - 1
                info = dict(img_path=img_path, gt_label=gt_label)
                data_list.append(info)
        else:
            # Second layout: separate annotation files per split.
            img_names, labels = data['fname'], data['class']
            num = len(img_names)
            assert num == len(labels), 'get error ann file'
            for i in range(num):
                img_path = self.backend.join_path(self.img_prefix,
                                                  img_names[i])
                gt_label = labels[i] - 1
                info = dict(img_path=img_path, gt_label=gt_label)
                data_list.append(info)
        return data_list

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset."""
        body = [
            f'Root of dataset: \t{self.data_root}',
        ]
        return body

View File

@ -0,0 +1,225 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List
from mmengine import get_file_backend, list_from_file
from mmpretrain.registry import DATASETS
from .base_dataset import BaseDataset
from .categories import SUN397_CATEGORIES
# Some files in SUN397 are not actually JPEG images even though their
# names end with '.jpg', so they cannot be decoded by common image
# libraries. This list enumerates those files so the dataset can skip
# them while loading annotations.
INVALID = [
    '/a/assembly_line/sun_ajckcfldgdrdjogj.jpg',
    '/a/auto_factory/sun_apfsprenzdnzbhmt.jpg',
    '/b/baggage_claim/sun_avittiqqaiibgcau.jpg',
    '/b/batters_box/sun_alqlfpgtbgggezyr.jpg',
    '/b/bow_window/indoor/sun_ahsholsagvlrsboa.jpg',
    '/b/bow_window/indoor/sun_aioomcoujmmcxkkx.jpg',
    '/b/bow_window/outdoor/sun_atgtjdpqikjmllth.jpg',
    '/c/carrousel/sun_atsgphqympojgxnc.jpg',
    '/c/carrousel/sun_auzitjuirwolazns.jpg',
    '/c/church/outdoor/sun_boagasgfltequmal.jpg',
    '/c/church/outdoor/sun_brhmnwzzbkphcvfo.jpg',
    '/c/church/outdoor/sun_byjkqzybxpjnuofa.jpg',
    '/c/corridor/sun_aznefxvocwpgimko.jpg',
    '/d/dentists_office/sun_aaefsoauqlcsihou.jpg',
    '/d/diner/indoor/sun_apswilaujhntrybg.jpg',
    '/e/elevator/door/sun_aaudobqlphijkjdv.jpg',
    '/f/fastfood_restaurant/sun_axeniwtesffxqedr.jpg',
    '/f/fire_station/sun_bjyapttwilyyuxqm.jpg',
    '/f/fountain/sun_axgmpbdyvqhtkhee.jpg',
    '/h/hospital_room/sun_ahokhhxjiclpxqqa.jpg',
    '/o/oast_house/sun_bqsrrygxyrutgjve.jpg',
    '/r/restaurant_patio/sun_aurwypviprwycame.jpg',
    '/s/ski_resort/sun_bplmntyzoiobcqhp.jpg',
    '/w/wine_cellar/bottle_storage/sun_afmzwxkzmxkbamqi.jpg',
    '/w/wine_cellar/bottle_storage/sun_ahyymswdjejrbhyb.jpg',
    '/w/wine_cellar/bottle_storage/sun_avnttpxamufejbfe.jpg',
    '/a/archive/sun_awgsrbljlsvhqjij.jpg',
    '/a/art_school/sun_aabogqsjulyvmcse.jpg',
    '/a/art_school/sun_apnzojafyvkariue.jpg',
    '/b/ball_pit/sun_atjhwqngtoeuwhso.jpg',
    '/b/bow_window/indoor/sun_asxvsqbexmmtqmht.jpg',
    '/b/bow_window/indoor/sun_abeugxecxrwzmffp.jpg',
    '/b/bow_window/outdoor/sun_auwcqhrtzkgihvlv.jpg',
    '/b/bow_window/outdoor/sun_apnvdyecnjjmcuhi.jpg',
    '/c/childs_room/sun_alggivksjwwiklmt.jpg',
    '/c/control_tower/outdoor/sun_avbcxakrvpomqdgr.jpg',
    '/d/diner/indoor/sun_ajmzozstvsxisvgx.jpg',
    '/e/elevator/door/sun_aaqsyluqbluugqgy.jpg',
    '/f/fastfood_restaurant/sun_aevchxlxoruhxgrb.jpg',
    '/f/firing_range/indoor/sun_affrzvahwjorpalo.jpg',
    '/f/formal_garden/sun_bjvrlaeatjufekft.jpg',
    '/g/garage/indoor/sun_akbocuwclkxqlofx.jpg',
    '/g/greenhouse/indoor/sun_addirvgtxfbndlwf.jpg',
    '/k/kindergarden_classroom/sun_ajtpaahilrqzarri.jpg',
    '/l/laundromat/sun_afrrjykuhhlwiwun.jpg',
    '/m/music_studio/sun_bsntklkmwqgnjrjj.jpg',
    '/t/track/outdoor/sun_aophkoiosslinihb.jpg',
    '/a/archive/sun_aegmzltkiwyevpwa.jpg',
    '/a/auto_factory/sun_aybymzvbxgvcrwgn.jpg',
    '/b/baggage_claim/sun_atpmiqmnxjpgqsxi.jpg',
    '/b/baggage_claim/sun_ajffcdpsvgqfzoxx.jpg',
    '/b/bamboo_forest/sun_ausmxphosyahoyjo.jpg',
    '/b/batters_box/sun_aaeheulsicxtxnbu.jpg',
    '/c/carrousel/sun_arjrjcxemhttubqz.jpg',
    '/c/chicken_coop/outdoor/sun_abcegmmdbizqkpgh.jpg',
    '/c/control_tower/outdoor/sun_axhjfpkxdvqdfkyr.jpg',
    '/d/diner/indoor/sun_apaotiublwqeowck.jpg',
    '/f/fastfood_restaurant/sun_anexashcgmxdbmxq.jpg',
    '/l/landing_deck/sun_aizahnjfkuurjibw.jpg',
    '/n/nuclear_power_plant/outdoor/sun_aoblfvgyleweqanr.jpg',
    '/w/waiting_room/sun_aicytusmthfvqcwc.jpg',
    '/b/bow_window/indoor/sun_asmvdfnjlulewkpr.jpg',
    '/b/bus_interior/sun_adhktvidwzmodeou.jpg',
    '/c/catacomb/sun_algnawesgjzzmcqd.jpg',
    '/c/church/outdoor/sun_baihxlseimcsdhdx.jpg',
    '/d/diner/indoor/sun_agoyalzcawgxodbm.jpg',
    '/e/elevator_shaft/sun_awaitimkinrjaybl.jpg',
    '/f/fastfood_restaurant/sun_aplvzfbmtqtbsvbx.jpg',
    '/g/greenhouse/indoor/sun_bkccvyfpwetwjuhk.jpg',
    '/c/car_interior/backseat/sun_adexwfoqdyhowxpu.jpg',
    '/c/church/outdoor/sun_blmmweiumednscuf.jpg',
    '/f/fire_station/sun_bibntbsuunbsdrum.jpg',
    '/g/game_room/sun_aopfaqlllpvzhrak.jpg',
    '/u/underwater/coral_reef/sun_biiueajvszaxqopo.jpg',
    '/a/airplane_cabin/sun_arqyikigkyfpegug.jpg',
    '/b/badminton_court/indoor/sun_amppvxecgtjpfold.jpg',
    '/c/carrousel/sun_anxtrtieimkpmhvk.jpg',
    '/c/computer_room/sun_aebgvpgtwoqbfyvl.jpg',
    '/f/fire_escape/sun_atbraxuwwlvdoolv.jpg',
    '/k/kasbah/sun_abxkkoielpavsouu.jpg',
    '/t/tower/sun_bccqnzcvqkiwicjt.jpg',
    '/a/archive/sun_afngadshxudodkct.jpg',
    '/b/bow_window/indoor/sun_awnrlipyxpgxxgxz.jpg',
    '/c/control_tower/outdoor/sun_arohngcbtsvbthho.jpg',
    '/f/fire_station/sun_brbskkfgghbfvgkk.jpg',
    '/r/restaurant_patio/sun_amjfbqzfgxarrpec.jpg',
    '/v/vineyard/sun_bdxhnbgbnolddswz.jpg',
    '/b/baggage_claim/sun_axrtsmillrglugia.jpg',
    '/d/diner/indoor/sun_alaqevbwpjaqqdqz.jpg',
    '/l/landing_deck/sun_acodgoamhgnnbmvr.jpg',
    '/c/carrousel/sun_adsafgyrinnekycc.jpg',
    '/c/church/outdoor/sun_bzqhuwshtdgakkay.jpg',
    '/c/closet/sun_absahzamlrylkxyn.jpg',
    '/f/fire_escape/sun_acdthenaosuqcoqn.jpg',
    '/b/butchers_shop/sun_asrdgbefoszenfex.jpg',
    '/c/church/outdoor/sun_bzfyucfrdigaqneg.jpg',
    '/c/church/outdoor/sun_byzxhknqrejdajxi.jpg',
    '/c/cockpit/sun_ajkulpqauavrmxae.jpg',
    '/l/living_room/sun_aefoqbeatyufobtx.jpg',
    '/s/supermarket/sun_attvxbzocurnddbz.jpg',
    '/c/closet/sun_aqnutmwfkypmrnfy.jpg',
    '/f/fire_station/sun_bttrtzktpbymxkmf.jpg',
    '/s/shopping_mall/indoor/sun_avwzjsijaxnwuzjx.jpg',
    '/w/windmill/sun_blvczkyqbmabzeej.jpg',
    '/c/chicken_coop/outdoor/sun_amaonsnnkskxwmrj.jpg',
    '/s/swimming_pool/outdoor/sun_bslaihiqlhfewtzn.jpg',
    '/u/underwater/coral_reef/sun_bhcrnmvbgnkvcvkr.jpg',
    '/d/dining_room/sun_azlxdhiajwrhaivq.jpg',
    '/c/church/outdoor/sun_bnunxbznqnvgeykx.jpg',
    '/c/corridor/sun_aspwpqqlcwzfanvl.jpg',
    '/r/restaurant_patio/sun_awcbpizjbudjvrhs.jpg',
    '/b/ball_pit/sun_avdnmemjrgrbkwjm.jpg',
]
@DATASETS.register_module()
class SUN397(BaseDataset):
    """The SUN397 Dataset.

    Support the `SUN397 Dataset <https://vision.princeton.edu/projects/2010/SUN/>`_ Dataset.
    After downloading and decompression, the dataset directory structure is as follows.

    SUN397 dataset directory: ::

        SUN397
        ├── SUN397
        │   ├── a
        │   │   ├── abbey
        │   │   │   ├── sun_aaalbzqrimafwbiv.jpg
        │   │   │   └── ...
        │   │   ├── airplane_cabin
        │   │   │   ├── sun_aadqdkqaslqqoblu.jpg
        │   │   │   └── ...
        │   │   └── ...
        │   ├── b
        │   │   └── ...
        │   ├── c
        │   │   └── ...
        │   └── ...
        └── Partitions
            ├── ClassName.txt
            ├── Training_01.txt
            ├── Testing_01.txt
            └── ...

    Args:
        data_root (str): The root directory for SUN397 dataset.
        split (str, optional): The dataset split, supports "train" and "test".
            Default to "train".

    Examples:
        >>> from mmpretrain.datasets import SUN397
        >>> train_dataset = SUN397(data_root='data/SUN397', split='train')
        >>> train_dataset
        Dataset SUN397
            Number of samples:  19824
            Number of categories:       397
            Root of dataset:    data/SUN397
        >>> test_dataset = SUN397(data_root='data/SUN397', split='test')
        >>> test_dataset
        Dataset SUN397
            Number of samples:  19829
            Number of categories:       397
            Root of dataset:    data/SUN397
    """  # noqa: E501

    METAINFO = {'classes': SUN397_CATEGORIES}

    def __init__(self, data_root: str, split: str = 'train', **kwargs):
        splits = ['train', 'test']
        assert split in splits, \
            f"The split must be one of {splits}, but get '{split}'"
        self.split = split

        self.backend = get_file_backend(data_root, enable_singleton=True)
        if split == 'train':
            ann_file = self.backend.join_path('Partitions', 'Training_01.txt')
        else:
            ann_file = self.backend.join_path('Partitions', 'Testing_01.txt')
        data_prefix = 'SUN397'
        test_mode = split == 'test'

        super(SUN397, self).__init__(
            ann_file=ann_file,
            data_root=data_root,
            test_mode=test_mode,
            data_prefix=data_prefix,
            **kwargs)

    def load_data_list(self):
        """Load images and ground truth labels."""
        pairs = list_from_file(self.ann_file)
        # Hoist O(1)-lookup structures out of the loop: a set for the
        # known-bad files and a dict instead of per-sample list.index
        # calls over the 397 class names.
        invalid_files = set(INVALID)
        class_to_idx = {
            name: i
            for i, name in enumerate(self.METAINFO['classes'])
        }
        data_list = []
        for pair in pairs:
            if pair in invalid_files:
                continue
            img_path = self.backend.join_path(self.img_prefix, pair[1:])
            # Each line looks like '/a/abbey/sun_xxx.jpg'; the class name
            # is the directory path between the letter bucket and the
            # file name, joined with underscores.
            items = pair.split('/')
            class_name = '_'.join(items[2:-1])
            gt_label = class_to_idx[class_name]
            info = dict(img_path=img_path, gt_label=gt_label)
            data_list.append(info)
        return data_list

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset."""
        body = [
            f'Root of dataset: \t{self.data_root}',
        ]
        return body

View File

@ -1,5 +1,6 @@
einops
importlib-metadata
mat4py
matplotlib
modelindex
numpy

File diff suppressed because it is too large Load Diff