2023-02-28 15:59:17 +08:00
|
|
|
# Copyright (c) OpenMMLab. All rights reserved.
|
|
|
|
import platform
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
import torch
|
|
|
|
|
2023-03-06 16:53:15 +08:00
|
|
|
from mmpretrain.models import CAE, CAEPretrainViT
|
|
|
|
from mmpretrain.structures import DataSample
|
2023-02-28 15:59:17 +08:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skipif(platform.system() == 'Windows', reason='Windows mem limit')
|
|
|
|
def test_cae_vit():
|
2023-03-06 16:53:15 +08:00
|
|
|
backbone = dict(
|
|
|
|
arch='deit-tiny', patch_size=16, layer_scale_init_value=0.1)
|
|
|
|
|
|
|
|
cae_backbone = CAEPretrainViT(**backbone)
|
2023-02-28 15:59:17 +08:00
|
|
|
cae_backbone.init_weights()
|
2023-03-06 16:53:15 +08:00
|
|
|
fake_inputs = torch.randn((1, 3, 224, 224))
|
|
|
|
fake_mask = torch.zeros((1, 196)).bool()
|
2023-02-28 15:59:17 +08:00
|
|
|
fake_mask[:, 75:150] = 1
|
2023-03-06 16:53:15 +08:00
|
|
|
|
|
|
|
# test with mask
|
2023-02-28 15:59:17 +08:00
|
|
|
fake_outputs = cae_backbone(fake_inputs, fake_mask)
|
2023-03-06 16:53:15 +08:00
|
|
|
assert list(fake_outputs.shape) == [1, 122, 192]
|
|
|
|
|
|
|
|
# test without mask
|
|
|
|
fake_outputs = cae_backbone(fake_inputs, None)
|
2023-03-09 11:02:58 +08:00
|
|
|
assert fake_outputs[0].shape == torch.Size([1, 192])
|
2023-03-06 16:53:15 +08:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skipif(platform.system() == 'Windows', reason='Windows mem limit')
|
|
|
|
def test_cae():
|
|
|
|
data_preprocessor = dict(
|
|
|
|
type='TwoNormDataPreprocessor',
|
|
|
|
mean=[123.675, 116.28, 103.53],
|
|
|
|
std=[58.395, 57.12, 57.375],
|
|
|
|
second_mean=[-31.875, -31.875, -31.875],
|
|
|
|
second_std=[318.75, 318.75, 318.75],
|
|
|
|
to_rgb=True)
|
|
|
|
|
|
|
|
# model settings
|
|
|
|
backbone = dict(
|
|
|
|
type='CAEPretrainViT',
|
|
|
|
arch='deit-tiny',
|
|
|
|
patch_size=16,
|
|
|
|
layer_scale_init_value=0.1)
|
|
|
|
neck = dict(
|
|
|
|
type='CAENeck',
|
|
|
|
embed_dims=192,
|
|
|
|
num_heads=12,
|
|
|
|
regressor_depth=4,
|
|
|
|
decoder_depth=4,
|
|
|
|
mlp_ratio=4,
|
|
|
|
layer_scale_init_value=0.1)
|
|
|
|
head = dict(type='CAEHead', loss=dict(type='CAELoss', lambd=2))
|
|
|
|
target_generator = dict(type='DALL-E')
|
|
|
|
|
|
|
|
model = CAE(
|
|
|
|
backbone=backbone,
|
|
|
|
neck=neck,
|
|
|
|
head=head,
|
|
|
|
target_generator=target_generator,
|
|
|
|
data_preprocessor=data_preprocessor)
|
|
|
|
|
|
|
|
fake_img = torch.rand((1, 3, 224, 224))
|
|
|
|
fake_target_img = torch.rand((1, 3, 112, 112))
|
|
|
|
fake_mask = torch.zeros((196)).bool()
|
|
|
|
fake_mask[75:150] = 1
|
|
|
|
fake_data_sample = DataSample()
|
|
|
|
fake_data_sample.set_mask(fake_mask)
|
|
|
|
fake_data = {
|
|
|
|
'inputs': [fake_img, fake_target_img],
|
|
|
|
'data_samples': [fake_data_sample]
|
|
|
|
}
|
2023-02-28 15:59:17 +08:00
|
|
|
|
2023-03-06 16:53:15 +08:00
|
|
|
fake_inputs = model.data_preprocessor(fake_data)
|
|
|
|
fake_outputs = model(**fake_inputs, mode='loss')
|
|
|
|
assert isinstance(fake_outputs['loss'].item(), float)
|