# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import datetime
import hashlib
import shutil
import warnings
from collections import OrderedDict
from pathlib import Path

import torch

import mmpretrain

def parse_args(argv=None):
    """Parse command-line options for the checkpoint publishing tool.

    Args:
        argv (list[str] | None): Argument list to parse. Defaults to
            ``None``, in which case ``sys.argv[1:]`` is used (the original
            CLI behavior), so existing callers are unaffected.

    Returns:
        argparse.Namespace: Namespace with ``in_file``, ``out_file``,
        ``no_ema`` and ``dataset_type`` attributes.
    """
    parser = argparse.ArgumentParser(
        description='Process a checkpoint to be published')
    parser.add_argument('in_file', help='input checkpoint filename')
    parser.add_argument('out_file', help='output checkpoint filename')
    parser.add_argument(
        '--no-ema',
        action='store_true',
        help='Use keys in `ema_state_dict` (no-ema keys).')
    parser.add_argument(
        '--dataset-type',
        type=str,
        help='The type of the dataset. If the checkpoint is converted '
        'from other repository, this option is used to fill the dataset '
        'meta information to the published checkpoint, like "ImageNet", '
        '"CIFAR10" and others.')
    # Passing ``argv`` explicitly (instead of always reading sys.argv)
    # makes the parser usable programmatically and in tests.
    args = parser.parse_args(argv)
    return args


def process_checkpoint(in_file, out_file, args):
    """Trim a training checkpoint and publish it with a date-hash suffix.

    The published file is named
    ``{out_file without .pth}_{YYYYMMDD}-{sha256[:8]}.pth`` where the digest
    is computed over the saved file's bytes.

    Args:
        in_file (str): Input checkpoint filename.
        out_file (str): Desired output filename; the date and short digest
            are inserted before the ``.pth`` suffix.
        args (argparse.Namespace): Parsed CLI options; only ``no_ema`` and
            ``dataset_type`` are used here.
    """
    # NOTE(review): ``torch.load`` unpickles arbitrary objects — only run
    # this tool on trusted checkpoints.
    checkpoint = torch.load(in_file, map_location='cpu')
    # remove unnecessary fields for smaller file size
    for key in ['optimizer', 'param_schedulers', 'hook_msgs', 'message_hub']:
        checkpoint.pop(key, None)

    # For checkpoint converted from the official weight: wrap a bare
    # state dict into the expected container format.
    if 'state_dict' not in checkpoint:
        checkpoint = dict(state_dict=checkpoint)

    meta = checkpoint.get('meta', {})
    meta.setdefault('mmpretrain_version', mmpretrain.__version__)

    # Handle dataset meta information (only for converted checkpoints that
    # lack it; ``setdefault`` keeps any existing value).
    if args.dataset_type is not None:
        from mmpretrain.registry import DATASETS
        dataset_class = DATASETS.get(args.dataset_type)
        dataset_meta = getattr(dataset_class, 'METAINFO', {})
    else:
        dataset_meta = {}

    meta.setdefault('dataset_meta', dataset_meta)

    if len(meta['dataset_meta']) == 0:
        warnings.warn('Missing dataset meta information.')

    checkpoint['meta'] = meta

    ema_state_dict = OrderedDict()
    if 'ema_state_dict' in checkpoint:
        for k, v in checkpoint['ema_state_dict'].items():
            # The ema state dict has some extra fields; only keys prefixed
            # with ``module.`` mirror the plain state dict.
            if k.startswith('module.'):
                origin_k = k[len('module.'):]
                assert origin_k in checkpoint['state_dict']
                ema_state_dict[origin_k] = v
        del checkpoint['ema_state_dict']

        print('The input checkpoint has EMA weights, ', end='')
        if args.no_ema:
            # The values stored in `ema_state_dict` is original values.
            print('and drop the EMA weights.')
            assert ema_state_dict.keys() <= checkpoint['state_dict'].keys()
            checkpoint['state_dict'].update(ema_state_dict)
        else:
            print('and use the EMA weights.')

    # Save to a temporary name first; the final name depends on the hash
    # of the saved bytes.
    temp_out_file = Path(out_file).with_name('temp_' + Path(out_file).name)
    torch.save(checkpoint, temp_out_file)

    with open(temp_out_file, 'rb') as f:
        # Short fingerprint of the published file (first 8 hex chars).
        sha = hashlib.sha256(f.read()).hexdigest()[:8]
    if out_file.endswith('.pth'):
        out_file_name = out_file[:-4]
    else:
        out_file_name = out_file

    current_date = datetime.datetime.now().strftime('%Y%m%d')
    # ``sha`` is already truncated to 8 characters above; the previous
    # extra ``[:8]`` slice here was redundant and has been removed.
    final_file = out_file_name + f'_{current_date}-{sha}.pth'
    shutil.move(temp_out_file, final_file)

    print(f'Successfully generated the publish-ckpt as {final_file}.')


def main():
    """CLI entry point: parse options, validate the target directory and
    publish the checkpoint."""
    args = parse_args()
    out_dir = Path(args.out_file).parent
    # Fail fast if the destination directory is missing, rather than
    # erroring out after the checkpoint has already been processed.
    if not out_dir.exists():
        msg = (f'Directory {out_dir} does not exist, '
               'please generate it manually.')
        raise ValueError(msg)
    process_checkpoint(args.in_file, args.out_file, args)


# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()