[Enhancement] Only keep meta and state_dict when publishing a model (#1729)

* Only keep meta and state_dict when publish model

* simplify code
pull/1731/head
liukuikun 2023-02-15 19:45:12 +08:00 committed by GitHub
parent 3240bace4a
commit 7cea6a6419
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 27 additions and 9 deletions

View File

@ -1,9 +1,9 @@
#!/usr/bin/env python
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
import argparse import argparse
import subprocess import subprocess
import torch import torch
from mmengine.logging import print_log
def parse_args():
    """Parse the command-line options of the publish script.

    Returns:
        argparse.Namespace: Holds ``in_file``, ``out_file`` and
        ``save_keys`` (defaults to ``['meta', 'state_dict']``).
    """
    arg_parser = argparse.ArgumentParser(
        description='Process a checkpoint to be published')
    arg_parser.add_argument('in_file', help='input checkpoint filename')
    arg_parser.add_argument('out_file', help='output checkpoint filename')
    arg_parser.add_argument(
        '--save-keys',
        nargs='+',
        type=str,
        default=['meta', 'state_dict'],
        help='keys to save in the published checkpoint')
    return arg_parser.parse_args()
def _torch_version_tuple():
    """Return torch's ``(major, minor)`` version as a tuple of ints.

    ``torch.__version__`` may carry local suffixes (e.g. ``2.0.1+cu118``),
    so strip them before parsing. Falls back to ``(0, 0)`` when the version
    string cannot be parsed.
    """
    parts = torch.__version__.split('+')[0].split('.')
    try:
        return int(parts[0]), int(parts[1])
    except (IndexError, ValueError):
        return 0, 0


def process_checkpoint(in_file, out_file, save_keys=None):
    """Strip a training checkpoint down to the keys worth publishing.

    Args:
        in_file (str): Input checkpoint filename.
        out_file (str): Output checkpoint filename. The published file is
            renamed to ``<out_file stem>-<sha256 prefix>.pth``.
        save_keys (list[str], optional): Keys to keep in the published
            checkpoint. Defaults to ``['meta', 'state_dict']``.
    """
    # Avoid a shared mutable default argument; None means the documented
    # default of keeping only `meta` and `state_dict`.
    if save_keys is None:
        save_keys = ['meta', 'state_dict']
    checkpoint = torch.load(in_file, map_location='cpu')

    # only keep `meta` and `state_dict` for smaller file size
    for k in list(checkpoint.keys()):
        if k not in save_keys:
            print_log(
                f'Key `{k}` will be removed because it is not in '
                f'save_keys. If you want to keep it, '
                f'please set --save-keys.',
                logger='current')
            checkpoint.pop(k, None)

    # if it is necessary to remove some sensitive data in checkpoint['meta'],
    # add the code here.
    # torch >= 1.6 defaults to zipfile serialization; save in the legacy
    # format for wider compatibility. NOTE: a plain string comparison of
    # `torch.__version__` is wrong (e.g. '1.10' < '1.6' lexicographically),
    # so compare parsed version tuples instead.
    if _torch_version_tuple() >= (1, 6):
        torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False)
    else:
        torch.save(checkpoint, out_file)

    sha = subprocess.check_output(['sha256sum', out_file]).decode()
    # `str.rstrip('.pth')` strips *characters*, not a suffix (it would mangle
    # names such as 'depth.pth'); remove the extension explicitly instead.
    stem = out_file[:-len('.pth')] if out_file.endswith('.pth') else out_file
    final_file = stem + f'-{sha[:8]}.pth'
    # Use `run` (not `Popen`) so the rename has completed — and succeeded —
    # before we report the final path.
    subprocess.run(['mv', out_file, final_file], check=True)
    print_log(
        f'The published model is saved at {final_file}.', logger='current')
def main():
    """Entry point: parse CLI options and publish the checkpoint."""
    cli_args = parse_args()
    process_checkpoint(cli_args.in_file, cli_args.out_file,
                       cli_args.save_keys)
if __name__ == '__main__': if __name__ == '__main__':