#!/usr/bin/env python
|
|
import re
|
|
from collections import defaultdict
|
|
from pathlib import Path
|
|
|
|
from modelindex.load_model_index import load
|
|
|
|
# Directory two levels above the folder that contains this script
# (presumably the repository root -- confirm against the repo layout).
MMSELFSUP_ROOT = Path(__file__).absolute().parents[2]
PAPERS_ROOT = Path('papers')  # Path to save generated paper pages.
# Prefix put in front of repository-relative paths when relative README links
# are rewritten into absolute URLs.
GITHUB_PREFIX = 'https://github.com/open-mmlab/mmselfsup/blob/1.x/'
# Markdown skeleton of the statistics page. The four placeholders are filled
# in with str.format(); the heading is Chinese for "Model zoo data summary".
MODELZOO_TEMPLATE = """
# 模型库数据汇总

* Number of papers: {num_papers}
{type_msg}

* Number of checkpoints: {num_ckpts}
{paper_msg}
"""

# Loaded once at import time; the rest of the script operates on this object.
model_index = load(str(MMSELFSUP_ROOT / 'model-index.yml'))
|
|
|
|
|
|
def build_collections(model_index):
    """Cross-link the collections and models of ``model_index`` in place.

    After the call every collection has a ``models`` list holding the models
    whose ``in_collection`` equals the collection's ``name``, and every model
    has a ``collection`` attribute pointing back at that collection.

    Args:
        model_index: Object exposing iterable ``collections`` (each with a
            ``name``) and ``models`` (each with an ``in_collection``).

    Raises:
        KeyError: If a model names a collection that is not present in
            ``model_index.collections``.
    """
    lookup = {}
    for collection in model_index.collections:
        # Start from an empty list so repeated calls do not accumulate models.
        collection.models = []
        lookup[collection.name] = collection

    for entry in model_index.models:
        owner = lookup[entry.in_collection]
        owner.models.append(entry)
        entry.collection = owner
|
|
|
|
|
|
# Attach `models` to every collection and `collection` to every model; the
# statistics code below reads `model.collection`.
build_collections(model_index)
|
|
|
|
|
|
def count_papers(model_index):
    """Write ``model_zoo_statistics.md`` summarising the model index.

    The page is rendered from ``MODELZOO_TEMPLATE`` and lists the number of
    papers per paper type, the total number of checkpoints, and one line per
    collection linking to its generated page under ``PAPERS_ROOT``.

    A checkpoint is counted for every model that has ``weights`` and for
    every entry of the model's ``Downstream`` list that has a truthy
    ``Weights`` value.

    Args:
        model_index: Model index whose models already carry a ``collection``
            attribute (for example after ``build_collections``).
    """
    # defaultdict so a collection without any model is reported as
    # "0 ckpts" instead of raising KeyError when its line is rendered.
    ckpt_dict = defaultdict(int)
    type_count = defaultdict(int)
    paper_msgs = []

    for model in model_index.models:
        name = model.collection.name
        # Apply the same rule to every model: the first model of a
        # collection only counts when it really ships weights.
        if model.weights:
            ckpt_dict[name] += 1

        # `or []` tolerates a `Downstream:` key whose value is null.
        for downstream_task in model.data.get('Downstream', []) or []:
            if downstream_task.get('Weights', None):
                ckpt_dict[name] += 1

    for collection in model_index.collections:
        name = collection.name
        title = collection.paper['Title']
        papertype = collection.data.get('type', 'Algorithm')
        type_count[papertype] += 1

        # Link target is the generated page, named after the folder that
        # holds the collection's metafile. as_posix() keeps forward slashes
        # in the Markdown link on every platform.
        folder = Path(collection.filepath).parent
        page = (PAPERS_ROOT / folder.with_suffix('.md').name).as_posix()
        paper_msgs.append(
            f'\t- [{papertype}] [{title}]({page}) ({ckpt_dict[name]} '
            f'ckpts)')

    type_msg = '\n'.join(
        f'\t- {type_}: {count}' for type_, count in type_count.items())
    paper_msg = '\n'.join(paper_msgs)

    modelzoo = MODELZOO_TEMPLATE.format(
        num_papers=sum(type_count.values()),
        num_ckpts=sum(ckpt_dict.values()),
        type_msg=type_msg,
        paper_msg=paper_msg,
    )

    # The template contains non-ASCII text, so do not depend on the locale's
    # default encoding.
    with open('model_zoo_statistics.md', 'w', encoding='utf-8') as f:
        f.write(modelzoo)
|
|
|
|
|
|
# Runs at import time and writes model_zoo_statistics.md into the current
# working directory.
count_papers(model_index)
|
|
|
|
|
|
def generate_paper_page(collection):
    """Copy a collection's README into ``PAPERS_ROOT`` with absolute links.

    The README named by ``collection.readme`` is read relative to
    ``MMSELFSUP_ROOT``. Every Markdown inline link ``[text](target)`` whose
    target does not start with ``http`` is treated as a path relative to the
    folder of ``collection.filepath`` and rewritten to ``GITHUB_PREFIX``
    followed by the path relative to ``MMSELFSUP_ROOT``. The result is
    written to ``PAPERS_ROOT/<folder name>.md``.

    Args:
        collection: Object providing ``readme`` and ``filepath``.

    Raises:
        AssertionError: If a relative link points at a path that does not
            exist.
        ValueError: If a relative link resolves outside ``MMSELFSUP_ROOT``
            (raised by ``Path.relative_to``).
    """
    PAPERS_ROOT.mkdir(exist_ok=True)

    # Write a copy of README. Read and write as UTF-8 explicitly so the
    # result does not depend on the platform's locale encoding.
    with open(MMSELFSUP_ROOT / collection.readme, encoding='utf-8') as f:
        readme = f.read()
    folder = Path(collection.filepath).parent
    copy = PAPERS_ROOT / folder.with_suffix('.md').name

    def replace_link(matchobj):
        # Replace relative link to GitHub link.
        name = matchobj.group(1)
        link = matchobj.group(2)
        if not link.startswith('http'):
            target = folder / link
            # Raised explicitly (same exception type as the former assert)
            # so the check is not stripped when Python runs with -O.
            if not target.exists():
                raise AssertionError(
                    f'Link not found:\n{collection.readme}: {link}')
            rel_link = target.absolute().relative_to(MMSELFSUP_ROOT)
            # as_posix() keeps forward slashes in the URL on Windows too.
            link = GITHUB_PREFIX + rel_link.as_posix()
        return f'[{name}]({link})'

    content = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', replace_link, readme)

    with open(copy, 'w', encoding='utf-8') as copy_file:
        copy_file.write(content)
|
|
|
|
|
|
# Runs at import time: write one link-rewritten README copy per collection.
for collection in model_index.collections:
    generate_paper_page(collection)
|