mmselfsup/docs/zh_cn/stat.py

116 lines
3.3 KiB
Python

#!/usr/bin/env python
import re
from collections import defaultdict
from pathlib import Path
from modelindex.load_model_index import load
MMSELFSUP_ROOT = Path(__file__).absolute().parents[2]
PAPERS_ROOT = Path('papers') # Path to save generated paper pages.
GITHUB_PREFIX = 'https://github.com/open-mmlab/mmselfsup/blob/1.x/'
MODELZOO_TEMPLATE = """
# 模型库数据汇总
* Number of papers: {num_papers}
{type_msg}
* Number of checkpoints: {num_ckpts}
{paper_msg}
"""
model_index = load(str(MMSELFSUP_ROOT / 'model-index.yml'))
def build_collections(model_index):
col_by_name = {}
for col in model_index.collections:
setattr(col, 'models', [])
col_by_name[col.name] = col
for model in model_index.models:
col = col_by_name[model.in_collection]
col.models.append(model)
setattr(model, 'collection', col)
build_collections(model_index)
def count_papers(model_index):
ckpt_dict = dict()
type_count = defaultdict(int)
paper_msgs = []
for model in model_index.models:
if model.collection.name in ckpt_dict.keys():
if model.weights:
ckpt_dict[model.collection.name] += 1
else:
ckpt_dict[model.collection.name] = 1
downstream_info = model.data.get('Downstream', [])
for downstream_task in downstream_info:
if downstream_task.get('Weights', None):
ckpt_dict[model.collection.name] += 1
for collection in model_index.collections:
name = collection.name
title = collection.paper['Title']
papertype = collection.data.get('type', 'Algorithm')
type_count[papertype] += 1
with open(MMSELFSUP_ROOT / collection.readme) as f:
readme = f.read()
readme = PAPERS_ROOT / Path(
collection.filepath).parent.with_suffix('.md').name
paper_msgs.append(
f'\t- [{papertype}] [{title}]({readme}) ({ckpt_dict[name]} '
f'ckpts)')
type_msg = '\n'.join(
[f'\t- {type_}: {count}' for type_, count in type_count.items()])
paper_msg = '\n'.join(paper_msgs)
modelzoo = MODELZOO_TEMPLATE.format(
num_papers=sum(type_count.values()),
num_ckpts=sum(ckpt_dict.values()),
type_msg=type_msg,
paper_msg=paper_msg,
)
with open('model_zoo_statistics.md', 'w') as f:
f.write(modelzoo)
count_papers(model_index)
def generate_paper_page(collection):
PAPERS_ROOT.mkdir(exist_ok=True)
# Write a copy of README
with open(MMSELFSUP_ROOT / collection.readme) as f:
readme = f.read()
folder = Path(collection.filepath).parent
copy = PAPERS_ROOT / folder.with_suffix('.md').name
def replace_link(matchobj):
# Replace relative link to GitHub link.
name = matchobj.group(1)
link = matchobj.group(2)
if not link.startswith('http'):
assert (folder / link).exists(), \
f'Link not found:\n{collection.readme}: {link}'
rel_link = (folder / link).absolute().relative_to(MMSELFSUP_ROOT)
link = GITHUB_PREFIX + str(rel_link)
return f'[{name}]({link})'
content = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', replace_link, readme)
with open(copy, 'w') as copy_file:
copy_file.write(content)
for collection in model_index.collections:
generate_paper_page(collection)