mmpretrain/docs/zh_CN/stat.py

250 lines
7.8 KiB
Python
Raw Normal View History

#!/usr/bin/env python
import re
import warnings
from collections import defaultdict
from pathlib import Path
from modelindex.load_model_index import load
from modelindex.models.Result import Result
from tabulate import tabulate
# Root directory that repository-relative paths (model-index.yml, collection
# READMEs) are resolved against: two levels above this file's directory.
MMPT_ROOT = Path(__file__).absolute().parents[2]
PAPERS_ROOT = Path('papers') # Path to save generated paper pages.
# URL prefix prepended to repository-relative paths to form GitHub links.
GITHUB_PREFIX = 'https://github.com/open-mmlab/mmpretrain/blob/main/'
# Markdown skeleton of the statistics page. The four placeholders
# (num_papers, type_msg, num_ckpts, paper_msg) are filled with str.format().
MODELZOO_TEMPLATE = """\
# 模型库统计
在本页面中我们列举了我们支持的[所有算法](#所有已支持的算法)。你可以点击链接跳转至对应的模型详情页面。
另外我们还列出了我们提供的所有模型权重文件你可以使用排序和搜索功能找到需要的模型权重并使用链接跳转至模型详情页面
## 所有已支持的算法
* 论文数量{num_papers}
{type_msg}
* 模型权重文件数量{num_ckpts}
{paper_msg}
""" # noqa: E501
# Column titles shown in the summary tables in place of the raw metric keys;
# metrics without an entry keep their original key as the column title.
METRIC_ALIAS = {
'Top 1 Accuracy': 'Top-1 (%)',
'Top 5 Accuracy': 'Top-5 (%)',
}
# Model index parsed from model-index.yml; loaded once at import time and
# shared by all module-level calls below.
model_index = load(str(MMPT_ROOT / 'model-index.yml'))
def build_collections(model_index):
    """Cross-link the collections and models of a model index in place.

    Every collection receives a fresh ``models`` list. Every model is appended
    to the list of the collection named by ``model.in_collection`` and gets
    two new attributes: ``collection`` (the owning collection) and ``tasks``
    (the ``task`` of each entry in ``model.results``, or an empty list when
    ``model.results`` is ``None``).

    Args:
        model_index: Object exposing ``collections`` and ``models`` iterables.

    Raises:
        KeyError: If ``model.in_collection`` names no known collection.
    """
    name_to_collection = {}
    for collection in model_index.collections:
        collection.models = []
        name_to_collection[collection.name] = collection

    for model in model_index.models:
        owner = name_to_collection[model.in_collection]
        owner.models.append(model)
        model.collection = owner
        if model.results is not None:
            model.tasks = [entry.task for entry in model.results]
        else:
            model.tasks = []
# Attach the models/collection/tasks cross-references to the loaded index
# before any page or table is generated from it.
build_collections(model_index)
def count_papers(collections):
    """Write the algorithm overview to ``modelzoo_statistics.md``.

    For every collection the README is scanned for ``[model](http...)`` links
    to count its checkpoints, and one bullet line linking to the generated
    paper page is produced. The totals and bullet lines are substituted into
    ``MODELZOO_TEMPLATE`` and the result overwrites ``modelzoo_statistics.md``
    in the current working directory.

    Args:
        collections: Sized iterable of collections. Each one must provide
            ``readme`` (path relative to ``MMPT_ROOT``), ``paper['Title']``,
            a ``data`` mapping (optional ``'type'`` key, defaulting to
            ``'Algorithm'``) and ``filepath``.
    """
    total_num_ckpts = 0
    type_count = defaultdict(int)
    paper_msgs = []

    for collection in collections:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale's default encoding.
        with open(MMPT_ROOT / collection.readme, encoding='utf-8') as f:
            readme_text = f.read()

        # The set removes duplicated links. Note that ``.*`` is greedy, so a
        # match extends to the last ``)`` on the same line.
        ckpts = {
            link.lower().strip()
            for link in re.findall(r'\[model\]\((https?.*)\)', readme_text)
        }
        total_num_ckpts += len(ckpts)

        title = collection.paper['Title']
        papertype = collection.data.get('type', 'Algorithm')
        type_count[papertype] += 1

        # Link target is the generated page ``papers/<folder name>.md``;
        # as_posix() keeps forward slashes in the markdown link everywhere.
        page = PAPERS_ROOT / Path(
            collection.filepath).parent.with_suffix('.md').name
        paper_msgs.append(
            f'\t- [{papertype}] [{title}]({page.as_posix()}) '
            f'({len(ckpts)} ckpts)')

    type_msg = '\n'.join(
        f'\t- {type_}: {count}' for type_, count in type_count.items())
    paper_msg = '\n'.join(paper_msgs)

    modelzoo = MODELZOO_TEMPLATE.format(
        num_papers=len(collections),
        num_ckpts=total_num_ckpts,
        type_msg=type_msg,
        paper_msg=paper_msg,
    )

    # The template contains Chinese text, so the encoding must be explicit.
    with open('modelzoo_statistics.md', 'w', encoding='utf-8') as f:
        f.write(modelzoo)
# Create (overwrite) modelzoo_statistics.md with the overview section; the
# table generators below append to the same file.
count_papers(model_index.collections)
def generate_paper_page(collection):
    """Write a documentation copy of a collection's README under PAPERS_ROOT.

    The copy is named ``<folder name>.md`` after the folder containing
    ``collection.filepath``. Compared with the README it has:

    * relative markdown links rewritten to absolute GitHub links,
    * a front-matter header with ``github_page`` pointing to the README,
    * ``<!-- [TABS-BEGIN] -->`` ... ``<!-- [TABS-END] -->`` regions converted
      into a ``::::{tabs}`` block.

    Args:
        collection: Collection providing ``readme`` (path relative to
            ``MMPT_ROOT``) and ``filepath``.

    Raises:
        AssertionError: If a relative link in the README points to a path
            that does not exist.
    """
    PAPERS_ROOT.mkdir(exist_ok=True)

    # Decode explicitly as UTF-8 so the page does not depend on the locale's
    # default encoding.
    with open(MMPT_ROOT / collection.readme, encoding='utf-8') as f:
        readme = f.read()
    folder = Path(collection.filepath).parent
    copy = PAPERS_ROOT / folder.with_suffix('.md').name

    def replace_link(matchobj):
        """Replace a relative markdown link with a GitHub link."""
        name = matchobj.group(1)
        link = matchobj.group(2)
        if not link.startswith('http'):
            target = folder / link
            assert target.exists(), \
                f'Link not found:\n{collection.readme}: {link}'
            rel_link = target.absolute().relative_to(MMPT_ROOT)
            # as_posix() keeps forward slashes in the URL on every platform
            # (str() of a Windows path would insert backslashes).
            link = GITHUB_PREFIX + rel_link.as_posix()
        return f'[{name}]({link})'

    content = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', replace_link, readme)
    content = f'---\ngithub_page: /{collection.readme}\n---\n' + content

    def make_tabs(matchobj):
        """Convert one TABS region from bold-title sections into tabs."""
        content = matchobj.group()
        content = content.replace('<!-- [TABS-BEGIN] -->', '')
        content = content.replace('<!-- [TABS-END] -->', '')

        # Split on lines consisting solely of "**{Tab-Name}**". Because the
        # pattern has a capture group, the result alternates title, body,
        # title, body, ...; [1:] drops the text before the first title.
        splits = re.split(r'^\*\*(.*)\*\*$', content, flags=re.M)[1:]
        tabs_list = []
        for title, tab_content in zip(splits[::2], splits[1::2]):
            title = ':::{tab} ' + title + '\n'
            tab_content = tab_content.strip() + '\n:::\n'
            tabs_list.append(title + tab_content)

        return '::::{tabs}\n' + ''.join(tabs_list) + '::::'

    if '<!-- [TABS-BEGIN] -->' in content and '<!-- [TABS-END] -->' in content:
        # Make the TABS block a selective tabs block. This is best effort: a
        # failure only emits a warning and the region is left unconverted.
        try:
            pattern = r'<!-- \[TABS-BEGIN\] -->([\d\D]*?)<!-- \[TABS-END\] -->'
            content = re.sub(pattern, make_tabs, content)
        except Exception as e:
            warnings.warn(f'Can not parse the TABS, get an error : {e}')

    with open(copy, 'w', encoding='utf-8') as copy_file:
        copy_file.write(content)
# Render one documentation page per collection of the model index.
for collection in model_index.collections:
    generate_paper_page(collection)
def scatter_results(models):
    """Flatten models into a list of ``(model, result)`` pairs.

    A model contributes one pair per entry of ``model.results``. A model
    whose ``results`` is ``None`` contributes a single pair holding a
    placeholder ``Result`` built with ``task=None``, ``dataset=None`` and
    empty ``metrics``.

    Args:
        models: Iterable of models exposing a ``results`` attribute.

    Returns:
        list: ``(model, result)`` tuples in input order.
    """
    pairs = []
    for model in models:
        if model.results is not None:
            pairs.extend((model, result) for result in model.results)
            continue
        placeholder = Result(task=None, dataset=None, metrics={})
        pairs.append((model, placeholder))
    return pairs
def generate_summary_table(task, model_result_pairs, title=None):
    """Append a summary table for one task to ``modelzoo_statistics.md``.

    Only pairs whose ``result.task`` equals ``task`` are listed. The table
    has one column per metric key found among those results (sorted), plus
    the model name, parameter count in millions, FLOPs in billions and a
    link to the generated paper page.

    Args:
        task: Task to select; compared with ``result.task`` using ``==``
            (``None`` selects results without a task).
        model_result_pairs: Iterable of ``(model, result)`` tuples. It is
            traversed once, so a one-shot iterator is acceptable.
        title: Optional markdown heading written before the table.
    """
    selected = [(model, result) for model, result in model_result_pairs
                if result.task == task]

    metric_names = set()
    for _, result in selected:
        metric_names.update(result.metrics.keys())
    metrics = sorted(metric_names)

    rows = []
    for model, result in selected:
        # Both values may be missing from the metadata; leave the cell empty
        # (None) instead of failing on the division.
        parameters = model.metadata.parameters
        params = None if parameters is None else f'{parameters / 1e6:.2f}'
        raw_flops = model.metadata.flops
        flops = None if raw_flops is None else f'{raw_flops / 1e9:.2f}'

        readme = Path(model.collection.filepath).parent.with_suffix('.md').name
        # as_posix() keeps forward slashes in the markdown link everywhere.
        page = f'[链接]({(PAPERS_ROOT / readme).as_posix()})'

        # Metrics this result does not report become empty cells.
        model_metrics = [
            str(result.metrics.get(metric, '')) for metric in metrics
        ]
        rows.append([model.name, params, flops, *model_metrics, page])

    # Headings and headers contain Chinese text: be explicit about encoding.
    with open('modelzoo_statistics.md', 'a', encoding='utf-8') as f:
        if title is not None:
            f.write(f'\n{title}')
        f.write("""\n```{table}\n:class: model-summary\n""")
        header = [
            '模型',
            '参数量 (M)',
            'Flops (G)',
            *[METRIC_ALIAS.get(metric, metric) for metric in metrics],
            'Readme',
        ]
        table_cfg = dict(
            tablefmt='pipe',
            floatfmt='.2f',
            numalign='right',
            stralign='center')
        f.write(tabulate(rows, header, **table_cfg))
        f.write('\n```\n')
def generate_dataset_wise_table(task, model_result_pairs, title=None):
    """Append one summary table per dataset for the given task.

    Pairs whose ``result.task`` equals ``task`` are grouped by
    ``result.dataset`` (in first-seen order). The optional ``title`` is
    appended to ``modelzoo_statistics.md`` first, then each group is rendered
    by ``generate_summary_table`` under a ``### <dataset>`` heading.

    Args:
        task: Task to select; compared with ``result.task`` using ``==``.
        model_result_pairs: Iterable of ``(model, result)`` tuples.
        title: Optional markdown heading for the whole section. It is written
            even when no pair matches ``task``.
    """
    dataset_rows = defaultdict(list)
    for model, result in model_result_pairs:
        if result.task == task:
            dataset_rows[result.dataset].append((model, result))

    if title is not None:
        # Section titles contain Chinese text: be explicit about encoding
        # instead of relying on the locale default.
        with open('modelzoo_statistics.md', 'a', encoding='utf-8') as f:
            f.write(f'\n{title}')

    for dataset, pairs in dataset_rows.items():
        generate_summary_table(task, pairs, title=f'### {dataset}')
# Flatten every model of the index into (model, result) pairs; all tables
# below are filtered views of this single list.
model_result_pairs = scatter_results(model_index.models)
# Generate Pretrain Summary
# task=None selects results without a task, which includes the placeholder
# results scatter_results() creates for models that have no results.
generate_summary_table(
task=None,
model_result_pairs=model_result_pairs,
title='## 预训练模型',
)
# Generate Image Classification Summary
# (one table per dataset, appended to modelzoo_statistics.md)
generate_dataset_wise_table(
task='Image Classification',
model_result_pairs=model_result_pairs,
title='## 图像分类',
)
# Generate Multi-Label Classification Summary
generate_dataset_wise_table(
task='Multi-Label Classification',
model_result_pairs=model_result_pairs,
title='## 图像多标签分类',
)
# Generate Image Retrieval Summary
generate_dataset_wise_table(
task='Image Retrieval',
model_result_pairs=model_result_pairs,
title='## 图像检索',
)