[Feature] add log collector (#1175)

* [Feature] add log collector

* Update .dev/log_collector/readme.md

Co-authored-by: Miao Zheng <76149310+MeowZheng@users.noreply.github.com>

* Update .dev/log_collector/example_config.py

Co-authored-by: Miao Zheng <76149310+MeowZheng@users.noreply.github.com>

* fix typo and so on

* modify readme

* fix some bugs and revise the readme.md

* more elegant

* Update .dev/log_collector/readme.md

Co-authored-by: Junjun2016 <hejunjun@sjtu.edu.cn>

Co-authored-by: Miao Zheng <76149310+MeowZheng@users.noreply.github.com>
Co-authored-by: Junjun2016 <hejunjun@sjtu.edu.cn>
pull/1801/head
Rockey 2022-01-14 15:19:23 +08:00 committed by GitHub
parent 2f4d52f4d1
commit ee5fbcff74
4 changed files with 324 additions and 0 deletions

View File

@ -0,0 +1,18 @@
# Root directory that holds one sub-folder per experiment.
work_dir = '../../work_dirs'
# Metric used to pick the best evaluation.
metric = 'mIoU'
# Only collect logs from these sub-folders of `work_dir`.
log_items = [
    'segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup',
    'segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr',
    'segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr',
    'segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr',
]
# Alternatively, set `ignore_keywords` to skip folders whose names contain
# any of the keywords (e.g. 'segformer'); mutually exclusive with `log_items`.
# ignore_keywords = ['segformer']
# Extra keys to record alongside `metric`; must not include `metric` itself.
other_info_keys = ['mAcc']
# Output files for the collected results.
markdown_file = 'markdowns/lr_in_trans.json.md'
json_file = 'jsons/trans_in_cnn.json'

View File

@ -0,0 +1,143 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import datetime
import json
import os
import os.path as osp
from collections import OrderedDict
from utils import load_config
# automatically collect all the results
# The structure of the directory:
# ├── work-dir
# │ ├── config_1
# │ │ ├── time1.log.json
# │ │ ├── time2.log.json
# │ │ ├── time3.log.json
# │ │ ├── time4.log.json
# │ ├── config_2
# │ │ ├── time5.log.json
# │ │ ├── time6.log.json
# │ │ ├── time7.log.json
# │ │ ├── time8.log.json
def parse_args():
    """Parse command-line arguments (the path of the config file)."""
    parser = argparse.ArgumentParser(description='extract info from log.json')
    parser.add_argument('config_dir')
    return parser.parse_args()
def has_keyword(name: str, keywords: list) -> bool:
    """Return True if any keyword occurs as a substring of ``name``."""
    return any(keyword in name for keyword in keywords)
def main():
    """Collect evaluation results from ``.log.json`` files and export them.

    Loads the config passed on the command line, walks the experiment
    folders under ``work_dir``, extracts every 'val' record plus the last
    training iteration per experiment, and writes the summary to the
    configured json file and/or markdown table.
    """
    args = parse_args()
    cfg = load_config(args.config_dir)
    work_dir = cfg['work_dir']
    metric = cfg['metric']

    log_items = cfg.get('log_items', [])
    ignore_keywords = cfg.get('ignore_keywords', [])
    other_info_keys = cfg.get('other_info_keys', [])
    markdown_file = cfg.get('markdown_file', None)
    json_file = cfg.get('json_file', None)

    # Create the parent directories of the output files if needed.
    if json_file and osp.split(json_file)[0] != '':
        os.makedirs(osp.split(json_file)[0], exist_ok=True)
    if markdown_file and osp.split(markdown_file)[0] != '':
        os.makedirs(osp.split(markdown_file)[0], exist_ok=True)

    # `log_items` whitelists folders while `ignore_keywords` blacklists
    # them, so the two options are mutually exclusive. Raise instead of
    # `assert` so the checks survive `python -O`.
    if log_items and ignore_keywords:
        raise ValueError(
            'log_items and ignore_keywords cannot be specified at the '
            'same time')
    if metric in other_info_keys:
        raise ValueError('other_info_keys should not contain metric')

    # Allow a bare string wherever a list of strings is expected.
    if ignore_keywords and isinstance(ignore_keywords, str):
        ignore_keywords = [ignore_keywords]
    if other_info_keys and isinstance(other_info_keys, str):
        other_info_keys = [other_info_keys]
    if log_items and isinstance(log_items, str):
        log_items = [log_items]

    # Default: every folder under work_dir not filtered by ignore_keywords.
    if not log_items:
        log_items = [
            item for item in sorted(os.listdir(work_dir))
            if not has_keyword(item, ignore_keywords)
        ]

    experiment_info_list = []
    for config_dir in log_items:
        preceding_path = os.path.join(work_dir, config_dir)
        # Log files are named '%Y%m%d_%H%M%S.log.json'; sort them
        # chronologically so later files resume from earlier ones.
        log_list = sorted(
            [
                item for item in os.listdir(preceding_path)
                if item.endswith('.log.json')
            ],
            key=lambda time_str: datetime.datetime.strptime(
                time_str, '%Y%m%d_%H%M%S.log.json'))

        val_list = []
        last_iter = 0
        for log_name in log_list:
            with open(os.path.join(preceding_path, log_name), 'r') as f:
                # The first line only holds env/config info; skip it.
                f.readline()
                # Parse every record exactly once (the previous version
                # parsed 'val' lines twice with json.loads).
                line_dicts = [json.loads(line) for line in f]
            val_list.extend(line_dict for line_dict in line_dicts
                            if line_dict['mode'] == 'val')
            # The last 'train' record of each file carries the highest
            # iteration reached in that run.
            for line_dict in reversed(line_dicts):
                if line_dict['mode'] == 'train':
                    last_iter = max(last_iter, line_dict['iter'])
                    break

        new_log_dict = dict(
            method=config_dir, metric_used=metric, last_iter=last_iter)
        # Re-pack each val record: keep only the main metric and the
        # requested extra keys, plus a 1-based evaluation index.
        for index, log in enumerate(val_list, 1):
            new_ordered_dict = OrderedDict()
            new_ordered_dict['eval_index'] = index
            new_ordered_dict[metric] = log[metric]
            for key in other_info_keys:
                if key in log:
                    new_ordered_dict[key] = log[key]
            val_list[index - 1] = new_ordered_dict

        if not val_list:
            raise ValueError(
                f"work dir {config_dir} doesn't contain any evaluation.")
        new_log_dict['last eval'] = val_list[-1]
        new_log_dict['best eval'] = max(val_list, key=lambda x: x[metric])
        experiment_info_list.append(new_log_dict)
        print(f'{config_dir} is processed')

    if json_file:
        with open(json_file, 'w') as f:
            json.dump(experiment_info_list, f, indent=4)

    if markdown_file:
        lines_to_write = []
        for index, log in enumerate(experiment_info_list, 1):
            lines_to_write.append(
                f"|{index}|{log['method']}|{log['best eval'][metric]}"
                f"|{log['best eval']['eval_index']}|"
                f"{log['last eval'][metric]}|"
                f"{log['last eval']['eval_index']}|{log['last_iter']}|\n")
        with open(markdown_file, 'w') as f:
            f.write(f'|exp_num|method|{metric} best|best index|'
                    f'{metric} last|last index|last iter num|\n')
            f.write('|:---:|:---:|:---:|:---:|:---:|:---:|:---:|\n')
            f.writelines(lines_to_write)

    print('processed successfully')
# Entry point when run as a script: `python log_collector.py <config.py>`.
if __name__ == '__main__':
    main()

View File

@ -0,0 +1,143 @@
# Log Collector
## Function
Automatically collect logs and write the result in a json file or markdown file.
If there are several `.log.json` files in one folder, Log Collector assumes that each `.log.json` file after the first one resumes training from the preceding `.log.json` file. Log Collector returns the result considering all `.log.json` files.
## Usage
To use log collector, you need to write a config file to configure the log collector first.
For example:
example_config.py:
```python
# The work directory that contains folders that contains .log.json files.
work_dir = '../../work_dirs'
# The metric used to find the best evaluation.
metric = 'mIoU'
# **Don't specify the log_items and ignore_keywords at the same time.**
# Specify the log files we would like to collect in `log_items`.
# The folders specified should be the subdirectories of `work_dir`.
log_items = [
'segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup',
'segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr',
'segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr',
'segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr'
]
# Or specify `ignore_keywords`. The folders whose name contain one
# of the keywords in the `ignore_keywords` list (e.g., `'segformer'`)
# won't be collected.
# ignore_keywords = ['segformer']
# Other log items in .log.json that you want to collect.
# should not include metric.
other_info_keys = ["mAcc"]
# The output markdown file's name. (optional)
markdown_file = 'markdowns/lr_in_trans.json.md'
# The output json file's name. (optional)
json_file = 'jsons/trans_in_cnn.json'
```
The structure of the work-dir directory should be like
```text
├── work-dir
│ ├── folder1
│ │ ├── time1.log.json
│ │ ├── time2.log.json
│ │ ├── time3.log.json
│ │ ├── time4.log.json
│ ├── folder2
│ │ ├── time5.log.json
│ │ ├── time6.log.json
│ │ ├── time7.log.json
│ │ ├── time8.log.json
```
Then, cd into the log collector folder.
Now you can run log_collector.py by using command:
```bash
python log_collector.py ./example_config.py
```
The output markdown file is like:
|exp_num|method|mIoU best|best index|mIoU last|last index|last iter num|
|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
|1|segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup|0.2776|10|0.2776|10|160000|
|2|segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr|0.2802|10|0.2802|10|160000|
|3|segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr|0.4943|11|0.4943|11|160000|
|4|segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr|0.4883|11|0.4883|11|160000|
The output json file is like:
```json
[
{
"method": "segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup",
"metric_used": "mIoU",
"last_iter": 160000,
"last eval": {
"eval_index": 10,
"mIoU": 0.2776,
"mAcc": 0.3779
},
"best eval": {
"eval_index": 10,
"mIoU": 0.2776,
"mAcc": 0.3779
}
},
{
"method": "segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr",
"metric_used": "mIoU",
"last_iter": 160000,
"last eval": {
"eval_index": 10,
"mIoU": 0.2802,
"mAcc": 0.3764
},
"best eval": {
"eval_index": 10,
"mIoU": 0.2802,
"mAcc": 0.3764
}
},
{
"method": "segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr",
"metric_used": "mIoU",
"last_iter": 160000,
"last eval": {
"eval_index": 11,
"mIoU": 0.4943,
"mAcc": 0.6097
},
"best eval": {
"eval_index": 11,
"mIoU": 0.4943,
"mAcc": 0.6097
}
},
{
"method": "segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr",
"metric_used": "mIoU",
"last_iter": 160000,
"last eval": {
"eval_index": 11,
"mIoU": 0.4883,
"mAcc": 0.6061
},
"best eval": {
"eval_index": 11,
"mIoU": 0.4883,
"mAcc": 0.6061
}
}
]
```

View File

@ -0,0 +1,20 @@
# Copyright (c) OpenMMLab. All rights reserved.
# modified from https://github.dev/open-mmlab/mmcv
import os.path as osp
import sys
from importlib import import_module
def load_config(cfg_dir: str) -> dict:
    """Load a python config file and return its variables as a dict.

    The file is imported as a temporary module and every top-level name
    that does not start with ``__`` is collected.

    Args:
        cfg_dir (str): Path to a ``.py`` config file.

    Returns:
        dict: Mapping from variable names to their values.

    Raises:
        ValueError: If ``cfg_dir`` is not a ``.py`` file.
    """
    # Raise instead of `assert` so the check survives `python -O`.
    if not cfg_dir.endswith('.py'):
        raise ValueError(f'config file should be a .py file, got {cfg_dir}')
    root_path, file_name = osp.split(cfg_dir)
    temp_module = osp.splitext(file_name)[0]
    sys.path.insert(0, root_path)
    try:
        mod = import_module(temp_module)
    finally:
        # Always restore sys.path, even when the import itself fails;
        # the original leaked the inserted entry on error.
        sys.path.pop(0)
    cfg_dict = {
        k: v
        for k, v in mod.__dict__.items() if not k.startswith('__')
    }
    # Drop the module from the import cache so a config with the same
    # file name can be loaded again with fresh contents.
    del sys.modules[temp_module]
    return cfg_dict