[Feature] add log collector (#1175)
* [Feature] add log collector * Update .dev/log_collector/readme.md Co-authored-by: Miao Zheng <76149310+MeowZheng@users.noreply.github.com> * Update .dev/log_collector/example_config.py Co-authored-by: Miao Zheng <76149310+MeowZheng@users.noreply.github.com> * fix typo and so on * modify readme * fix some bugs and revise the readme.md * more elegant * Update .dev/log_collector/readme.md Co-authored-by: Junjun2016 <hejunjun@sjtu.edu.cn> Co-authored-by: Miao Zheng <76149310+MeowZheng@users.noreply.github.com> Co-authored-by: Junjun2016 <hejunjun@sjtu.edu.cn>pull/1801/head
parent
2f4d52f4d1
commit
ee5fbcff74
.dev/log_collector
|
@ -0,0 +1,18 @@
|
||||||
|
# Directory holding one sub-folder per experiment; each sub-folder
# contains the `.log.json` files written during training.
work_dir = '../../work_dirs'
# Metric used to select the best evaluation entry.
metric = 'mIoU'

# Explicitly list the experiment folders (inside `work_dir`) to collect.
log_items = [
    'segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup',
    'segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr',
    'segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr',
    'segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr'
]
# Alternatively set `ignore_keywords` (mutually exclusive with
# `log_items`): folders whose name contains any keyword, e.g.
# `'segformer'`, are skipped.
# ignore_keywords = ['segformer']

# Extra per-evaluation keys to copy into the output; must not
# include `metric` itself.
other_info_keys = ['mAcc']
# Output markdown summary table.
markdown_file = 'markdowns/lr_in_trans.json.md'
# Output json dump of the collected results.
json_file = 'jsons/trans_in_cnn.json'
|
|
@ -0,0 +1,143 @@
|
||||||
|
# Copyright (c) OpenMMLab. All rights reserved.
|
||||||
|
import argparse
|
||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import os.path as osp
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
from utils import load_config
|
||||||
|
|
||||||
|
# automatically collect all the results
|
||||||
|
|
||||||
|
# The structure of the directory:
|
||||||
|
# ├── work-dir
|
||||||
|
# │ ├── config_1
|
||||||
|
# │ │ ├── time1.log.json
|
||||||
|
# │ │ ├── time2.log.json
|
||||||
|
# │ │ ├── time3.log.json
|
||||||
|
# │ │ ├── time4.log.json
|
||||||
|
# │ ├── config_2
|
||||||
|
# │ │ ├── time5.log.json
|
||||||
|
# │ │ ├── time6.log.json
|
||||||
|
# │ │ ├── time7.log.json
|
||||||
|
# │ │ ├── time8.log.json
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Parse command-line arguments.

    Returns:
        argparse.Namespace: carries ``config_dir``, the path of the
        python config file that drives the log collection.
    """
    arg_parser = argparse.ArgumentParser(
        description='extract info from log.json')
    arg_parser.add_argument('config_dir')
    return arg_parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def has_keyword(name: str, keywords: list) -> bool:
    """Return True if ``name`` contains at least one of ``keywords``."""
    return any(keyword in name for keyword in keywords)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Collect evaluation results from ``*.log.json`` files.

    Reads the config given on the command line, scans every selected
    experiment folder under ``work_dir``, extracts all ``val`` records
    and the final training iteration, and writes the summary to the
    configured json and/or markdown file.
    """
    args = parse_args()
    cfg = load_config(args.config_dir)

    work_dir = cfg['work_dir']
    metric = cfg['metric']
    log_items = cfg.get('log_items', [])
    ignore_keywords = cfg.get('ignore_keywords', [])
    other_info_keys = cfg.get('other_info_keys', [])
    markdown_file = cfg.get('markdown_file', None)
    json_file = cfg.get('json_file', None)

    # create the output directories up front so the final writes succeed
    if json_file and osp.split(json_file)[0] != '':
        os.makedirs(osp.split(json_file)[0], exist_ok=True)
    if markdown_file and osp.split(markdown_file)[0] != '':
        os.makedirs(osp.split(markdown_file)[0], exist_ok=True)

    assert not (log_items and ignore_keywords), \
        'log_items and ignore_keywords cannot be specified at the same time'
    assert metric not in other_info_keys, \
        'other_info_keys should not contain metric'

    # allow a bare string wherever a list of strings is expected
    if ignore_keywords and isinstance(ignore_keywords, str):
        ignore_keywords = [ignore_keywords]
    if other_info_keys and isinstance(other_info_keys, str):
        other_info_keys = [other_info_keys]
    if log_items and isinstance(log_items, str):
        log_items = [log_items]

    # no explicit list: take every folder that no ignore keyword matches
    if not log_items:
        log_items = [
            item for item in sorted(os.listdir(work_dir))
            if not has_keyword(item, ignore_keywords)
        ]

    experiment_info_list = []
    for config_dir in log_items:
        preceding_path = os.path.join(work_dir, config_dir)
        log_list = [
            item for item in os.listdir(preceding_path)
            if item.endswith('.log.json')
        ]
        # chronological order: later files are resumes of earlier ones
        log_list = sorted(
            log_list,
            key=lambda time_str: datetime.datetime.strptime(
                time_str, '%Y%m%d_%H%M%S.log.json'))
        val_list = []
        last_iter = 0
        for log_name in log_list:
            with open(os.path.join(preceding_path, log_name), 'r') as f:
                # skip the env-info line at the top of every log file
                f.readline()
                all_lines = f.readlines()
            # parse each line exactly once (the original code called
            # json.loads on every line two to three times)
            line_dicts = [json.loads(line) for line in all_lines]
            val_list.extend(
                line_dict for line_dict in line_dicts
                if line_dict['mode'] == 'val')
            # the last 'train' record of this file carries the highest
            # iteration number seen so far in the (possibly resumed) run
            for line_dict in reversed(line_dicts):
                if line_dict['mode'] == 'train':
                    last_iter = max(last_iter, line_dict['iter'])
                    break

        new_log_dict = dict(
            method=config_dir, metric_used=metric, last_iter=last_iter)
        # normalize every val record: eval index first, then the metric,
        # then any requested extra keys that are present
        for index, log in enumerate(val_list, 1):
            new_ordered_dict = OrderedDict()
            new_ordered_dict['eval_index'] = index
            new_ordered_dict[metric] = log[metric]
            for key in other_info_keys:
                if key in log:
                    new_ordered_dict[key] = log[key]
            val_list[index - 1] = new_ordered_dict

        assert len(val_list) >= 1, \
            f"work dir {config_dir} doesn't contain any evaluation."
        new_log_dict['last eval'] = val_list[-1]
        new_log_dict['best eval'] = max(val_list, key=lambda x: x[metric])
        experiment_info_list.append(new_log_dict)
        print(f'{config_dir} is processed')

    if json_file:
        with open(json_file, 'w') as f:
            json.dump(experiment_info_list, f, indent=4)

    if markdown_file:
        lines_to_write = []
        for index, log in enumerate(experiment_info_list, 1):
            lines_to_write.append(
                f"|{index}|{log['method']}|{log['best eval'][metric]}"
                f"|{log['best eval']['eval_index']}|"
                f"{log['last eval'][metric]}|"
                f"{log['last eval']['eval_index']}|{log['last_iter']}|\n")
        with open(markdown_file, 'w') as f:
            f.write(f'|exp_num|method|{metric} best|best index|'
                    f'{metric} last|last index|last iter num|\n')
            f.write('|:---:|:---:|:---:|:---:|:---:|:---:|:---:|\n')
            f.writelines(lines_to_write)

    print('processed successfully')
|
||||||
|
|
||||||
|
|
||||||
|
# Run the collector only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
|
@ -0,0 +1,143 @@
|
||||||
|
# Log Collector
|
||||||
|
|
||||||
|
## Function
|
||||||
|
|
||||||
|
Automatically collect logs and write the result in a json file or markdown file.
|
||||||
|
|
||||||
|
If there are several `.log.json` files in one folder, Log Collector assumes that the `.log.json` files other than the first one are resumed from the preceding `.log.json` file. Log Collector returns the result considering all `.log.json` files.
|
||||||
|
|
||||||
|
## Usage:
|
||||||
|
|
||||||
|
To use log collector, you need to write a config file to configure the log collector first.
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
example_config.py:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# The work directory that contains folders that contain .log.json files.
|
||||||
|
work_dir = '../../work_dirs'
|
||||||
|
# The metric used to find the best evaluation.
|
||||||
|
metric = 'mIoU'
|
||||||
|
|
||||||
|
# **Don't specify the log_items and ignore_keywords at the same time.**
|
||||||
|
# Specify the log files we would like to collect in `log_items`.
|
||||||
|
# The folders specified should be the subdirectories of `work_dir`.
|
||||||
|
log_items = [
|
||||||
|
'segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup',
|
||||||
|
'segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr',
|
||||||
|
'segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr',
|
||||||
|
'segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr'
|
||||||
|
]
|
||||||
|
# Or specify `ignore_keywords`. The folders whose name contain one
|
||||||
|
# of the keywords in the `ignore_keywords` list(e.g., `'segformer'`)
|
||||||
|
# won't be collected.
|
||||||
|
# ignore_keywords = ['segformer']
|
||||||
|
|
||||||
|
# Other log items in .log.json that you want to collect.
|
||||||
|
# should not include metric.
|
||||||
|
other_info_keys = ['mAcc']
|
||||||
|
# The output markdown file's name.
|
||||||
|
markdown_file = 'markdowns/lr_in_trans.json.md'
|
||||||
|
# The output json file's name. (optional)
|
||||||
|
json_file = 'jsons/trans_in_cnn.json'
|
||||||
|
```
|
||||||
|
|
||||||
|
The structure of the work-dir directory should be like:
|
||||||
|
|
||||||
|
```text
|
||||||
|
├── work-dir
|
||||||
|
│ ├── folder1
|
||||||
|
│ │ ├── time1.log.json
|
||||||
|
│ │ ├── time2.log.json
|
||||||
|
│ │ ├── time3.log.json
|
||||||
|
│ │ ├── time4.log.json
|
||||||
|
│ ├── folder2
|
||||||
|
│ │ ├── time5.log.json
|
||||||
|
│ │ ├── time6.log.json
|
||||||
|
│ │ ├── time7.log.json
|
||||||
|
│ │ ├── time8.log.json
|
||||||
|
```
|
||||||
|
|
||||||
|
Then, cd to the log collector folder.
|
||||||
|
|
||||||
|
Now you can run log_collector.py by using the command:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python log_collector.py ./example_config.py
|
||||||
|
```
|
||||||
|
|
||||||
|
The output markdown file is like:
|
||||||
|
|
||||||
|
|exp_num|method|mIoU best|best index|mIoU last|last index|last iter num|
|
||||||
|
|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
|
||||||
|
|1|segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup|0.2776|10|0.2776|10|160000|
|
||||||
|
|2|segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr|0.2802|10|0.2802|10|160000|
|
||||||
|
|3|segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr|0.4943|11|0.4943|11|160000|
|
||||||
|
|4|segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr|0.4883|11|0.4883|11|160000|
|
||||||
|
|
||||||
|
The output json file is like:
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"method": "segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup",
|
||||||
|
"metric_used": "mIoU",
|
||||||
|
"last_iter": 160000,
|
||||||
|
"last eval": {
|
||||||
|
"eval_index": 10,
|
||||||
|
"mIoU": 0.2776,
|
||||||
|
"mAcc": 0.3779
|
||||||
|
},
|
||||||
|
"best eval": {
|
||||||
|
"eval_index": 10,
|
||||||
|
"mIoU": 0.2776,
|
||||||
|
"mAcc": 0.3779
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"method": "segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr",
|
||||||
|
"metric_used": "mIoU",
|
||||||
|
"last_iter": 160000,
|
||||||
|
"last eval": {
|
||||||
|
"eval_index": 10,
|
||||||
|
"mIoU": 0.2802,
|
||||||
|
"mAcc": 0.3764
|
||||||
|
},
|
||||||
|
"best eval": {
|
||||||
|
"eval_index": 10,
|
||||||
|
"mIoU": 0.2802,
|
||||||
|
"mAcc": 0.3764
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"method": "segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr",
|
||||||
|
"metric_used": "mIoU",
|
||||||
|
"last_iter": 160000,
|
||||||
|
"last eval": {
|
||||||
|
"eval_index": 11,
|
||||||
|
"mIoU": 0.4943,
|
||||||
|
"mAcc": 0.6097
|
||||||
|
},
|
||||||
|
"best eval": {
|
||||||
|
"eval_index": 11,
|
||||||
|
"mIoU": 0.4943,
|
||||||
|
"mAcc": 0.6097
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"method": "segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr",
|
||||||
|
"metric_used": "mIoU",
|
||||||
|
"last_iter": 160000,
|
||||||
|
"last eval": {
|
||||||
|
"eval_index": 11,
|
||||||
|
"mIoU": 0.4883,
|
||||||
|
"mAcc": 0.6061
|
||||||
|
},
|
||||||
|
"best eval": {
|
||||||
|
"eval_index": 11,
|
||||||
|
"mIoU": 0.4883,
|
||||||
|
"mAcc": 0.6061
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
|
@ -0,0 +1,20 @@
|
||||||
|
# Copyright (c) OpenMMLab. All rights reserved.
|
||||||
|
# modified from https://github.dev/open-mmlab/mmcv
|
||||||
|
import os.path as osp
|
||||||
|
import sys
|
||||||
|
from importlib import import_module
|
||||||
|
|
||||||
|
|
||||||
|
def load_config(cfg_dir: str) -> dict:
|
||||||
|
assert cfg_dir.endswith('.py')
|
||||||
|
root_path, file_name = osp.split(cfg_dir)
|
||||||
|
temp_module = osp.splitext(file_name)[0]
|
||||||
|
sys.path.insert(0, root_path)
|
||||||
|
mod = import_module(temp_module)
|
||||||
|
sys.path.pop(0)
|
||||||
|
cfg_dict = {
|
||||||
|
k: v
|
||||||
|
for k, v in mod.__dict__.items() if not k.startswith('__')
|
||||||
|
}
|
||||||
|
del sys.modules[temp_module]
|
||||||
|
return cfg_dict
|
Loading…
Reference in New Issue