[Feature] Add NPUProfilerHook to profile performance in Ascend device (#925)

* Feature NPUProfilerHook

* Feature NPUProfilerHook
pull/946/head
luomaoling 2023-02-21 17:20:40 +08:00 committed by GitHub
parent e16dacf7e3
commit 3dc2be05d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 179 additions and 3 deletions

View File

@ -23,4 +23,5 @@ mmengine.hooks
SyncBuffersHook
EmptyCacheHook
ProfilerHook
NPUProfilerHook
PrepareTTAHook

View File

@ -23,4 +23,5 @@ mmengine.hooks
SyncBuffersHook
EmptyCacheHook
ProfilerHook
NPUProfilerHook
PrepareTTAHook

View File

@ -7,7 +7,7 @@ from .iter_timer_hook import IterTimerHook
from .logger_hook import LoggerHook
from .naive_visualization_hook import NaiveVisualizationHook
from .param_scheduler_hook import ParamSchedulerHook
from .profiler_hook import ProfilerHook
from .profiler_hook import NPUProfilerHook, ProfilerHook
from .runtime_info_hook import RuntimeInfoHook
from .sampler_seed_hook import DistSamplerSeedHook
from .sync_buffer_hook import SyncBuffersHook
@ -17,5 +17,5 @@ __all__ = [
'Hook', 'IterTimerHook', 'DistSamplerSeedHook', 'ParamSchedulerHook',
'SyncBuffersHook', 'EmptyCacheHook', 'CheckpointHook', 'LoggerHook',
'NaiveVisualizationHook', 'EMAHook', 'RuntimeInfoHook', 'ProfilerHook',
'PrepareTTAHook'
'NPUProfilerHook', 'PrepareTTAHook'
]

View File

@ -1,5 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import sys
import warnings
from typing import Callable, Optional, Union
@ -230,3 +232,103 @@ class ProfilerHook(Hook):
self.profiler.__exit__(None, None, None)
if self.json_trace_path is not None:
self.profiler.export_chrome_trace(self.json_trace_path)
@HOOKS.register_module()
class NPUProfilerHook(Hook):
    """NPUProfiler to analyze performance during training.

    NPU Profiling is used to count the device execution time of all
    operators. The ``torch_npu.npu.profile`` interface is used to complete
    the profiling data collection at each stage of the project, and the data
    is analyzed by the msprof tool and the data can be dumped to further
    manually analyze the key performance bottlenecks. For more details on
    the ``torch_npu.npu.profile`` interface, please visit
    https://gitee.com/ascend/pytorch/blob/master/torch_npu/npu/profiler.py#profile

    Args:
        begin (int): Number of start iterations for profiling. Defaults to 0.
        end (int): Number of end iterations for profiling. Defaults to 1.
        result_path (str): The path to save the profiling results file.
            Defaults to 'cann_profiling'.
        exit_after_profiling (bool): Whether to exit the program after
            profiling. Defaults to True.
        use_e2e_profiler (bool): Turn on E2E profiling, E2E profiling combines
            performance data at the Pytorch level and the NPU level to analyze
            the bottlenecks of model performance end-to-end, and cannot show
            detailed content, and only as an auxiliary analysis.
            Defaults to False.
        ge_profiling_to_std_out (bool): Turn on GE profiling, GE uses to
            collect the profiling data of the host side scheduling of the
            Ascend device. Defaults to False.

    Examples:
        >>> cfg = ...
        >>> custom_hooks = [dict(type='NPUProfilerHook', end=2)]
        >>> cfg.merge_from_dict({'custom_hooks': custom_hooks})
        >>> runner = Runner.from_cfg(cfg)
        >>> runner.train()
    """
    # Run after all other hooks so profiling brackets the real work.
    priority = 'VERY_LOW'

    def __init__(self,
                 *,
                 begin: int = 0,
                 end: int = 1,
                 result_path: str = 'cann_profiling',
                 exit_after_profiling: bool = True,
                 use_e2e_profiler: bool = False,
                 ge_profiling_to_std_out: bool = False):
        try:
            import torch_npu
        except ImportError as e:
            # Chain the original error so the missing-package cause is visible.
            raise ImportError('Failed to import torch_npu module') from e

        if begin >= end:
            # Note the trailing space inside the first fragment: implicit
            # string concatenation does not insert one.
            raise ValueError(
                'The iteration to start profiling should not be greater '
                'than or equal to profile end')

        self.begin = begin
        self.end = end
        self.result_path = result_path
        self.exit_after_profiling = exit_after_profiling

        if ge_profiling_to_std_out:
            os.environ['GE_PROFILING_TO_STD_OUT'] = '1'

        # exist_ok=True already makes this safe if the directory exists, so
        # no separate existence check is needed (and it avoids a race).
        os.makedirs(self.result_path, exist_ok=True)

        self.profiler = torch_npu.npu.profile(
            self.result_path, use_e2e_profiler=use_e2e_profiler)

    @master_only
    def before_run(self, runner):
        """Validate the profiling window against the runner's schedule."""
        if self.end > runner.max_iters:
            raise ValueError(
                'The profiling end iteration should not be greater '
                'than the max iteration')

    @master_only
    def before_train_iter(self, runner, batch_idx, data_batch=None):
        """Enter the NPU profiler context at the configured start iter."""
        if runner.iter == self.begin:
            self.profiler.__enter__()
            runner.logger.info('NPUProfiler starts profiling...')

    @master_only
    def after_train_iter(self,
                         runner,
                         batch_idx,
                         data_batch=None,
                         outputs=None):
        """Stop profiling at the configured end iter and optionally exit."""
        if runner.iter == self.end - 1:
            runner.logger.info('profiler may take a few minutes to'
                               ' save the profiling result.')
            self.profiler.__exit__(None, None, None)
            if self.exit_after_profiling:
                # Profiling runs are usually throwaway; stop training here.
                sys.exit()

View File

@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as ops
import unittest
from unittest.mock import MagicMock
@ -7,7 +8,8 @@ from unittest.mock import MagicMock
import torch
import mmengine.hooks
from mmengine.hooks import ProfilerHook
from mmengine.device import is_npu_available
from mmengine.hooks import NPUProfilerHook, ProfilerHook
from mmengine.logging import MMLogger
from mmengine.testing import RunnerTestCase
from mmengine.utils import is_installed
@ -202,3 +204,73 @@ class TestProfilerHook(RunnerTestCase):
]
runner = self.build_runner(self.epoch_based_cfg)
runner.train()
@unittest.skipIf(
    not is_npu_available(), reason='Ascend PyTorch and npu devices not exist')
class TestNPUProfilerHook(RunnerTestCase):
    """Tests for ``NPUProfilerHook``; only runs on Ascend NPU devices."""

    def test_init(self):
        path = ops.join(self.temp_dir.name, 'test/cann_profiling')
        # A well-formed window constructs without error.
        NPUProfilerHook(result_path=path)
        # begin must be strictly smaller than end.
        with self.assertRaises(ValueError):
            NPUProfilerHook(begin=1, end=0, result_path=path)

    def test_before_run(self):
        path = ops.join(self.temp_dir.name, 'test/cann_profiling')
        mock_runner = MagicMock()
        mock_runner.max_iters = 1
        mock_runner.logger = MMLogger.get_instance('test_npu_profiler')
        # Window fits inside max_iters: no error.
        NPUProfilerHook(result_path=path).before_run(mock_runner)
        # Window extends past max_iters: rejected.
        with self.assertRaises(ValueError):
            NPUProfilerHook(
                begin=0, end=10, result_path=path).before_run(mock_runner)

    def test_after_train_iter(self):
        path = ops.join(self.temp_dir.name, 'test/cann_profiling')
        mock_runner = MagicMock()
        mock_runner.max_iters = 10000
        mock_runner.logger = MMLogger.get_instance('test_npu_profiler')
        mock_runner.iter = 0
        hook = NPUProfilerHook(begin=0, end=10, result_path=path)
        hook.before_run(mock_runner)
        # Replace the real profiler so no device work happens.
        hook.profiler = MagicMock()
        hook.after_train_iter(mock_runner, 1)

    def test_with_runner(self):
        path = ops.join(self.temp_dir.name, 'test/cann_profiling')

        # First run: explicit begin, keep the process alive afterwards.
        hook_cfg = dict(
            type='NPUProfilerHook',
            begin=0,
            result_path=path,
            exit_after_profiling=False)
        self.epoch_based_cfg['custom_hooks'] = [hook_cfg]
        self.build_runner(self.epoch_based_cfg).train()

        # Second run: GE profiling routed to stdout.
        hook_cfg = dict(
            type='NPUProfilerHook',
            result_path=path,
            ge_profiling_to_std_out=True,
            exit_after_profiling=False)
        self.epoch_based_cfg['custom_hooks'] = [hook_cfg]
        self.build_runner(self.epoch_based_cfg).train()

        self.assertTrue(
            ops.exists(path), 'profiler result path is not generated!')
        self.assertTrue(
            os.getenv('GE_PROFILING_TO_STD_OUT', '0') == '1',
            'GE PROFILING failed to start!')