[Feature] Add NPUProfilerHook to profile performance in Ascend device (#925)

* Feature NPUProfilerHook

parent e16dacf7e3
commit 3dc2be05d5
@@ -23,4 +23,5 @@ mmengine.hooks
    SyncBuffersHook
    EmptyCacheHook
    ProfilerHook
+   NPUProfilerHook
    PrepareTTAHook
@@ -23,4 +23,5 @@ mmengine.hooks
    SyncBuffersHook
    EmptyCacheHook
    ProfilerHook
+   NPUProfilerHook
    PrepareTTAHook
@@ -7,7 +7,7 @@ from .iter_timer_hook import IterTimerHook
 from .logger_hook import LoggerHook
 from .naive_visualization_hook import NaiveVisualizationHook
 from .param_scheduler_hook import ParamSchedulerHook
-from .profiler_hook import ProfilerHook
+from .profiler_hook import NPUProfilerHook, ProfilerHook
 from .runtime_info_hook import RuntimeInfoHook
 from .sampler_seed_hook import DistSamplerSeedHook
 from .sync_buffer_hook import SyncBuffersHook
@@ -17,5 +17,5 @@ __all__ = [
     'Hook', 'IterTimerHook', 'DistSamplerSeedHook', 'ParamSchedulerHook',
     'SyncBuffersHook', 'EmptyCacheHook', 'CheckpointHook', 'LoggerHook',
     'NaiveVisualizationHook', 'EMAHook', 'RuntimeInfoHook', 'ProfilerHook',
-    'PrepareTTAHook'
+    'NPUProfilerHook', 'PrepareTTAHook'
 ]
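With the `__init__.py` change above, the new hook is exported from the package root and registered under its class name. The following is an illustrative sketch, not part of this diff, of the two ways the hook can then be obtained; the registry call mirrors how the Runner resolves `custom_hooks`, and it only succeeds on a machine with torch_npu and an Ascend device.

# Illustrative sketch only (not part of this diff). Requires torch_npu and an
# Ascend device; otherwise NPUProfilerHook.__init__ raises ImportError.
from mmengine.hooks import NPUProfilerHook  # direct import, new in this PR
from mmengine.registry import HOOKS

# Build through the HOOKS registry, the same way the Runner builds the hooks
# listed in custom_hooks.
hook = HOOKS.build(dict(type='NPUProfilerHook', end=2))
assert isinstance(hook, NPUProfilerHook)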
@@ -1,5 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+import os
 import os.path as osp
+import sys
 import warnings
 from typing import Callable, Optional, Union

@@ -230,3 +232,103 @@ class ProfilerHook(Hook):
         self.profiler.__exit__(None, None, None)
         if self.json_trace_path is not None:
             self.profiler.export_chrome_trace(self.json_trace_path)
+
+
+@HOOKS.register_module()
+class NPUProfilerHook(Hook):
+    """NPUProfiler to analyze performance during training.
+
+    NPU profiling counts the device execution time of all operators.
+    The torch_npu.npu.profile interface collects profiling data at each
+    stage of the project; the data is analyzed with the msprof tool and
+    can also be dumped for further manual analysis of the key performance
+    bottlenecks. For more details on the torch_npu.npu.profile interface,
+    please visit
+    https://gitee.com/ascend/pytorch/blob/master/torch_npu/npu/profiler.py#profile
+
+    Args:
+        begin (int): The iteration at which profiling starts. Defaults to 0.
+        end (int): The iteration at which profiling stops (exclusive), i.e.
+            the profiling window covers iterations [begin, end).
+            Defaults to 1.
+        result_path (str): The directory in which to save the profiling
+            results. Defaults to 'cann_profiling'.
+        exit_after_profiling (bool): Whether to exit the program after
+            profiling. Defaults to True.
+        use_e2e_profiler (bool): Whether to turn on E2E profiling. E2E
+            profiling combines performance data at the PyTorch level and
+            the NPU level to analyze model performance bottlenecks
+            end-to-end; it cannot show detailed content and serves only as
+            an auxiliary analysis. Defaults to False.
+        ge_profiling_to_std_out (bool): Whether to turn on GE profiling,
+            which collects the profiling data of the host-side scheduling
+            on the Ascend device. Defaults to False.
+
+    Examples:
+        >>> cfg = ...
+        >>> custom_hooks = [dict(type='NPUProfilerHook', end=2)]
+        >>> cfg.merge_from_dict({'custom_hooks': custom_hooks})
+        >>> runner = Runner.from_cfg(cfg)
+        >>> runner.train()
+    """
+    priority = 'VERY_LOW'
+
+    def __init__(self,
+                 *,
+                 begin: int = 0,
+                 end: int = 1,
+                 result_path: str = 'cann_profiling',
+                 exit_after_profiling: bool = True,
+                 use_e2e_profiler: bool = False,
+                 ge_profiling_to_std_out: bool = False):
+
+        try:
+            import torch_npu
+        except ImportError:
+            raise ImportError('Failed to import torch_npu module')
+
+        if begin >= end:
+            raise ValueError(
+                'The iteration to start profiling should not be greater '
+                'than or equal to the iteration to end profiling')
+
+        self.begin = begin
+        self.end = end
+        self.result_path = result_path
+        self.exit_after_profiling = exit_after_profiling
+
+        if ge_profiling_to_std_out:
+            os.environ['GE_PROFILING_TO_STD_OUT'] = '1'
+
+        if not osp.exists(self.result_path):
+            os.makedirs(self.result_path, exist_ok=True)
+
+        self.profiler = torch_npu.npu.profile(
+            self.result_path, use_e2e_profiler=use_e2e_profiler)
+
+    @master_only
+    def before_run(self, runner):
+
+        if self.end > runner.max_iters:
+            raise ValueError(
+                'The profiling end iteration should not be greater '
+                'than the max iteration')
+
+    @master_only
+    def before_train_iter(self, runner, batch_idx, data_batch=None):
+
+        if runner.iter == self.begin:
+            self.profiler.__enter__()
+            runner.logger.info('NPUProfiler starts profiling...')
+
+    @master_only
+    def after_train_iter(self,
+                         runner,
+                         batch_idx,
+                         data_batch=None,
+                         outputs=None):
+
+        if runner.iter == self.end - 1:
+            runner.logger.info('The profiler may take a few minutes to'
+                               ' save the profiling result.')
+            self.profiler.__exit__(None, None, None)
+            if self.exit_after_profiling:
+                sys.exit()
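For reference, a minimal config-fragment sketch of how the hook added above is typically enabled, assembled from the class docstring and the tests below; the rest of the training config (model, dataloaders, loops, etc.) is assumed and not shown.

# Sketch of a config fragment; the surrounding training config is assumed.
custom_hooks = [
    dict(
        type='NPUProfilerHook',
        begin=0,                       # profiling starts at iteration 0
        end=10,                        # and stops before iteration 10
        result_path='cann_profiling',  # output directory consumed by msprof
        exit_after_profiling=False)    # keep training after the window closes
]

Because the hook's methods are decorated with @master_only, only the rank-0 process collects data, and with exit_after_profiling=True (the default) the program stops via sys.exit() once the profiling window ends.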
@@ -1,5 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+import os
 import os.path as ops
 import unittest
 from unittest.mock import MagicMock

@@ -7,7 +8,8 @@ from unittest.mock import MagicMock
 import torch

 import mmengine.hooks
-from mmengine.hooks import ProfilerHook
+from mmengine.device import is_npu_available
+from mmengine.hooks import NPUProfilerHook, ProfilerHook
 from mmengine.logging import MMLogger
 from mmengine.testing import RunnerTestCase
 from mmengine.utils import is_installed

@@ -202,3 +204,73 @@ class TestProfilerHook(RunnerTestCase):
         ]
         runner = self.build_runner(self.epoch_based_cfg)
         runner.train()
+
+
+@unittest.skipIf(
+    not is_npu_available(),
+    reason='Ascend PyTorch and NPU devices do not exist')
+class TestNPUProfilerHook(RunnerTestCase):
+
+    def test_init(self):
+
+        result_path = ops.join(self.temp_dir.name, 'test/cann_profiling')
+
+        NPUProfilerHook(result_path=result_path)
+
+        with self.assertRaises(ValueError):
+            NPUProfilerHook(begin=1, end=0, result_path=result_path)
+
+    def test_before_run(self):
+        result_path = ops.join(self.temp_dir.name, 'test/cann_profiling')
+        runner = MagicMock()
+        runner.max_iters = 1
+        runner.logger = MMLogger.get_instance('test_npu_profiler')
+
+        hook = NPUProfilerHook(result_path=result_path)
+        hook.before_run(runner)
+
+        with self.assertRaises(ValueError):
+            hook = NPUProfilerHook(begin=0, end=10, result_path=result_path)
+            hook.before_run(runner)
+
+    def test_after_train_iter(self):
+        result_path = ops.join(self.temp_dir.name, 'test/cann_profiling')
+        runner = MagicMock()
+        runner.max_iters = 10000
+        runner.logger = MMLogger.get_instance('test_npu_profiler')
+
+        runner.iter = 0
+
+        hook = NPUProfilerHook(begin=0, end=10, result_path=result_path)
+        hook.before_run(runner)
+
+        hook.profiler = MagicMock()
+        hook.after_train_iter(runner, 1)
+
+    def test_with_runner(self):
+        result_path = ops.join(self.temp_dir.name, 'test/cann_profiling')
+        self.epoch_based_cfg['custom_hooks'] = [
+            dict(
+                type='NPUProfilerHook',
+                begin=0,
+                result_path=result_path,
+                exit_after_profiling=False)
+        ]
+        runner = self.build_runner(self.epoch_based_cfg)
+        runner.train()
+
+        self.epoch_based_cfg['custom_hooks'] = [
+            dict(
+                type='NPUProfilerHook',
+                result_path=result_path,
+                ge_profiling_to_std_out=True,
+                exit_after_profiling=False)
+        ]
+        runner = self.build_runner(self.epoch_based_cfg)
+        runner.train()
+
+        self.assertTrue(
+            ops.exists(result_path), 'profiler result path is not generated!')
+
+        self.assertTrue(
+            os.getenv('GE_PROFILING_TO_STD_OUT', '0') == '1',
+            'GE PROFILING failed to start!')
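Since NPUProfilerHook raises ImportError when torch_npu is missing (see its __init__ above), user code that must also run on non-Ascend machines can guard the hook the same way the test class does with is_npu_available(). A hedged sketch, not part of this diff:

# Illustrative sketch, not part of this diff: register the hook only when an
# Ascend device (and torch_npu) is actually available.
from mmengine.device import is_npu_available

custom_hooks = []
if is_npu_available():
    custom_hooks.append(
        dict(type='NPUProfilerHook', end=2, exit_after_profiling=False))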