Update profiler (#4236)
parent
8d3c3ef45c
commit
9468657502
|
@@ -22,6 +22,8 @@ try:
|
|||
import thop # for FLOPs computation
|
||||
except ImportError:
|
||||
thop = None
|
||||
|
||||
logging.basicConfig(format="%(message)s", level=logging.INFO)
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@@ -103,11 +105,10 @@ def profile(x, ops, n=100, device=None):
|
|||
# m2 = nn.SiLU()
|
||||
# profile(x, [m1, m2], n=100) # profile speed over 100 iterations
|
||||
|
||||
device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
|
||||
device = device or select_device()
|
||||
x = x.to(device)
|
||||
x.requires_grad = True
|
||||
print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '')
|
||||
print(f"\n{'Params':>12s}{'GFLOPs':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}")
|
||||
print(f"{'Params':>12s}{'GFLOPs':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}")
|
||||
for m in ops if isinstance(ops, list) else [ops]:
|
||||
m = m.to(device) if hasattr(m, 'to') else m # device
|
||||
m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m # type
|
||||
|
|
Loading…
Reference in New Issue