Mirror of https://github.com/huggingface/pytorch-image-models.git (synced 2025-06-03 15:01:08 +08:00)
Update benchmark and validate scripts to output results in JSON with a fixed delimiter for use in a multi-process launcher
parent 1331c145a3
commit cf4334391e
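Note: the fixed '--result' delimiter added below lets a multi-process launcher run each model in a child process and recover the JSON payload from captured stdout. A minimal consumer sketch (not part of this commit; the command and helper name are hypothetical):

import json
import subprocess

def collect_result(cmd):
    # Run one benchmark/validate child process and capture its stdout.
    out = subprocess.run(cmd, capture_output=True, text=True).stdout
    # The scripts print '--result' on its own line, followed by indented JSON;
    # everything after the last occurrence of the delimiter is the payload.
    _, sep, payload = out.rpartition('--result')
    if not sep:
        return None  # the script exited before printing a result
    return json.loads(payload)

# Hypothetical usage:
# result = collect_result(['python', 'benchmark.py', '--model', 'resnet50'])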
benchmark.py: 28 changed lines
@@ -473,6 +473,7 @@ def decay_batch_exp(batch_size, factor=0.5, divisor=16):
 def _try_run(model_name, bench_fn, initial_batch_size, bench_kwargs):
     batch_size = initial_batch_size
     results = dict()
+    error_str = 'Unknown'
     while batch_size >= 1:
         torch.cuda.empty_cache()
         try:
@@ -480,13 +481,13 @@ def _try_run(model_name, bench_fn, initial_batch_size, bench_kwargs):
             results = bench.run()
             return results
         except RuntimeError as e:
-            e_str = str(e)
-            print(e_str)
-            if 'channels_last' in e_str:
-                print(f'Error: {model_name} not supported in channels_last, skipping.')
+            error_str = str(e)
+            if 'channels_last' in error_str:
+                _logger.error(f'{model_name} not supported in channels_last, skipping.')
                 break
-            print(f'Error: "{e_str}" while running benchmark. Reducing batch size to {batch_size} for retry.')
+            _logger.warning(f'"{error_str}" while running benchmark. Reducing batch size to {batch_size} for retry.')
             batch_size = decay_batch_exp(batch_size)
+    results['error'] = error_str
     return results
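decay_batch_exp itself is not part of this diff; only its signature appears in the hunk header above. For orientation, a sketch of behavior consistent with that signature, an assumption rather than the actual timm implementation:

def decay_batch_exp(batch_size, factor=0.5, divisor=16):
    # Sketch only: scale the batch down by `factor`, snapping to a multiple
    # of `divisor` while the result is still large, otherwise step down by 1
    # so the retry loop in _try_run always makes progress toward 0.
    out_batch_size = batch_size * factor
    if out_batch_size > divisor:
        out_batch_size = (out_batch_size + 1) // divisor * divisor
    else:
        out_batch_size = batch_size - 1
    return max(0, int(out_batch_size))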
@@ -528,13 +529,14 @@ def benchmark(args):
     model_results = OrderedDict(model=model)
     for prefix, bench_fn in zip(prefixes, bench_fns):
         run_results = _try_run(model, bench_fn, initial_batch_size=batch_size, bench_kwargs=bench_kwargs)
-        if prefix:
+        if prefix and 'error' not in run_results:
             run_results = {'_'.join([prefix, k]): v for k, v in run_results.items()}
         model_results.update(run_results)
-    param_count = model_results.pop('infer_param_count', model_results.pop('train_param_count', 0))
-    model_results.setdefault('param_count', param_count)
-    model_results.pop('train_param_count', 0)
-    return model_results if model_results['param_count'] else dict()
+    if 'error' not in model_results:
+        param_count = model_results.pop('infer_param_count', model_results.pop('train_param_count', 0))
+        model_results.setdefault('param_count', param_count)
+        model_results.pop('train_param_count', 0)
+    return model_results


 def main():
@@ -578,13 +580,15 @@ def main():
             sort_key = 'train_samples_per_sec'
         elif 'profile' in args.bench:
             sort_key = 'infer_gmacs'
+        results = filter(lambda x: sort_key in x, results)
         results = sorted(results, key=lambda x: x[sort_key], reverse=True)
         if len(results):
             write_results(results_file, results)
     else:
         results = benchmark(args)
-        json_str = json.dumps(results, indent=4)
-        print(json_str)
+
+    # output results in JSON to stdout w/ delimiter for runner script
+    print(f'--result\n{json.dumps(results, indent=4)}')


 def write_results(results_file, results):
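The filter() added above is load-bearing: a model that failed now comes back as a dict carrying 'error' but none of the throughput keys, and sorting on a missing key would raise KeyError. Illustrative values only:

# Illustrative only: error results lack the sort key and must be dropped.
results = [
    {'model': 'a', 'infer_samples_per_sec': 1150.0},   # hypothetical numbers
    {'model': 'b', 'error': 'CUDA out of memory'},     # failed run, no sort key
]
sort_key = 'infer_samples_per_sec'
results = filter(lambda x: sort_key in x, results)
results = sorted(results, key=lambda x: x[sort_key], reverse=True)
print([r['model'] for r in results])  # prints ['a']; the failed model is gone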
validate.py: 52 changed lines
@@ -11,6 +11,7 @@ import argparse
 import os
 import csv
 import glob
+import json
 import time
 import logging
 import torch
@@ -263,6 +264,7 @@ def validate(args):
     else:
         top1a, top5a = top1.avg, top5.avg
     results = OrderedDict(
+        model=args.model,
         top1=round(top1a, 4), top1_err=round(100 - top1a, 4),
         top5=round(top5a, 4), top5_err=round(100 - top5a, 4),
         param_count=round(param_count / 1e6, 2),
@@ -276,6 +278,27 @@ def validate(args):
     return results


+def _try_run(args, initial_batch_size):
+    batch_size = initial_batch_size
+    results = OrderedDict()
+    error_str = 'Unknown'
+    while batch_size >= 1:
+        args.batch_size = batch_size
+        torch.cuda.empty_cache()
+        try:
+            results = validate(args)
+            return results
+        except RuntimeError as e:
+            error_str = str(e)
+            if 'channels_last' in error_str:
+                break
+            _logger.warning(f'"{error_str}" while running validation. Reducing batch size to {batch_size} for retry.')
+            batch_size = batch_size // 2
+    results['error'] = error_str
+    _logger.error(f'{args.model} failed to validate ({error_str}).')
+    return results
+
+
 def main():
     setup_default_logging()
     args = parser.parse_args()
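Unlike the benchmark.py version, this _try_run halves the batch size directly instead of calling decay_batch_exp, and total failure is reported by tagging the returned dict with an 'error' key that main() checks below. A quick illustration of the halving schedule (values illustrative):

# Illustrative only: the retry schedule for initial_batch_size=256.
schedule = []
batch_size = 256
while batch_size >= 1:
    schedule.append(batch_size)
    batch_size = batch_size // 2
print(schedule)  # [256, 128, 64, 32, 16, 8, 4, 2, 1]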
@@ -308,36 +331,25 @@ def main():
         _logger.info('Running bulk validation on these pretrained models: {}'.format(', '.join(model_names)))
         results = []
         try:
-            start_batch_size = args.batch_size
+            initial_batch_size = args.batch_size
             for m, c in model_cfgs:
-                batch_size = start_batch_size
                 args.model = m
                 args.checkpoint = c
-                result = OrderedDict(model=args.model)
-                r = {}
-                while not r and batch_size >= args.num_gpu:
-                    torch.cuda.empty_cache()
-                    try:
-                        args.batch_size = batch_size
-                        print('Validating with batch size: %d' % args.batch_size)
-                        r = validate(args)
-                    except RuntimeError as e:
-                        if batch_size <= args.num_gpu:
-                            print("Validation failed with no ability to reduce batch size. Exiting.")
-                            raise e
-                        batch_size = max(batch_size // 2, args.num_gpu)
-                        print("Validation failed, reducing batch size by 50%")
-                result.update(r)
+                r = _try_run(args, initial_batch_size)
+                if 'error' in r:
+                    continue
                 if args.checkpoint:
-                    result['checkpoint'] = args.checkpoint
-                results.append(result)
+                    r['checkpoint'] = args.checkpoint
+                results.append(r)
         except KeyboardInterrupt as e:
             pass
         results = sorted(results, key=lambda x: x['top1'], reverse=True)
         if len(results):
             write_results(results_file, results)
     else:
-        validate(args)
+        results = validate(args)
+    # output results in JSON to stdout w/ delimiter for runner script
+    print(f'--result\n{json.dumps(results, indent=4)}')


 def write_results(results_file, results):
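With this change, both scripts end their stdout with the delimiter line followed by the result JSON. The shape looks like the following, with hypothetical values (the real dict carries further fields after param_count, as the trailing comma in the validate() hunk above suggests):

--result
{
    "model": "resnet50",
    "top1": 80.37,
    "top1_err": 19.63,
    "top5": 95.1,
    "top5_err": 4.9,
    "param_count": 25.56
}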