Unify the checkpoint of single and multi GPU training
Save model.hyp etc. to the checkpoint when using multi-GPU training (pull/245/head)
parent bfd51f62f8
commit cdb9bde181

train.py (11 changed lines)
@@ -79,7 +79,7 @@ def train(hyp):
    # Create model
    model = Model(opt.cfg).to(device)
    assert model.md['nc'] == nc, '%s nc=%g classes but %s nc=%g classes' % (opt.data, nc, opt.cfg, model.md['nc'])
    model.names = data_dict['names']

    # Image sizes
    gs = int(max(model.stride))  # grid size (max stride)

@@ -172,6 +172,7 @@ def train(hyp):
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
    model.names = data_dict['names']

    # Class frequency
    labels = np.concatenate(dataset.labels, 0)

@@ -314,6 +315,14 @@ def train(hyp):
        # Save model
        save = (not opt.nosave) or (final_epoch and not opt.evolve)
        if save:
            if hasattr(model, 'module'):
                # Duplicate Model parameters for Multi-GPU save
                ema.ema.module.nc = model.nc  # attach number of classes to model
                ema.ema.module.hyp = model.hyp  # attach hyperparameters to model
                ema.ema.module.gr = model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
                ema.ema.module.class_weights = model.class_weights  # attach class weights
                ema.ema.module.names = data_dict['names']

            with open(results_file, 'r') as f:  # create checkpoint
                ckpt = {'epoch': epoch,
                        'best_fitness': best_fitness,
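Why the new block is needed: with torch.nn.DataParallel, the attributes assigned in train() (nc, hyp, gr, class_weights, names) land on the DataParallel wrapper, while the EMA copy that is serialized into the checkpoint exposes the bare network as ema.ema.module, which never received them. The sketch below shows the same idea factored into a reusable helper; the function name attach_checkpoint_attrs, the ema_model parameter, and the fixed attribute tuple are illustrative assumptions, not part of this commit.

def attach_checkpoint_attrs(ema_model, model, names):
    # Hypothetical helper (not from this commit): copy training metadata onto
    # the module that will actually be serialized, so single-GPU and
    # DataParallel checkpoints carry the same fields.
    dst = ema_model.module if hasattr(ema_model, 'module') else ema_model
    for attr in ('nc', 'hyp', 'gr', 'class_weights'):
        # these attributes were set on the (possibly wrapped) training model
        setattr(dst, attr, getattr(model, attr))
    dst.names = names
    return dst

Called as attach_checkpoint_attrs(ema.ema, model, data_dict['names']) just before the ckpt dict is built, a helper like this would cover both the single-GPU and multi-GPU cases with one code path instead of the hasattr branch.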