mirror of
https://github.com/huggingface/pytorch-image-models.git
synced 2025-06-03 15:01:08 +08:00
Modified save_checkpoint to always save last checkpoint. Fixes #98.
This commit is contained in:
parent
56e2ac3a6d
commit
d92cc4da54
@ -62,6 +62,12 @@ class CheckpointSaver:
|
|||||||
|
|
||||||
def save_checkpoint(self, model, optimizer, args, epoch, model_ema=None, metric=None, use_amp=False):
|
def save_checkpoint(self, model, optimizer, args, epoch, model_ema=None, metric=None, use_amp=False):
|
||||||
assert epoch >= 0
|
assert epoch >= 0
|
||||||
|
tmp_save_path = os.path.join(self.checkpoint_dir, 'tmp' + self.extension)
|
||||||
|
last_save_path = os.path.join(self.checkpoint_dir, 'last' + self.extension)
|
||||||
|
self._save(tmp_save_path, model, optimizer, args, epoch, model_ema, metric, use_amp)
|
||||||
|
if os.path.exists(last_save_path):
|
||||||
|
os.unlink(last_save_path) # required for Windows support.
|
||||||
|
os.rename(tmp_save_path, last_save_path)
|
||||||
worst_file = self.checkpoint_files[-1] if self.checkpoint_files else None
|
worst_file = self.checkpoint_files[-1] if self.checkpoint_files else None
|
||||||
if (len(self.checkpoint_files) < self.max_history
|
if (len(self.checkpoint_files) < self.max_history
|
||||||
or metric is None or self.cmp(metric, worst_file[1])):
|
or metric is None or self.cmp(metric, worst_file[1])):
|
||||||
@ -69,7 +75,7 @@ class CheckpointSaver:
|
|||||||
self._cleanup_checkpoints(1)
|
self._cleanup_checkpoints(1)
|
||||||
filename = '-'.join([self.save_prefix, str(epoch)]) + self.extension
|
filename = '-'.join([self.save_prefix, str(epoch)]) + self.extension
|
||||||
save_path = os.path.join(self.checkpoint_dir, filename)
|
save_path = os.path.join(self.checkpoint_dir, filename)
|
||||||
self._save(save_path, model, optimizer, args, epoch, model_ema, metric, use_amp)
|
os.link(last_save_path, save_path)
|
||||||
self.checkpoint_files.append((save_path, metric))
|
self.checkpoint_files.append((save_path, metric))
|
||||||
self.checkpoint_files = sorted(
|
self.checkpoint_files = sorted(
|
||||||
self.checkpoint_files, key=lambda x: x[1],
|
self.checkpoint_files, key=lambda x: x[1],
|
||||||
@ -83,7 +89,10 @@ class CheckpointSaver:
|
|||||||
if metric is not None and (self.best_metric is None or self.cmp(metric, self.best_metric)):
|
if metric is not None and (self.best_metric is None or self.cmp(metric, self.best_metric)):
|
||||||
self.best_epoch = epoch
|
self.best_epoch = epoch
|
||||||
self.best_metric = metric
|
self.best_metric = metric
|
||||||
shutil.copyfile(save_path, os.path.join(self.checkpoint_dir, 'model_best' + self.extension))
|
best_save_path = os.path.join(self.checkpoint_dir, 'model_best' + self.extension)
|
||||||
|
if os.path.exists(best_save_path):
|
||||||
|
os.unlink(best_save_path)
|
||||||
|
os.link(last_save_path, best_save_path)
|
||||||
|
|
||||||
return (None, None) if self.best_metric is None else (self.best_metric, self.best_epoch)
|
return (None, None) if self.best_metric is None else (self.best_metric, self.best_epoch)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user