Added functionality to use multiple loggers simultaneously and added the English documentation on how to use them

2022-04-15 06:44:22 +00:00 · 2022-04-15 06:44:22 +00:00 · 56fccb03c8
parent dde60d69be
commit 56fccb03c8
8 changed files with 99 additions and 16 deletions
--- a/doc/doc_en/logging_en.md
+++ b/doc/doc_en/logging_en.md
@ -0,0 +1,54 @@
+## Logging metrics and models 
+
+PaddleOCR comes with two metric logging tools integrated directly into the training API: [VisualDL](https://readthedocs.org/projects/visualdl/) and [Weights & Biases](https://docs.wandb.ai/). 
+
+### VisualDL
+VisualDL is a visualization analysis tool of PaddlePaddle. The integration allows all training metrics to be logged to a VisualDL dashboard. To use it, add the following line to the `Global` section of the config yaml file -
+
+```
+Global:
+    use_visualdl: True
+```
+
+To see the visualizations, run the following command in your terminal:
+
+```shell
+visualdl --logdir <save_model_dir>
+```
+
+Now open `localhost:8040` in your browser of choice!
+
+### Weights & Biases
+W&B is an MLOps tool that can be used for experiment tracking, dataset/model versioning, visualizing results and collaborating with colleagues. A W&B logger is integrated directly into PaddleOCR. To use it, first install the `wandb` SDK and log in to your wandb account.
+
+```shell
+pip install wandb
+wandb login
+```
+
+If you do not have a wandb account, you can make one [here](https://wandb.ai/site).
+
+To visualize and track your model training add the following flag to your config yaml file under the `Global` section -
+
+```
+Global:
+    use_wandb: True
+```
+
+To pass additional arguments to the `WandbLogger` (they are listed [here](./config_en.md)), add a top-level `wandb` section to the yaml file and put the arguments under it -
+
+```
+wandb:
+    project: my_project
+    entity: my_team
+```
+
+This will automatically log all the training and evaluation metrics to the W&B dashboard, along with the models at every model saving step and evaluation step, together with the appropriate tags and metadata.
+![W&B Dashboard](../imgs_en/wandb_metrics.png)
+
+![W&B Models](../imgs_en/wandb_models.png)
+
+A link to the dashboard is printed to the console at the beginning and end of every training job. You can also access the dashboard by logging into your W&B account in your browser.
+
+### Using Multiple Loggers
+VisualDL and W&B can also be used simultaneously by setting both of the aforementioned flags (`use_visualdl` and `use_wandb`) to `True`.
--- a/doc/imgs_en/wandb_metrics.png
+++ b/doc/imgs_en/wandb_metrics.png
--- a/doc/imgs_en/wandb_models.png
+++ b/doc/imgs_en/wandb_models.png
--- a/ppocr/utils/loggers/init.py
+++ b/ppocr/utils/loggers/init.py
@ -1,2 +1,3 @@
 from .vdl_logger import VDLLogger
-from .wandb_logger import WandbLogger
+from .wandb_logger import WandbLogger
+from .loggers import Loggers
--- a/ppocr/utils/loggers/loggers.py
+++ b/ppocr/utils/loggers/loggers.py
@ -0,0 +1,18 @@
+from .wandb_logger import WandbLogger
+
+class Loggers(object):
+    def __init__(self, loggers):
+        super().__init__()
+        self.loggers = loggers
+
+    def log_metrics(self, metrics, prefix=None, step=None):
+        for logger in self.loggers:
+            logger.log_metrics(metrics, prefix=prefix, step=step)
+    
+    def log_model(self, is_best, prefix, metadata=None):
+        for logger in self.loggers:
+            logger.log_model(is_best=is_best, prefix=prefix, metadata=metadata)
+    
+    def close(self):
+        for logger in self.loggers:
+            logger.close()
--- a/ppocr/utils/loggers/vdl_logger.py
+++ b/ppocr/utils/loggers/vdl_logger.py
@ -13,6 +13,9 @@ class VDLLogger(BaseLogger):

        for k, v in updated_metrics.items():
            self.vdl_writer.add_scalar(k, v, step)
-
+    
+    def log_model(self, is_best, prefix, metadata=None):
+        pass
+    
    def close(self):
        self.vdl_writer.close() 
--- a/ppocr/utils/loggers/wandb_logger.py
+++ b/ppocr/utils/loggers/wandb_logger.py
@ -59,8 +59,8 @@ class WandbLogger(BaseLogger):
    def log_metrics(self, metrics, prefix=None, step=None):
        if not prefix:
            prefix = ""
-        updated_metrics = {prefix + "/" + k: v for k, v in metrics.items()}
-
+        updated_metrics = {prefix.lower() + "/" + k: v for k, v in metrics.items()}
+        
        self.run.log(updated_metrics, step=step)

    def log_model(self, is_best, prefix, metadata=None):
--- a/tools/program.py
+++ b/tools/program.py
@ -31,7 +31,7 @@ from ppocr.utils.stats import TrainingStats
 from ppocr.utils.save_load import save_model
 from ppocr.utils.utility import print_dict, AverageMeter
 from ppocr.utils.logging import get_logger
-from ppocr.utils.loggers import VDLLogger, WandbLogger
+from ppocr.utils.loggers import VDLLogger, WandbLogger, Loggers
 from ppocr.utils import profiler
 from ppocr.data import build_dataloader

@ -362,10 +362,9 @@ def train(config,
                if log_writer is not None:
                    log_writer.log_metrics(metrics={
                        "best_{}".format(main_indicator): best_model_dict[main_indicator]
-                    }, prefix="EVAL", step=global_step)
-
-                    if isinstance(log_writer, WandbLogger):
-                        log_writer.log_model(is_best=True, prefix="best_accuracy", metadata=best_model_dict)
+                        }, prefix="EVAL", step=global_step)
+                    
+                    log_writer.log_model(is_best=True, prefix="best_accuracy", metadata=best_model_dict)

            reader_start = time.time()
        if dist.get_rank() == 0:
@ -381,8 +380,7 @@ def train(config,
                epoch=epoch,
                global_step=global_step)

-            if isinstance(log_writer, WandbLogger):
-                log_writer.log_model(is_best=False, prefix="latest")
+            log_writer.log_model(is_best=False, prefix="latest")

        if dist.get_rank() == 0 and epoch > 0 and epoch % save_epoch_step == 0:
            save_model(
@ -396,9 +394,8 @@ def train(config,
                best_model_dict=best_model_dict,
                epoch=epoch,
                global_step=global_step)
-            
-            if isinstance(log_writer, WandbLogger):
-                log_writer.log_model(is_best=False, prefix='iter_epoch_{}'.format(epoch))
+
+            log_writer.log_model(is_best=False, prefix='iter_epoch_{}'.format(epoch))

    best_str = 'best metric, {}'.format(', '.join(
        ['{}: {}'.format(k, v) for k, v in best_model_dict.items()]))
@ -561,11 +558,14 @@ def preprocess(is_train=False):

    config['Global']['distributed'] = dist.get_world_size() != 1

-    if "use_visualdl" in config['Global'] and config['Global']['use_visualdl'] and dist.get_rank() == 0:
+    loggers = []
+
+    if config['Global']['use_visualdl']:
        save_model_dir = config['Global']['save_model_dir']
        vdl_writer_path = '{}/vdl/'.format(save_model_dir)
        log_writer = VDLLogger(save_model_dir)
-    elif ("use_wandb" in config['Global'] and config['Global']['use_wandb']) or "wandb" in config:
+        loggers.append(log_writer)
+    if config['Global']['use_wandb'] or 'wandb' in config:
        save_dir = config['Global']['save_model_dir']
        wandb_writer_path = "{}/wandb".format(save_dir)
        if "wandb" in config:
@ -574,9 +574,16 @@ def preprocess(is_train=False):
            wandb_params = dict()
        wandb_params.update({'save_dir': save_model_dir})
        log_writer = WandbLogger(**wandb_params, config=config)
+        loggers.append(log_writer)
    else:
        log_writer = None
    print_dict(config, logger)
+
+    if loggers:
+        log_writer = Loggers(loggers)
+    else:
+        log_writer = None
+
    logger.info('train with paddle {} and device {}'.format(paddle.__version__,
                                                            device))
    return config, device, logger, log_writer