# PaddleOCR/tools/program.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import platform
import yaml
import time
import datetime
import paddle
import paddle.distributed as dist
from tqdm import tqdm
import cv2
import numpy as np
from argparse import ArgumentParser, RawDescriptionHelpFormatter

from ppocr.utils.stats import TrainingStats
from ppocr.utils.save_load import save_model
from ppocr.utils.utility import print_dict, AverageMeter
from ppocr.utils.logging import get_logger
from ppocr.utils.loggers import WandbLogger, Loggers
from ppocr.utils import profiler
from ppocr.data import build_dataloader


class ArgsParser(ArgumentParser):
    """Command-line parser shared by the training / evaluation tools.

    Recognised options:
        -c / --config: configuration file to use (required; enforced in
            ``parse_args``).
        -o / --opt: one or more ``KEY=VALUE`` overrides; after parsing,
            ``args.opt`` is a dict mapping each KEY to its YAML-decoded VALUE.
        -p / --profiler_options: profiler option string in the form
            ``"key1=value1;key2=value2;key3=value3"``.
    """

    def __init__(self):
        super().__init__(formatter_class=RawDescriptionHelpFormatter)
        self.add_argument("-c", "--config", help="configuration file to use")
        self.add_argument("-o", "--opt", nargs="+", help="set configuration options")
        self.add_argument(
            "-p",
            "--profiler_options",
            type=str,
            default=None,
            help="The option of profiler, which should be in format "
            '"key1=value1;key2=value2;key3=value3".',
        )

    def parse_args(self, argv=None):
        """Parse ``argv`` and convert the ``--opt`` list into a dict.

        Args:
            argv (list[str] | None): Arguments to parse; ``None`` lets argparse
                fall back to ``sys.argv[1:]``.
        Returns:
            argparse.Namespace with ``config``, ``opt`` (dict) and
            ``profiler_options``.
        Raises:
            AssertionError: if ``--config`` was not supplied.
            ValueError: if an ``--opt`` entry has no ``=`` separator.
        """
        args = super().parse_args(argv)
        assert args.config is not None, "Please specify --config=configure_file_path."
        args.opt = self._parse_opt(args.opt)
        return args

    def _parse_opt(self, opts):
        """Turn ``["a.b=1", "c=x"]`` into ``{"a.b": 1, "c": "x"}``.

        Args:
            opts (list[str] | None): Raw ``KEY=VALUE`` strings.
        Returns:
            dict: empty when ``opts`` is ``None`` or empty.
        Raises:
            ValueError: if an entry does not contain ``=``.
        """
        config = {}
        if not opts:
            return config
        for s in opts:
            # Split on the FIRST "=" only, so values that themselves contain
            # "=" (URLs with query strings, base64, expressions) are accepted.
            key, sep, raw_value = s.strip().partition("=")
            if not sep:
                raise ValueError(
                    "Invalid option {!r}: expected the form KEY=VALUE.".format(s)
                )
            # NOTE(security): yaml.Loader can construct arbitrary Python
            # objects. Values come from the operator's command line; do not
            # feed untrusted text through this path.
            config[key] = yaml.load(raw_value, Loader=yaml.Loader)
        return config


def load_config(file_path):
    """
    Load config from yml/yaml file.
    Args:
        file_path (str): Path of the config file to be loaded. The extension
            must be exactly ".yml" or ".yaml".
    Returns: global config (whatever object the YAML document decodes to)
    Raises:
        AssertionError: if the extension is not ".yml" / ".yaml".
    """
    _, ext = os.path.splitext(file_path)
    assert ext in [".yml", ".yaml"], "only support yaml files for now"
    # Use a context manager so the file handle is closed even if parsing fails.
    with open(file_path, "rb") as f:
        # NOTE(security): yaml.Loader can construct arbitrary Python objects;
        # only load configuration files from trusted sources.
        config = yaml.load(f, Loader=yaml.Loader)
    return config


def merge_config(config, opts):
    """
    Apply override options onto the global config, in place.
    Args:
        config (dict): Global config that receives the overrides.
        opts (dict): Overrides. A plain key replaces the entry (or, when the
            new value is a dict and the key already exists, updates the
            existing entry). A dotted key such as "Global.epoch_num" walks the
            nested mappings and assigns the final component.
    Returns: the same ``config`` object, after modification
    """
    for dotted_key, new_value in opts.items():
        path = dotted_key.split(".")

        if len(path) == 1:
            # Plain top-level key: merge dict-into-existing, otherwise replace.
            if isinstance(new_value, dict) and dotted_key in config:
                config[dotted_key].update(new_value)
            else:
                config[dotted_key] = new_value
            continue

        root = path[0]
        assert root in config, (
            "the sub_keys can only be one of global_config: {}, but get: "
            "{}, please check your running command".format(config.keys(), root)
        )
        # Descend through every intermediate component, then set the leaf.
        node = config[root]
        for part in path[1:-1]:
            node = node[part]
        node[path[-1]] = new_value
    return config


def check_device(use_gpu, use_xpu=False, use_npu=False, use_mlu=False):
    """
    Print an error and exit with status 1 when a device flag is enabled but
    the installed paddlepaddle was not compiled with support for it.

    Any ordinary exception raised while probing paddle is swallowed, which
    makes the whole check best-effort.
    """
    template = (
        "Config {} cannot be set as true while your paddle "
        "is not compiled with {} ! \nPlease try: \n"
        "\t1. Install paddlepaddle to run model on {} \n"
        "\t2. Set {} as false in config file to run "
        "model on CPU"
    )

    def _abort(flag, backend, device):
        # Report the unsupported flag and terminate the process.
        print(template.format(flag, backend, device, flag))
        sys.exit(1)

    try:
        if use_gpu and use_xpu:
            # Only reported; execution continues with the checks below.
            print("use_xpu and use_gpu can not both be true.")

        if use_gpu and not paddle.is_compiled_with_cuda():
            _abort("use_gpu", "cuda", "gpu")

        if use_xpu and not paddle.device.is_compiled_with_xpu():
            _abort("use_xpu", "xpu", "xpu")

        if use_npu:
            major = int(paddle.version.major)
            # is_compiled_with_npu() has been updated after paddle-2.4, so
            # newer builds are probed through the custom-device API instead.
            legacy_api = (
                major != 0 and major <= 2 and int(paddle.version.minor) <= 4
            )
            if legacy_api:
                npu_ok = paddle.device.is_compiled_with_npu()
            else:
                npu_ok = paddle.device.is_compiled_with_custom_device("npu")
            if not npu_ok:
                _abort("use_npu", "npu", "npu")

        if use_mlu and not paddle.device.is_compiled_with_mlu():
            _abort("use_mlu", "mlu", "mlu")
    except Exception:
        # sys.exit raises SystemExit, which is not an Exception subclass and
        # therefore still propagates; everything else is ignored.
        pass


def to_float32(preds):
    """
    Cast every paddle.Tensor found in ``preds`` to paddle.float32.

    ``preds`` may be a tensor, or a dict / list nested to any depth. Dicts and
    lists are modified in place and returned; a bare tensor yields the cast
    result; any other object is returned untouched.
    """
    if isinstance(preds, dict):
        slots = list(preds)
    elif isinstance(preds, list):
        slots = range(len(preds))
    elif isinstance(preds, paddle.Tensor):
        return preds.astype(paddle.float32)
    else:
        return preds

    for slot in slots:
        item = preds[slot]
        if isinstance(item, (dict, list)):
            # Nested container: recurse (it is mutated in place).
            preds[slot] = to_float32(item)
        elif isinstance(item, paddle.Tensor):
            preds[slot] = item.astype(paddle.float32)
    return preds


def train(
    config,
    train_dataloader,
    valid_dataloader,
    device,
    model,
    loss_class,
    optimizer,
    lr_scheduler,
    post_process_class,
    eval_class,
    pre_best_model_dict,
    logger,
    step_pre_epoch,
    log_writer=None,
    scaler=None,
    amp_level="O2",
    amp_custom_black_list=[],
    amp_custom_white_list=[],
    amp_dtype="float16",
):
    """
    Run the training loop with periodic logging, evaluation and checkpointing.

    Args:
        config (dict): Global config. This function reads the "Global",
            "Architecture" and "Loss" sections plus the top-level
            "profiler_options" key.
        train_dataloader: Iterable of training batches. It is rebuilt at the
            start of an epoch (via ``build_dataloader``) whenever
            ``train_dataloader.dataset.need_reset`` is truthy.
        valid_dataloader: Batches handed to ``eval`` during training.
        device: Forwarded to ``build_dataloader`` when the loader is rebuilt.
        model: Network to train; called on the batch in a form that depends on
            the model type / algorithm.
        loss_class: Callable ``(preds, batch)`` returning a dict that contains
            a "loss" entry.
        optimizer: Must provide ``get_lr``, ``step`` and ``clear_grad``.
        lr_scheduler: Stepped once per iteration unless it is a plain float.
        post_process_class: Converts predictions before they reach
            ``eval_class``.
        eval_class: Metric object exposing ``main_indicator`` and
            ``get_metric``.
        pre_best_model_dict (dict): State from a previous run; "global_step"
            and "start_epoch" are read from it when present and its entries
            seed the best-metric record.
        logger: Logger used for progress messages.
        step_pre_epoch: Multiplied by ``eval_batch_epoch`` to derive the eval
            interval when ``eval_batch_epoch`` is configured.
        log_writer: Optional metrics/model logger, used on rank 0.
        scaler: When truthy, the AMP (auto_cast + loss scaling) path is used.
        amp_level, amp_custom_black_list, amp_custom_white_list, amp_dtype:
            Forwarded to ``paddle.amp.auto_cast`` and to ``eval``.
            NOTE(review): the list defaults are shared between calls; they
            are only forwarded here, never mutated in this function.
    Returns: None
    """
    cal_metric_during_train = config["Global"].get("cal_metric_during_train", False)
    calc_epoch_interval = config["Global"].get("calc_epoch_interval", 1)
    log_smooth_window = config["Global"]["log_smooth_window"]
    epoch_num = config["Global"]["epoch_num"]
    print_batch_step = config["Global"]["print_batch_step"]
    eval_batch_step = config["Global"]["eval_batch_step"]
    eval_batch_epoch = config["Global"].get("eval_batch_epoch", None)
    profiler_options = config["profiler_options"]

    # Resume the step counter from a previous run when available.
    global_step = 0
    if "global_step" in pre_best_model_dict:
        global_step = pre_best_model_dict["global_step"]
    start_eval_step = 0
    # A list-valued eval_batch_step is read as [start_step, interval]; when
    # eval_batch_epoch is set, the interval is expressed in epochs instead
    # and evaluation starts from step 0.
    if type(eval_batch_step) == list and len(eval_batch_step) >= 2:
        start_eval_step = eval_batch_step[0] if not eval_batch_epoch else 0
        eval_batch_step = (
            eval_batch_step[1]
            if not eval_batch_epoch
            else step_pre_epoch * eval_batch_epoch
        )
        if len(valid_dataloader) == 0:
            logger.info(
                "No Images in eval dataset, evaluation during training "
                "will be disabled"
            )
            # Huge threshold: global_step can never exceed it, so the eval
            # branch in the loop below is never entered.
            start_eval_step = 1e111
        logger.info(
            "During the training process, after the {}th iteration, "
            "an evaluation is run every {} iterations".format(
                start_eval_step, eval_batch_step
            )
        )
    save_epoch_step = config["Global"]["save_epoch_step"]
    save_model_dir = config["Global"]["save_model_dir"]
    if not os.path.exists(save_model_dir):
        os.makedirs(save_model_dir)
    main_indicator = eval_class.main_indicator
    # Start from a zero score, then overlay whatever the previous run stored.
    best_model_dict = {main_indicator: 0}
    best_model_dict.update(pre_best_model_dict)
    train_stats = TrainingStats(log_smooth_window, ["lr"])
    model_average = False
    model.train()

    use_srn = config["Architecture"]["algorithm"] == "SRN"
    # Algorithms whose forward pass receives the remaining batch items via
    # the ``data=`` keyword in addition to the images.
    extra_input_models = [
        "SRN",
        "NRTR",
        "SAR",
        "SEED",
        "SVTR",
        "SVTR_LCNet",
        "SPIN",
        "VisionLAN",
        "RobustScanner",
        "RFL",
        "DRRG",
        "SATRN",
        "SVTR_HGNet",
        "ParseQ",
        "CPPD",
    ]
    extra_input = False
    if config["Architecture"]["algorithm"] == "Distillation":
        # For distillation, extra input is needed if ANY sub-model needs it.
        for key in config["Architecture"]["Models"]:
            extra_input = (
                extra_input
                or config["Architecture"]["Models"][key]["algorithm"]
                in extra_input_models
            )
    else:
        extra_input = config["Architecture"]["algorithm"] in extra_input_models
    # NOTE(review): bare except; presumably only a missing "model_type" key
    # is expected here, but every exception is swallowed.
    try:
        model_type = config["Architecture"]["model_type"]
    except:
        model_type = None

    algorithm = config["Architecture"]["algorithm"]

    start_epoch = (
        best_model_dict["start_epoch"] if "start_epoch" in best_model_dict else 1
    )

    # Accumulators for the periodic log line; reset after each log.
    total_samples = 0
    train_reader_cost = 0.0
    train_batch_cost = 0.0
    reader_start = time.time()
    eta_meter = AverageMeter()

    # On Windows the last batch of each epoch is not consumed.
    # NOTE(review): the reason is not visible in this file.
    max_iter = (
        len(train_dataloader) - 1
        if platform.system() == "Windows"
        else len(train_dataloader)
    )

    for epoch in range(start_epoch, epoch_num + 1):
        if train_dataloader.dataset.need_reset:
            # Rebuild the loader, seeded with the epoch number.
            train_dataloader = build_dataloader(
                config, "Train", device, logger, seed=epoch
            )
            max_iter = (
                len(train_dataloader) - 1
                if platform.system() == "Windows"
                else len(train_dataloader)
            )

        for idx, batch in enumerate(train_dataloader):
            profiler.add_profiler_step(profiler_options)
            # Time since the end of the previous iteration = data loading time.
            train_reader_cost += time.time() - reader_start
            if idx >= max_iter:
                break
            lr = optimizer.get_lr()
            images = batch[0]
            if use_srn:
                model_average = True
            # use amp
            if scaler:
                with paddle.amp.auto_cast(
                    level=amp_level,
                    custom_black_list=amp_custom_black_list,
                    custom_white_list=amp_custom_white_list,
                    dtype=amp_dtype,
                ):
                    if model_type == "table" or extra_input:
                        preds = model(images, data=batch[1:])
                    elif model_type in ["kie"]:
                        preds = model(batch)
                    elif algorithm in ["CAN"]:
                        preds = model(batch[:3])
                    else:
                        preds = model(images)
                # The loss is computed on float32 predictions, outside the
                # auto_cast context.
                preds = to_float32(preds)
                loss = loss_class(preds, batch)
                avg_loss = loss["loss"]
                scaled_avg_loss = scaler.scale(avg_loss)
                scaled_avg_loss.backward()
                scaler.minimize(optimizer, scaled_avg_loss)
            else:
                # NOTE(review): "sr" is routed to model(batch) here but not in
                # the AMP branch above; confirm whether that is intended.
                if model_type == "table" or extra_input:
                    preds = model(images, data=batch[1:])
                elif model_type in ["kie", "sr"]:
                    preds = model(batch)
                elif algorithm in ["CAN"]:
                    preds = model(batch[:3])
                else:
                    preds = model(images)
                loss = loss_class(preds, batch)
                avg_loss = loss["loss"]
                avg_loss.backward()
                optimizer.step()

            optimizer.clear_grad()

            if (
                cal_metric_during_train and epoch % calc_epoch_interval == 0
            ):  # only rec and cls need
                batch = [item.numpy() for item in batch]
                if model_type in ["kie", "sr"]:
                    eval_class(preds, batch)
                elif model_type in ["table"]:
                    post_result = post_process_class(preds, batch)
                    eval_class(post_result, batch)
                elif algorithm in ["CAN"]:
                    # NOTE(review): this rebinds model_type for the rest of
                    # training; the eval() call below then receives "can".
                    # It only happens when this metric branch is executed.
                    model_type = "can"
                    eval_class(preds[0], batch[2:], epoch_reset=(idx == 0))
                else:
                    if config["Loss"]["name"] in [
                        "MultiLoss",
                        "MultiLoss_v2",
                    ]:  # for multi head loss
                        post_result = post_process_class(
                            preds["ctc"], batch[1]
                        )  # for CTC head out
                    elif config["Loss"]["name"] in ["VLLoss"]:
                        post_result = post_process_class(preds, batch[1], batch[-1])
                    else:
                        post_result = post_process_class(preds, batch[1])
                    eval_class(post_result, batch)
                metric = eval_class.get_metric()
                train_stats.update(metric)

            # Full iteration time, including the data loading measured above.
            train_batch_time = time.time() - reader_start
            train_batch_cost += train_batch_time
            eta_meter.update(train_batch_time)
            global_step += 1
            total_samples += len(images)

            if not isinstance(lr_scheduler, float):
                lr_scheduler.step()

            # logger and visualdl
            # Loss entries with an empty shape are converted with float();
            # all others are reduced to their mean.
            stats = {
                k: float(v) if v.shape == [] else v.numpy().mean()
                for k, v in loss.items()
            }
            stats["lr"] = lr
            train_stats.update(stats)

            if log_writer is not None and dist.get_rank() == 0:
                log_writer.log_metrics(
                    metrics=train_stats.get(), prefix="TRAIN", step=global_step
                )

            # Log every print_batch_step steps and on the last batch of an
            # epoch (rank 0 only).
            if dist.get_rank() == 0 and (
                (global_step > 0 and global_step % print_batch_step == 0)
                or (idx >= len(train_dataloader) - 1)
            ):
                logs = train_stats.log()

                # Remaining iterations (this and later epochs) times the
                # average iteration time.
                eta_sec = (
                    (epoch_num + 1 - epoch) * len(train_dataloader) - idx - 1
                ) * eta_meter.avg
                eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec)))
                max_mem_reserved_str = ""
                max_mem_allocated_str = ""
                if paddle.device.is_compiled_with_cuda():
                    max_mem_reserved_str = f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB,"
                    max_mem_allocated_str = f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
                # The "avg_*" values divide by print_batch_step, not by the
                # number of iterations actually accumulated since the last log.
                strs = (
                    "epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: "
                    "{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, "
                    "ips: {:.5f} samples/s, eta: {}, {} {}".format(
                        epoch,
                        epoch_num,
                        global_step,
                        logs,
                        train_reader_cost / print_batch_step,
                        train_batch_cost / print_batch_step,
                        total_samples / print_batch_step,
                        total_samples / train_batch_cost,
                        eta_sec_format,
                        max_mem_reserved_str,
                        max_mem_allocated_str,
                    )
                )
                logger.info(strs)

                total_samples = 0
                train_reader_cost = 0.0
                train_batch_cost = 0.0
            # eval
            # Evaluate on rank 0 every eval_batch_step steps once past
            # start_eval_step.
            if (
                global_step > start_eval_step
                and (global_step - start_eval_step) % eval_batch_step == 0
                and dist.get_rank() == 0
            ):
                if model_average:
                    Model_Average = paddle.incubate.optimizer.ModelAverage(
                        0.15,
                        parameters=model.parameters(),
                        min_average_window=10000,
                        max_average_window=15625,
                    )
                    Model_Average.apply()
                cur_metric = eval(
                    model,
                    valid_dataloader,
                    post_process_class,
                    eval_class,
                    model_type,
                    extra_input=extra_input,
                    scaler=scaler,
                    amp_level=amp_level,
                    amp_custom_black_list=amp_custom_black_list,
                    amp_custom_white_list=amp_custom_white_list,
                    amp_dtype=amp_dtype,
                )
                cur_metric_str = "cur metric, {}".format(
                    ", ".join(["{}: {}".format(k, v) for k, v in cur_metric.items()])
                )
                logger.info(cur_metric_str)

                # logger metric
                if log_writer is not None:
                    log_writer.log_metrics(
                        metrics=cur_metric, prefix="EVAL", step=global_step
                    )

                # ">=" means a tie with the previous best also overwrites the
                # stored best checkpoint.
                if cur_metric[main_indicator] >= best_model_dict[main_indicator]:
                    best_model_dict.update(cur_metric)
                    best_model_dict["best_epoch"] = epoch
                    save_model(
                        model,
                        optimizer,
                        save_model_dir,
                        logger,
                        config,
                        is_best=True,
                        prefix="best_accuracy",
                        best_model_dict=best_model_dict,
                        epoch=epoch,
                        global_step=global_step,
                    )
                best_str = "best metric, {}".format(
                    ", ".join(
                        ["{}: {}".format(k, v) for k, v in best_model_dict.items()]
                    )
                )
                logger.info(best_str)
                # logger best metric
                if log_writer is not None:
                    log_writer.log_metrics(
                        metrics={
                            "best_{}".format(main_indicator): best_model_dict[
                                main_indicator
                            ]
                        },
                        prefix="EVAL",
                        step=global_step,
                    )

                    log_writer.log_model(
                        is_best=True, prefix="best_accuracy", metadata=best_model_dict
                    )

            # Restart the timer so the next iteration measures its own
            # data loading time.
            reader_start = time.time()
        # End of epoch: always refresh the "latest" checkpoint on rank 0.
        if dist.get_rank() == 0:
            save_model(
                model,
                optimizer,
                save_model_dir,
                logger,
                config,
                is_best=False,
                prefix="latest",
                best_model_dict=best_model_dict,
                epoch=epoch,
                global_step=global_step,
            )

            if log_writer is not None:
                log_writer.log_model(is_best=False, prefix="latest")

        # Additionally keep a numbered checkpoint every save_epoch_step epochs.
        if dist.get_rank() == 0 and epoch > 0 and epoch % save_epoch_step == 0:
            save_model(
                model,
                optimizer,
                save_model_dir,
                logger,
                config,
                is_best=False,
                prefix="iter_epoch_{}".format(epoch),
                best_model_dict=best_model_dict,
                epoch=epoch,
                global_step=global_step,
            )
            if log_writer is not None:
                log_writer.log_model(
                    is_best=False, prefix="iter_epoch_{}".format(epoch)
                )

    best_str = "best metric, {}".format(
        ", ".join(["{}: {}".format(k, v) for k, v in best_model_dict.items()])
    )
    logger.info(best_str)
    if dist.get_rank() == 0 and log_writer is not None:
        log_writer.close()
    return


def _eval_forward(model, batch, model_type, extra_input):
    """Run one forward pass, feeding the model the inputs its type expects."""
    images = batch[0]
    if model_type == "table" or extra_input:
        return model(images, data=batch[1:])
    if model_type in ["kie", "sr"]:
        return model(batch)
    if model_type in ["can"]:
        return model(batch[:3])
    return model(images)


def eval(
    model,
    valid_dataloader,
    post_process_class,
    eval_class,
    model_type=None,
    extra_input=False,
    scaler=None,
    amp_level="O2",
    amp_custom_black_list=[],
    amp_custom_white_list=[],
    amp_dtype="float16",
):
    """
    Evaluate ``model`` on ``valid_dataloader`` and return the metric dict.

    The model is switched to eval mode for the duration of the call and is
    always put back into train mode afterwards, even if evaluation raises.

    Args:
        model: Network to evaluate.
        valid_dataloader: Sized iterable of batches; ``batch[0]`` holds the
            images.
        post_process_class: Converts raw predictions before they are passed
            to ``eval_class``. May be None for "table" / "kie" models, in
            which case raw predictions are scored directly.
        eval_class: Metric accumulator; called per batch, then
            ``get_metric()`` is read once at the end.
        model_type (str | None): Selects the forward / scoring path
            ("table", "kie", "sr", "can" or anything else for the default).
        extra_input (bool): When true, the remaining batch items are passed
            to the model through ``data=``.
        scaler: When truthy, the forward pass runs under
            ``paddle.amp.auto_cast`` and predictions are cast to float32.
        amp_level, amp_custom_black_list, amp_dtype: Forwarded to
            ``paddle.amp.auto_cast``.
        amp_custom_white_list: Accepted for signature parity with ``train``.
            NOTE(review): it is not forwarded to ``auto_cast`` here, unlike in
            ``train``; confirm whether that is intentional.
    Returns:
        dict: ``eval_class.get_metric()`` plus an "fps" entry (images per
        second of forward time; 0.0 when no batch was processed).
    """
    pbar = tqdm(
        total=len(valid_dataloader), desc="eval model:", position=0, leave=True
    )
    # On Windows the last batch is not consumed.
    max_iter = (
        len(valid_dataloader) - 1
        if platform.system() == "Windows"
        else len(valid_dataloader)
    )
    total_frame = 0.0
    total_time = 0.0

    model.eval()
    try:
        with paddle.no_grad():
            for idx, batch in enumerate(valid_dataloader):
                if idx >= max_iter:
                    break
                images = batch[0]
                start = time.time()

                # use amp
                if scaler:
                    with paddle.amp.auto_cast(
                        level=amp_level,
                        custom_black_list=amp_custom_black_list,
                        dtype=amp_dtype,
                    ):
                        preds = _eval_forward(model, batch, model_type, extra_input)
                    preds = to_float32(preds)
                else:
                    preds = _eval_forward(model, batch, model_type, extra_input)

                # Metrics and post-processing work on numpy data; non-tensor
                # items are passed through unchanged.
                batch_numpy = [
                    item.numpy() if isinstance(item, paddle.Tensor) else item
                    for item in batch
                ]
                # Only the forward pass and the numpy conversion are timed.
                total_time += time.time() - start

                # Evaluate the results of the current batch
                if model_type in ["table", "kie"]:
                    if post_process_class is None:
                        eval_class(preds, batch_numpy)
                    else:
                        post_result = post_process_class(preds, batch_numpy)
                        eval_class(post_result, batch_numpy)
                elif model_type in ["sr"]:
                    eval_class(preds, batch_numpy)
                elif model_type in ["can"]:
                    eval_class(preds[0], batch_numpy[2:], epoch_reset=(idx == 0))
                else:
                    post_result = post_process_class(preds, batch_numpy[1])
                    eval_class(post_result, batch_numpy)

                pbar.update(1)
                total_frame += len(images)
            # Get final metric, e.g. acc or hmean
            metric = eval_class.get_metric()
    finally:
        # Always release the progress bar and restore training mode.
        pbar.close()
        model.train()

    # Guard against an empty loader (or a single-batch loader on Windows),
    # where no time was accumulated and the division would fail.
    metric["fps"] = total_frame / total_time if total_time > 0 else 0.0
    return metric


def update_center(char_center, post_result, preds):
    """
    Fold the features of correctly recognised samples into running means.

    Args:
        char_center (dict): Maps a predicted class index to
            ``[mean_feature, count]``; updated in place.
        post_result: Pair ``(result, label)``; a sample contributes only when
            ``result[i][0] == label[i][0]``.
        preds: Pair ``(feats, logits)`` of tensors; the class index of each
            time step is the argmax of ``logits`` over the last axis.
    Returns:
        dict: the same ``char_center`` object.
    """
    decoded, labels = post_result
    features, scores = preds
    class_ids = paddle.argmax(scores, axis=-1).numpy()
    features = features.numpy()

    for sample in range(len(labels)):
        if decoded[sample][0] == labels[sample][0]:
            sample_feats = features[sample]
            for step, class_id in enumerate(class_ids[sample]):
                vector = sample_feats[step]
                if class_id in char_center:
                    # Incremental mean: new = (old * n + x) / (n + 1).
                    entry = char_center[class_id]
                    mean, count = entry
                    entry[0] = (mean * count + vector) / (count + 1)
                    entry[1] = count + 1
                else:
                    char_center[class_id] = [vector, 1]
    return char_center


def get_center(model, eval_dataloader, post_process_class):
    """
    Compute a mean feature per predicted class index over ``eval_dataloader``.

    Each batch is run through ``model``, post-processed against ``batch[1]``
    and folded into running means by ``update_center``.

    Args:
        model: Callable on ``batch[0]``; its output is passed to both
            ``post_process_class`` and ``update_center``.
        eval_dataloader: Sized iterable of batches whose items expose
            ``.numpy()``.
        post_process_class: Callable ``(preds, labels)`` returning the pair
            consumed by ``update_center``.
    Returns:
        dict: class index -> mean feature (the counts are dropped).
    """
    pbar = tqdm(total=len(eval_dataloader), desc="get center:")
    # On Windows the last batch is not consumed.
    max_iter = (
        len(eval_dataloader) - 1
        if platform.system() == "Windows"
        else len(eval_dataloader)
    )
    char_center = dict()
    try:
        for idx, batch in enumerate(eval_dataloader):
            if idx >= max_iter:
                break
            images = batch[0]
            preds = model(images)

            batch = [item.numpy() for item in batch]
            # Obtain usable results from post-processing methods
            post_result = post_process_class(preds, batch[1])

            # update char_center
            char_center = update_center(char_center, post_result, preds)
            pbar.update(1)
    finally:
        # Close the progress bar even when a batch fails.
        pbar.close()

    # Keep only the mean feature of each [mean, count] entry.
    return {key: stats[0] for key, stats in char_center.items()}


def preprocess(is_train=False):
    """Parse CLI arguments and prepare the config, logger, device and writers.

    Steps, in order: parse command-line flags, load the YAML config and merge
    the ``-o`` overrides and profiler options into it, optionally dump the
    merged config to ``<save_model_dir>/config.yml``, create the logger,
    validate the algorithm name, select and set the paddle device, record
    whether the run is distributed, and build the optional W&B logger.

    Args:
        is_train (bool): When true, the merged config is written to
            ``save_model_dir`` and logging also goes to ``train.log`` there.

    Returns:
        tuple: ``(config, device, logger, log_writer)`` where ``device`` is
        the value returned by ``paddle.set_device`` and ``log_writer`` is a
        ``Loggers`` wrapper, or ``None`` when no experiment logger is enabled.

    Raises:
        AssertionError: If ``Architecture.algorithm`` is not supported.
    """
    FLAGS = ArgsParser().parse_args()
    config = load_config(FLAGS.config)
    config = merge_config(config, FLAGS.opt)
    # Expose the profiler options to downstream code through the config.
    config = merge_config(config, {"profiler_options": FLAGS.profiler_options})

    global_config = config["Global"]

    if is_train:
        # save_config
        save_model_dir = global_config["save_model_dir"]
        os.makedirs(save_model_dir, exist_ok=True)
        with open(os.path.join(save_model_dir, "config.yml"), "w") as f:
            yaml.dump(dict(config), f, default_flow_style=False, sort_keys=False)
        log_file = "{}/train.log".format(save_model_dir)
    else:
        log_file = None
    logger = get_logger(log_file=log_file)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = global_config.get("use_gpu", False)
    use_xpu = global_config.get("use_xpu", False)
    use_npu = global_config.get("use_npu", False)
    use_mlu = global_config.get("use_mlu", False)

    supported_algorithms = [
        "EAST",
        "DB",
        "SAST",
        "Rosetta",
        "CRNN",
        "STARNet",
        "RARE",
        "SRN",
        "CLS",
        "PGNet",
        "Distillation",
        "NRTR",
        "TableAttn",
        "SAR",
        "PSE",
        "SEED",
        "SDMGR",
        "LayoutXLM",
        "LayoutLM",
        "LayoutLMv2",
        "PREN",
        "FCE",
        "SVTR",
        "SVTR_LCNet",
        "ViTSTR",
        "ABINet",
        "DB++",
        "TableMaster",
        "SPIN",
        "VisionLAN",
        "Gestalt",
        "SLANet",
        "RobustScanner",
        "CT",
        "RFL",
        "DRRG",
        "CAN",
        "Telescope",
        "SATRN",
        "SVTR_HGNet",
        "ParseQ",
        "CPPD",
    ]
    alg = config["Architecture"]["algorithm"]
    # Still an AssertionError for existing callers, but now it names the culprit.
    assert (
        alg in supported_algorithms
    ), "Unsupported algorithm '{}', expected one of: {}".format(
        alg, supported_algorithms
    )

    # Device priority: xpu, then npu, then mlu, then gpu/cpu.
    if use_xpu:
        device = "xpu:{0}".format(os.getenv("FLAGS_selected_xpus", 0))
    elif use_npu:
        device = "npu:{0}".format(os.getenv("FLAGS_selected_npus", 0))
    elif use_mlu:
        device = "mlu:{0}".format(os.getenv("FLAGS_selected_mlus", 0))
    else:
        device = "gpu:{}".format(dist.ParallelEnv().dev_id) if use_gpu else "cpu"
    check_device(use_gpu, use_xpu, use_npu, use_mlu)

    device = paddle.set_device(device)

    global_config["distributed"] = dist.get_world_size() != 1

    loggers = []

    if global_config.get("use_visualdl"):
        logger.warning(
            "You are using VisualDL, the VisualDL is deprecated and "
            "removed in ppocr!"
        )
    if global_config.get("use_wandb") or "wandb" in config:
        save_dir = global_config["save_model_dir"]
        # When config has a "wandb" section it is updated in place, so the
        # config printed below also shows the injected save_dir.
        wandb_params = config["wandb"] if "wandb" in config else dict()
        wandb_params.update({"save_dir": save_dir})
        loggers.append(WandbLogger(**wandb_params, config=config))
    print_dict(config, logger)

    log_writer = Loggers(loggers) if loggers else None

    logger.info("train with paddle {} and device {}".format(paddle.__version__, device))
    return config, device, logger, log_writer
-												refine

											
										
										
											2021-06-21 20:20:25 +08:00
+								# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								#
 								# Licensed under the Apache License, Version 2.0 (the "License");
 								# you may not use this file except in compliance with the License.
 								# You may obtain a copy of the License at
 								#
 								#     http://www.apache.org/licenses/LICENSE-2.0
 								#
 								# Unless required by applicable law or agreed to in writing, software
 								# distributed under the License is distributed on an "AS IS" BASIS,
 								# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								# See the License for the specific language governing permissions and
 								# limitations under the License.
 								from __future__ import absolute_import
 								from __future__ import division
 								from __future__ import print_function
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								import os
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								import sys
-												fix eval laoder on win (#2654)


											
										
										
											2021-04-27 10:32:17 +08:00
+								import platform
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								import yaml
 								import time
-												[Feature] Add eta function in model's training stage (#5380)

* [Feature] Add eta function in model's training stage

* [Feature] Add eta function in model's training stage

* [Feature] Add eta function in model's training stage

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [BugFix] Fix offset bug, residual idxes should -1
											
										
										
											2022-02-01 17:46:42 +08:00
+								import datetime
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								import paddle
 								import paddle.distributed as dist
 								from tqdm import tqdm
-												Submit SR model (#6933)

* add sr model

* update for eval

* submit sr

* polish code

* polish code

* polish code

* update sr model

* update doc

* update doc

* update doc

* fix typo

* format code

* update metric

* fix export
											
										
										
											2022-08-12 10:49:54 +08:00
+								import cv2
 								import numpy as np
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								from argparse import ArgumentParser, RawDescriptionHelpFormatter
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								from ppocr.utils.stats import TrainingStats
 								from ppocr.utils.save_load import save_model
-												[Feature] Add eta function in model's training stage (#5380)

* [Feature] Add eta function in model's training stage

* [Feature] Add eta function in model's training stage

* [Feature] Add eta function in model's training stage

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [BugFix] Fix offset bug, residual idxes should -1
											
										
										
											2022-02-01 17:46:42 +08:00
+								from ppocr.utils.utility import print_dict, AverageMeter
-												updata structure of dygraph

											
										
										
											2020-11-04 20:43:27 +08:00
+								from ppocr.utils.logging import get_logger
-												【OCR Issue No.9】以可选形式支持Visualdl (#11947)

* delete visual dl

* totally delete visual

* delete vdl file

* fix codestyle
											
										
										
											2024-04-25 17:37:27 +08:00
+								from ppocr.utils.loggers import WandbLogger, Loggers
-												opt benchmark

											
										
										
											2021-09-28 10:28:25 +08:00
+								from ppocr.utils import profiler
-												updata structure of dygraph

											
										
										
											2020-11-04 20:43:27 +08:00
+								from ppocr.data import build_dataloader
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
-												trans to paddle-rc

											
										
										
											2020-11-05 15:13:36 +08:00
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								class ArgsParser(ArgumentParser):
 								    def __init__(self):
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        super(ArgsParser, self).__init__(formatter_class=RawDescriptionHelpFormatter)
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								        self.add_argument("-c", "--config", help="configuration file to use")
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        self.add_argument("-o", "--opt", nargs="+", help="set configuration options")
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								        self.add_argument(
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								            "-p",
 								            "--profiler_options",
-												add profile

											
										
										
											2021-09-28 10:01:37 +08:00
+								            type=str,
 								            default=None,
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								            help="The option of profiler, which should be in format "
 								            '"key1=value1;key2=value2;key3=value3".',
-												add profile

											
										
										
											2021-09-28 10:01:37 +08:00
+								        )
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
 								    def parse_args(self, argv=None):
 								        args = super(ArgsParser, self).parse_args(argv)
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        assert args.config is not None, "Please specify --config=configure_file_path."
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								        args.opt = self._parse_opt(args.opt)
 								        return args
 								    def _parse_opt(self, opts):
 								        config = {}
 								        if not opts:
 								            return config
 								        for s in opts:
 								            s = s.strip()
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								            k, v = s.split("=")
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								            config[k] = yaml.load(v, Loader=yaml.Loader)
 								        return config
 								def load_config(file_path):
 								    """
 								    Load config from yml/yaml file.
 								    Args:
 								        file_path (str): Path of the config file to be loaded.
 								    Returns: global config
 								    """
 								    _, ext = os.path.splitext(file_path)
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    assert ext in [".yml", ".yaml"], "only support yaml files for now"
 								    config = yaml.load(open(file_path, "rb"), Loader=yaml.Loader)
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								    return config
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								def merge_config(config, opts):
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								    """
 								    Merge config into global config.
 								    Args:
 								        config (dict): Config to be merged.
 								    Returns: global config
 								    """
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								    for key, value in opts.items():
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								        if "." not in key:
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								            if isinstance(value, dict) and key in config:
 								                config[key].update(value)
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								            else:
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								                config[key] = value
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								        else:
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								            sub_keys = key.split(".")
 								            assert sub_keys[0] in config, (
 								                "the sub_keys can only be one of global_config: {}, but get: "
 								                "{}, please check your running command".format(
 								                    config.keys(), sub_keys[0]
 								                )
 								            )
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								            cur = config[sub_keys[0]]
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								            for idx, sub_key in enumerate(sub_keys[1:]):
 								                if idx == len(sub_keys) - 2:
 								                    cur[sub_key] = value
 								                else:
 								                    cur = cur[sub_key]
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								    return config
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
-												[MLU]adapt mlu device for running dbnet network

											
										
										
											2022-10-08 16:46:41 +08:00
+								def check_device(use_gpu, use_xpu=False, use_npu=False, use_mlu=False):
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								    """
 								    Log error and exit when set use_gpu=true in paddlepaddle
 								    cpu version.
 								    """
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    err = (
 								        "Config {} cannot be set as true while your paddle "
 								        "is not compiled with {} ! \nPlease try: \n"
 								        "\t1. Install paddlepaddle to run model on {} \n"
 								        "\t2. Set {} as false in config file to run "
 								        "model on CPU"
 								    )
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
 								    try:
-												Cherry pick  to dygraph (#6383)

* add starnet doc & test=document_fix

* add starnet doc & test=document_fix

* Update algorithm_rec_starnet.md

* support xpu
											
										
										
											2022-05-23 23:38:33 +08:00
+								        if use_gpu and use_xpu:
-												Fix (#11448)


											
										
										
											2024-01-02 11:02:13 +08:00
+								            print("use_xpu and use_gpu can not both be true.")
-												delete fluid

											
										
										
											2020-12-21 17:13:32 +08:00
+								        if use_gpu and not paddle.is_compiled_with_cuda():
-												Cherry pick  to dygraph (#6383)

* add starnet doc & test=document_fix

* add starnet doc & test=document_fix

* Update algorithm_rec_starnet.md

* support xpu
											
										
										
											2022-05-23 23:38:33 +08:00
+								            print(err.format("use_gpu", "cuda", "gpu", "use_gpu"))
 								            sys.exit(1)
 								        if use_xpu and not paddle.device.is_compiled_with_xpu():
 								            print(err.format("use_xpu", "xpu", "xpu", "use_xpu"))
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								            sys.exit(1)
-												update npu api (#9688)


											
										
										
											2023-04-11 09:56:08 +08:00
+								        if use_npu:
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								            if (
 								                int(paddle.version.major) != 0
 								                and int(paddle.version.major) <= 2
 								                and int(paddle.version.minor) <= 4
 								            ):
-												update npu api (#9688)


											
										
										
											2023-04-11 09:56:08 +08:00
+								                if not paddle.device.is_compiled_with_npu():
 								                    print(err.format("use_npu", "npu", "npu", "use_npu"))
 								                    sys.exit(1)
 								            # is_compiled_with_npu() has been updated after paddle-2.4
 								            else:
 								                if not paddle.device.is_compiled_with_custom_device("npu"):
 								                    print(err.format("use_npu", "npu", "npu", "use_npu"))
 								                    sys.exit(1)
-												[MLU]adapt mlu device for running dbnet network

											
										
										
											2022-10-08 16:46:41 +08:00
+								        if use_mlu and not paddle.device.is_compiled_with_mlu():
 								            print(err.format("use_mlu", "mlu", "mlu", "use_mlu"))
 								            sys.exit(1)
-												add use_xpu config for det_mv3_db.yml

											
										
										
											2022-02-23 16:31:16 +08:00
+								    except Exception as e:
 								        pass
-												add SLANet

											
										
										
											2022-08-08 14:50:27 +08:00
-												add benckmark

											
										
										
											2022-07-30 10:00:11 +08:00
+								def to_float32(preds):
 								    if isinstance(preds, dict):
 								        for k in preds:
 								            if isinstance(preds[k], dict) or isinstance(preds[k], list):
 								                preds[k] = to_float32(preds[k])
-												fix amp bug

											
										
										
											2022-08-19 13:26:02 +08:00
+								            elif isinstance(preds[k], paddle.Tensor):
 								                preds[k] = preds[k].astype(paddle.float32)
-												add benckmark

											
										
										
											2022-07-30 10:00:11 +08:00
+								    elif isinstance(preds, list):
 								        for k in range(len(preds)):
 								            if isinstance(preds[k], dict):
 								                preds[k] = to_float32(preds[k])
 								            elif isinstance(preds[k], list):
 								                preds[k] = to_float32(preds[k])
-												fix amp bug

											
										
										
											2022-08-19 13:26:02 +08:00
+								            elif isinstance(preds[k], paddle.Tensor):
 								                preds[k] = preds[k].astype(paddle.float32)
 								    elif isinstance(preds, paddle.Tensor):
-												polish kie doc and code (#7255)

* add fapiao kie

* fix readme

* fix fanli

* add readme

* add how to do kie en

* add algo kie

* add algo overview en

* rename vqa to kie

* fix read gif
											
										
										
											2022-08-21 10:55:49 +08:00
+								        preds = preds.astype(paddle.float32)
-												add benckmark

											
										
										
											2022-07-30 10:00:11 +08:00
+								    return preds
-												add use_xpu config for det_mv3_db.yml

											
										
										
											2022-02-23 16:31:16 +08:00
-												add SLANet

											
										
										
											2022-08-08 14:50:27 +08:00
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								def train(
 								    config,
 								    train_dataloader,
 								    valid_dataloader,
 								    device,
 								    model,
 								    loss_class,
 								    optimizer,
 								    lr_scheduler,
 								    post_process_class,
 								    eval_class,
 								    pre_best_model_dict,
 								    logger,
 								    step_pre_epoch,
 								    log_writer=None,
 								    scaler=None,
 								    amp_level="O2",
 								    amp_custom_black_list=[],
 								    amp_custom_white_list=[],
 								    amp_dtype="float16",
 								):
 								    cal_metric_during_train = config["Global"].get("cal_metric_during_train", False)
 								    calc_epoch_interval = config["Global"].get("calc_epoch_interval", 1)
 								    log_smooth_window = config["Global"]["log_smooth_window"]
 								    epoch_num = config["Global"]["epoch_num"]
 								    print_batch_step = config["Global"]["print_batch_step"]
 								    eval_batch_step = config["Global"]["eval_batch_step"]
 								    eval_batch_epoch = config["Global"].get("eval_batch_epoch", None)
 								    profiler_options = config["profiler_options"]
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
-												updata structure of dygraph

											
										
										
											2020-11-04 20:43:27 +08:00
+								    global_step = 0
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    if "global_step" in pre_best_model_dict:
 								        global_step = pre_best_model_dict["global_step"]
-												set evaluation interval

											
										
										
											2020-07-07 10:35:17 +08:00
+								    start_eval_step = 0
 								    if type(eval_batch_step) == list and len(eval_batch_step) >= 2:
-												support eval pre epoch (#11003)


											
										
										
											2023-09-26 18:50:42 +08:00
+								        start_eval_step = eval_batch_step[0] if not eval_batch_epoch else 0
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        eval_batch_step = (
 								            eval_batch_step[1]
 								            if not eval_batch_epoch
 								            else step_pre_epoch * eval_batch_epoch
 								        )
-												add dataset len check

											
										
										
											2021-02-04 11:33:48 +08:00
+								        if len(valid_dataloader) == 0:
 								            logger.info(
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                "No Images in eval dataset, evaluation during training "
 								                "will be disabled"
-												add dataset len check

											
										
										
											2021-02-04 11:33:48 +08:00
+								            )
 								            start_eval_step = 1e111
-												set evaluation interval

											
										
										
											2020-07-07 10:35:17 +08:00
+								        logger.info(
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								            "During the training process, after the {}th iteration, "
 								            "an evaluation is run every {} iterations".format(
 								                start_eval_step, eval_batch_step
 								            )
 								        )
 								    save_epoch_step = config["Global"]["save_epoch_step"]
 								    save_model_dir = config["Global"]["save_model_dir"]
-												fix det inference bug and optimize save path

											
										
										
											2020-05-13 16:05:00 +08:00
+								    if not os.path.exists(save_model_dir):
 								        os.makedirs(save_model_dir)
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								    main_indicator = eval_class.main_indicator
 								    best_model_dict = {main_indicator: 0}
 								    best_model_dict.update(pre_best_model_dict)
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    train_stats = TrainingStats(log_smooth_window, ["lr"])
-												mv model_average to incubate

											
										
										
											2021-01-22 11:15:56 +08:00
+								    model_average = False
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								    model.train()
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    use_srn = config["Architecture"]["algorithm"] == "SRN"
-												Add rec algo VisionLAN (#6943)

* add vl

* add vl

* add vl

* add ref

* fix head out

* add visionlan doc

* fix vl infer

* update dict
											
										
										
											2022-08-09 11:29:43 +08:00
+								    extra_input_models = [
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        "SRN",
 								        "NRTR",
 								        "SAR",
 								        "SEED",
 								        "SVTR",
 								        "SVTR_LCNet",
 								        "SPIN",
 								        "VisionLAN",
 								        "RobustScanner",
 								        "RFL",
 								        "DRRG",
 								        "SATRN",
 								        "SVTR_HGNet",
 								        "ParseQ",
 								        "CPPD",
-												Add rec algo VisionLAN (#6943)

* add vl

* add vl

* add vl

* add ref

* fix head out

* add visionlan doc

* fix vl infer

* update dict
											
										
										
											2022-08-09 11:29:43 +08:00
+								    ]
-												fix key for dis and cls resize

											
										
										
											2022-04-27 13:19:55 +08:00
+								    extra_input = False
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    if config["Architecture"]["algorithm"] == "Distillation":
 								        for key in config["Architecture"]["Models"]:
 								            extra_input = (
 								                extra_input
 								                or config["Architecture"]["Models"][key]["algorithm"]
 								                in extra_input_models
 								            )
-												add ppocrv3 rec (#6033)

* add ppocrv3 rec


											
										
										
											2022-04-26 16:19:31 +08:00
+								    else:
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        extra_input = config["Architecture"]["algorithm"] in extra_input_models
-												delete blank lines and modify forward_train

											
										
										
											2021-08-19 17:31:02 +08:00
+								    try:
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        model_type = config["Architecture"]["model_type"]
-												delete blank lines and modify forward_train

											
										
										
											2021-08-19 17:31:02 +08:00
+								    except:
-												fix bug

											
										
										
											2021-07-07 09:54:03 +08:00
+								        model_type = None
-												add ppocrv3 rec (#6033)

* add ppocrv3 rec


											
										
										
											2022-04-26 16:19:31 +08:00
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    algorithm = config["Architecture"]["algorithm"]
-												polish code for srn eval

											
										
										
											2021-02-07 15:31:24 +08:00
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    start_epoch = (
 								        best_model_dict["start_epoch"] if "start_epoch" in best_model_dict else 1
 								    )
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
 								    total_samples = 0
-												[Feature] Add eta function in model's training stage (#5380)

* [Feature] Add eta function in model's training stage

* [Feature] Add eta function in model's training stage

* [Feature] Add eta function in model's training stage

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [BugFix] Fix offset bug, residual idxes should -1
											
										
										
											2022-02-01 17:46:42 +08:00
+								    train_reader_cost = 0.0
 								    train_batch_cost = 0.0
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								    reader_start = time.time()
-												[Feature] Add eta function in model's training stage (#5380)

* [Feature] Add eta function in model's training stage

* [Feature] Add eta function in model's training stage

* [Feature] Add eta function in model's training stage

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [BugFix] Fix offset bug, residual idxes should -1
											
										
										
											2022-02-01 17:46:42 +08:00
+								    eta_meter = AverageMeter()
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    max_iter = (
 								        len(train_dataloader) - 1
 								        if platform.system() == "Windows"
 								        else len(train_dataloader)
 								    )
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
-												fix bugs

											
										
										
											2020-12-30 19:54:16 +08:00
+								    for epoch in range(start_epoch, epoch_num + 1):
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								        if train_dataloader.dataset.need_reset:
 								            train_dataloader = build_dataloader(
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                config, "Train", device, logger, seed=epoch
 								            )
 								            max_iter = (
 								                len(train_dataloader) - 1
 								                if platform.system() == "Windows"
 								                else len(train_dataloader)
 								            )
-												Submit SR model (#6933)

* add sr model

* update for eval

* submit sr

* polish code

* polish code

* polish code

* update sr model

* update doc

* update doc

* update doc

* fix typo

* format code

* update metric

* fix export
											
										
										
											2022-08-12 10:49:54 +08:00
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								        for idx, batch in enumerate(train_dataloader):
-												add profile

											
										
										
											2021-09-28 10:01:37 +08:00
+								            profiler.add_profiler_step(profiler_options)
-												add pse to benchmark

											
										
										
											2021-10-27 20:16:36 +08:00
+								            train_reader_cost += time.time() - reader_start
-												fix win train loader

											
										
										
											2021-04-29 12:37:05 +08:00
+								            if idx >= max_iter:
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								                break
 								            lr = optimizer.get_lr()
 								            images = batch[0]
-												polish code for srn eval

											
										
										
											2021-02-07 15:31:24 +08:00
+								            if use_srn:
-												mv model_average to incubate

											
										
										
											2021-01-22 11:15:56 +08:00
+								                model_average = True
-												add amp train

											
										
										
											2021-10-15 16:30:51 +08:00
+								            # use amp
 								            if scaler:
-												[TIPC] add scripts for NPU and XPU, test=develop

											
										
										
											2022-09-13 14:11:21 +08:00
+								                with paddle.amp.auto_cast(
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                    level=amp_level,
 								                    custom_black_list=amp_custom_black_list,
 								                    custom_white_list=amp_custom_white_list,
 								                    dtype=amp_dtype,
 								                ):
 								                    if model_type == "table" or extra_input:
-												add amp train

											
										
										
											2021-10-15 16:30:51 +08:00
+								                        preds = model(images, data=batch[1:])
-												polish kie doc and code (#7255)

* add fapiao kie

* fix readme

* fix fanli

* add readme

* add how to do kie en

* add algo kie

* add algo overview en

* rename vqa to kie

* fix read gif
											
										
										
											2022-08-21 10:55:49 +08:00
+								                    elif model_type in ["kie"]:
-												fix amp vqa

											
										
										
											2022-06-28 10:17:53 +08:00
+								                        preds = model(batch)
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                    elif algorithm in ["CAN"]:
-												update CAN model

											
										
										
											2022-10-15 20:27:05 +08:00
+								                        preds = model(batch[:3])
-												add amp train

											
										
										
											2021-10-15 16:30:51 +08:00
+								                    else:
 								                        preds = model(images)
-												add benchmark

											
										
										
											2022-07-30 10:00:11 +08:00
+								                preds = to_float32(preds)
 								                loss = loss_class(preds, batch)
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                avg_loss = loss["loss"]
-												add benchmark

											
										
										
											2022-07-30 10:00:11 +08:00
+								                scaled_avg_loss = scaler.scale(avg_loss)
 								                scaled_avg_loss.backward()
 								                scaler.minimize(optimizer, scaled_avg_loss)
-												add srn for dygraph

											
										
										
											2020-12-30 16:15:49 +08:00
+								            else:
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                if model_type == "table" or extra_input:
-												add amp train

											
										
										
											2021-10-15 16:30:51 +08:00
+								                    preds = model(images, data=batch[1:])
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                elif model_type in ["kie", "sr"]:
-												fix train

											
										
										
											2021-12-18 16:04:10 +08:00
+								                    preds = model(batch)
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                elif algorithm in ["CAN"]:
-												update CAN model

											
										
										
											2022-10-15 20:27:05 +08:00
+								                    preds = model(batch[:3])
-												add amp train

											
										
										
											2021-10-15 16:30:51 +08:00
+								                else:
 								                    preds = model(images)
-												add benchmark

											
										
										
											2022-07-30 10:00:11 +08:00
+								                loss = loss_class(preds, batch)
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                avg_loss = loss["loss"]
-												add amp train

											
										
										
											2021-10-15 16:30:51 +08:00
+								                avg_loss.backward()
 								                optimizer.step()
-												Submit SR model (#6933)

* add sr model

* update for eval

* submit sr

* polish code

* polish code

* polish code

* update sr model

* update doc

* update doc

* update doc

* fix typo

* format code

* update metric

* fix export
											
										
										
											2022-08-12 10:49:54 +08:00
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								            optimizer.clear_grad()
-												日志符合benchmark规范

											
										
										
											2020-11-16 19:00:27 +08:00
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								            if (
 								                cal_metric_during_train and epoch % calc_epoch_interval == 0
 								            ):  # only rec and cls need
-												fix ips info and reduce interval of metric calc

											
										
										
											2022-02-07 20:19:25 +08:00
+								                batch = [item.numpy() for item in batch]
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                if model_type in ["kie", "sr"]:
-												fix ips info and reduce interval of metric calc

											
										
										
											2022-02-07 20:19:25 +08:00
+								                    eval_class(preds, batch)
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                elif model_type in ["table"]:
-												add TableMaster

											
										
										
											2022-06-16 21:24:38 +08:00
+								                    post_result = post_process_class(preds, batch)
 								                    eval_class(post_result, batch)
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                elif algorithm in ["CAN"]:
 								                    model_type = "can"
-												update CAN model

											
										
										
											2022-10-15 20:27:05 +08:00
+								                    eval_class(preds[0], batch[2:], epoch_reset=(idx == 0))
-												fix ips info and reduce interval of metric calc

											
										
										
											2022-02-07 20:19:25 +08:00
+								                else:
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                    if config["Loss"]["name"] in [
 								                        "MultiLoss",
 								                        "MultiLoss_v2",
 								                    ]:  # for multi head loss
-												add ppocrv3 rec (#6033)

* add ppocrv3 rec


											
										
										
											2022-04-26 16:19:31 +08:00
+								                        post_result = post_process_class(
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                            preds["ctc"], batch[1]
 								                        )  # for CTC head out
 								                    elif config["Loss"]["name"] in ["VLLoss"]:
 								                        post_result = post_process_class(preds, batch[1], batch[-1])
-												add ppocrv3 rec (#6033)

* add ppocrv3 rec


											
										
										
											2022-04-26 16:19:31 +08:00
+								                    else:
 								                        post_result = post_process_class(preds, batch[1])
-												fix ips info and reduce interval of metric calc

											
										
										
											2022-02-07 20:19:25 +08:00
+								                    eval_class(post_result, batch)
 								                metric = eval_class.get_metric()
 								                train_stats.update(metric)
-												[Feature] Add eta function in model's training stage (#5380)

* [Feature] Add eta function in model's training stage

* [Feature] Add eta function in model's training stage

* [Feature] Add eta function in model's training stage

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [BugFix] Fix offset bug, residual idxes should -1
											
										
										
											2022-02-01 17:46:42 +08:00
+								            train_batch_time = time.time() - reader_start
 								            train_batch_cost += train_batch_time
 								            eta_meter.update(train_batch_time)
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								            global_step += 1
-												add pse to benchmark

											
										
										
											2021-10-27 20:16:36 +08:00
+								            total_samples += len(images)
-												日志符合benchmark规范

											
										
										
											2020-11-16 19:00:27 +08:00
-												update structure of dygraph

											
										
										
											2020-11-04 20:43:27 +08:00
+								            if not isinstance(lr_scheduler, float):
 								                lr_scheduler.step()
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
 								            # logger and visualdl
-												Suppress scalar warnings (#10509)


											
										
										
											2023-07-31 15:13:11 +08:00
+								            stats = {
 								                k: float(v) if v.shape == [] else v.numpy().mean()
 								                for k, v in loss.items()
 								            }
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								            stats["lr"] = lr
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								            train_stats.update(stats)
-												Integration of the WandbLogger with the latest changes in the PaddleOCR integrations

											
										
										
											2022-04-12 23:47:54 +08:00
+								            if log_writer is not None and dist.get_rank() == 0:
-												add TableMaster

											
										
										
											2022-06-16 21:24:38 +08:00
+								                log_writer.log_metrics(
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                    metrics=train_stats.get(), prefix="TRAIN", step=global_step
 								                )
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
-												fix logger info for empty log

											
										
										
											2021-03-24 14:32:38 +08:00
+								            if dist.get_rank() == 0 and (
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                (global_step > 0 and global_step % print_batch_step == 0)
 								                or (idx >= len(train_dataloader) - 1)
 								            ):
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								                logs = train_stats.log()
-												delete debug

											
										
										
											2022-02-07 15:02:04 +08:00
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                eta_sec = (
 								                    (epoch_num + 1 - epoch) * len(train_dataloader) - idx - 1
 								                ) * eta_meter.avg
-												[Feature] Add eta function in model's training stage (#5380)

* [Feature] Add eta function in model's training stage

* [Feature] Add eta function in model's training stage

* [Feature] Add eta function in model's training stage

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [BugFix] Fix offset bug, residual idxes should -1
											
										
										
											2022-02-01 17:46:42 +08:00
+								                eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec)))
-												Refine according to comment (#11301)


											
										
										
											2023-11-24 16:50:34 +08:00
+								                max_mem_reserved_str = ""
 								                max_mem_allocated_str = ""
-												Fix bug when running on XPU (#11299)


											
										
										
											2023-11-23 16:58:48 +08:00
+								                if paddle.device.is_compiled_with_cuda():
-												Dygraph fix max_mem_reserved for benchmark (#11341)

* fix profile

* fix python3.10

* add max_mem_reserved for benchmark

* fix benchmark
											
										
										
											2023-12-05 14:55:38 +08:00
+								                    max_mem_reserved_str = f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB,"
 								                    max_mem_allocated_str = f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                strs = (
 								                    "epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: "
 								                    "{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, "
 								                    "ips: {:.5f} samples/s, eta: {}, {} {}".format(
 								                        epoch,
 								                        epoch_num,
 								                        global_step,
 								                        logs,
 								                        train_reader_cost / print_batch_step,
 								                        train_batch_cost / print_batch_step,
 								                        total_samples / print_batch_step,
 								                        total_samples / train_batch_cost,
 								                        eta_sec_format,
 								                        max_mem_reserved_str,
 								                        max_mem_allocated_str,
 								                    )
 								                )
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								                logger.info(strs)
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
-												add pse to benchmark

											
										
										
											2021-10-27 20:16:36 +08:00
+								                total_samples = 0
-												[Feature] Add eta function in model's training stage (#5380)

* [Feature] Add eta function in model's training stage

* [Feature] Add eta function in model's training stage

* [Feature] Add eta function in model's training stage

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [Feature] Adjust the strategy of ETA function according to Donkey's smart proposals.

* [BugFix] Fix offset bug, residual idxes should -1
											
										
										
											2022-02-01 17:46:42 +08:00
+								                train_reader_cost = 0.0
 								                train_batch_cost = 0.0
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								            # eval
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								            if (
 								                global_step > start_eval_step
 								                and (global_step - start_eval_step) % eval_batch_step == 0
 								                and dist.get_rank() == 0
 								            ):
-												mv model_average to incubate

											
										
										
											2021-01-22 11:15:56 +08:00
+								                if model_average:
 								                    Model_Average = paddle.incubate.optimizer.ModelAverage(
 .15,
 								                        parameters=model.parameters(),
 								                        min_average_window=10000,
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                        max_average_window=15625,
 								                    )
-												mv model_average to incubate

											
										
										
											2021-01-22 11:15:56 +08:00
+								                    Model_Average.apply()
-												polish code for srn eval

											
										
										
											2021-02-07 15:31:24 +08:00
+								                cur_metric = eval(
 								                    model,
 								                    valid_dataloader,
 								                    post_process_class,
 								                    eval_class,
-												refine

											
										
										
											2021-06-21 20:33:19 +08:00
+								                    model_type,
-												fix bug in amp eval

											
										
										
											2022-08-08 19:31:12 +08:00
+								                    extra_input=extra_input,
-												add amp eval

											
										
										
											2022-08-22 19:32:37 +08:00
+								                    scaler=scaler,
 								                    amp_level=amp_level,
-												improve amp training (#10119)


											
										
										
											2023-06-08 15:50:37 +08:00
+								                    amp_custom_black_list=amp_custom_black_list,
 								                    amp_custom_white_list=amp_custom_white_list,
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                    amp_dtype=amp_dtype,
 								                )
 								                cur_metric_str = "cur metric, {}".format(
 								                    ", ".join(["{}: {}".format(k, v) for k, v in cur_metric.items()])
 								                )
-												fix typo

											
										
										
											2021-01-26 15:16:02 +08:00
+								                logger.info(cur_metric_str)
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
 								                # logger metric
-												Integration of the WandbLogger with the latest changes in the PaddleOCR integrations

											
										
										
											2022-04-12 23:47:54 +08:00
+								                if log_writer is not None:
-												add TableMaster

											
										
										
											2022-06-16 21:24:38 +08:00
+								                    log_writer.log_metrics(
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                        metrics=cur_metric, prefix="EVAL", step=global_step
 								                    )
-												Integration of the WandbLogger with the latest changes in the PaddleOCR integrations

											
										
										
											2022-04-12 23:47:54 +08:00
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                if cur_metric[main_indicator] >= best_model_dict[main_indicator]:
-												fix typo

											
										
										
											2021-01-26 15:16:02 +08:00
+								                    best_model_dict.update(cur_metric)
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                    best_model_dict["best_epoch"] = epoch
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								                    save_model(
 								                        model,
 								                        optimizer,
 								                        save_model_dir,
 								                        logger,
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								                        config,
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								                        is_best=True,
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                        prefix="best_accuracy",
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								                        best_model_dict=best_model_dict,
-												add global_step to .states files (#2566)

Co-authored-by: littletomatodonkey <2120160898@bit.edu.cn>
											
										
										
											2021-04-27 10:13:21 +08:00
+								                        epoch=epoch,
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                        global_step=global_step,
 								                    )
 								                best_str = "best metric, {}".format(
 								                    ", ".join(
 								                        ["{}: {}".format(k, v) for k, v in best_model_dict.items()]
 								                    )
 								                )
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								                logger.info(best_str)
 								                # logger best metric
-												Integration of the WandbLogger with the latest changes in the PaddleOCR integrations

											
										
										
											2022-04-12 23:47:54 +08:00
+								                if log_writer is not None:
-												add TableMaster

											
										
										
											2022-06-16 21:24:38 +08:00
+								                    log_writer.log_metrics(
 								                        metrics={
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                            "best_{}".format(main_indicator): best_model_dict[
 								                                main_indicator
 								                            ]
-												add TableMaster

											
										
										
											2022-06-16 21:24:38 +08:00
+								                        },
 								                        prefix="EVAL",
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                        step=global_step,
 								                    )
-												add TableMaster

											
										
										
											2022-06-16 21:24:38 +08:00
 								                    log_writer.log_model(
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                        is_best=True, prefix="best_accuracy", metadata=best_model_dict
 								                    )
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
-												add pse to benchmark

											
										
										
											2021-10-27 20:16:36 +08:00
+								            reader_start = time.time()
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								        if dist.get_rank() == 0:
 								            save_model(
 								                model,
 								                optimizer,
 								                save_model_dir,
 								                logger,
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								                config,
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								                is_best=False,
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                prefix="latest",
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								                best_model_dict=best_model_dict,
-												add global_step to .states files (#2566)

Co-authored-by: littletomatodonkey <2120160898@bit.edu.cn>
											
										
										
											2021-04-27 10:13:21 +08:00
+								                epoch=epoch,
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                global_step=global_step,
 								            )
-												Integration of the WandbLogger with the latest changes in the PaddleOCR integrations

											
										
										
											2022-04-12 23:47:54 +08:00
-												Fixed bug tto check if log_writer is not None before calling log_model

											
										
										
											2022-04-15 21:43:22 +08:00
+								            if log_writer is not None:
 								                log_writer.log_model(is_best=False, prefix="latest")
-												Integration of the WandbLogger with the latest changes in the PaddleOCR integrations

											
										
										
											2022-04-12 23:47:54 +08:00
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								        if dist.get_rank() == 0 and epoch > 0 and epoch % save_epoch_step == 0:
 								            save_model(
 								                model,
 								                optimizer,
 								                save_model_dir,
 								                logger,
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								                config,
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								                is_best=False,
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                prefix="iter_epoch_{}".format(epoch),
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								                best_model_dict=best_model_dict,
-												add global_step to .states files (#2566)

Co-authored-by: littletomatodonkey <2120160898@bit.edu.cn>
											
										
										
											2021-04-27 10:13:21 +08:00
+								                epoch=epoch,
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                global_step=global_step,
 								            )
-												Fixed bug tto check if log_writer is not None before calling log_model

											
										
										
											2022-04-15 21:43:22 +08:00
+								            if log_writer is not None:
-												add TableMaster

											
										
										
											2022-06-16 21:24:38 +08:00
+								                log_writer.log_model(
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								                    is_best=False, prefix="iter_epoch_{}".format(epoch)
 								                )
-												Integration of the WandbLogger with the latest changes in the PaddleOCR integrations

											
										
										
											2022-04-12 23:47:54 +08:00
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    best_str = "best metric, {}".format(
 								        ", ".join(["{}: {}".format(k, v) for k, v in best_model_dict.items()])
 								    )
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								    logger.info(best_str)
-												Integration of the WandbLogger with the latest changes in the PaddleOCR integrations

											
										
										
											2022-04-12 23:47:54 +08:00
+								    if dist.get_rank() == 0 and log_writer is not None:
 								        log_writer.close()
-												upload PaddleOCR code

											
										
										
											2020-05-10 16:26:57 +08:00
+								    return
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
def _eval_forward(model, images, batch, model_type, extra_input):
    """Run a single forward pass using the input convention of ``model_type``.

    Args:
        model: Callable network.
        images: First element of ``batch``.
        batch: The full batch as yielded by the dataloader.
        model_type: Model family name (``"table"``, ``"kie"``, ``"can"``,
            ``"sr"``) or anything else for the default convention.
        extra_input: When truthy, the remaining batch items are passed to the
            model through the ``data`` keyword, as for ``"table"`` models.

    Returns:
        Whatever the model returns for this batch.
    """
    if model_type == "table" or extra_input:
        return model(images, data=batch[1:])
    if model_type in ("kie", "sr"):
        # Both families consume the whole batch.
        return model(batch)
    if model_type == "can":
        return model(batch[:3])
    return model(images)


def _eval_accumulate(
    eval_class, post_process_class, preds, batch_numpy, model_type, idx
):
    """Feed the predictions of one batch into the metric object."""
    if model_type in ("table", "kie"):
        if post_process_class is None:
            eval_class(preds, batch_numpy)
        else:
            post_result = post_process_class(preds, batch_numpy)
            eval_class(post_result, batch_numpy)
    elif model_type == "sr":
        eval_class(preds, batch_numpy)
    elif model_type == "can":
        # The metric is told to reset on the first batch of every run.
        eval_class(preds[0], batch_numpy[2:], epoch_reset=(idx == 0))
    else:
        post_result = post_process_class(preds, batch_numpy[1])
        eval_class(post_result, batch_numpy)


def eval(
    model,
    valid_dataloader,
    post_process_class,
    eval_class,
    model_type=None,
    extra_input=False,
    scaler=None,
    amp_level="O2",
    amp_custom_black_list=[],
    amp_custom_white_list=[],
    amp_dtype="float16",
):
    """Evaluate ``model`` on ``valid_dataloader`` and return the metric dict.

    The model is switched to eval mode, every batch is run under
    ``paddle.no_grad()``, and the model is switched back to train mode before
    returning (also when an exception is raised).

    Args:
        model: Network to evaluate.
        valid_dataloader: Iterable of batches; must support ``len()``.
        post_process_class: Callable turning raw predictions into results.
            May be ``None`` for ``"table"``/``"kie"`` models, in which case
            the raw predictions are passed to ``eval_class``.
        eval_class: Metric object; called once per batch and queried with
            ``get_metric()`` at the end.
        model_type: Selects the forward/metric calling convention.
        extra_input: Pass ``batch[1:]`` to the model via ``data=``.
        scaler: When truthy, the forward pass runs inside
            ``paddle.amp.auto_cast`` and the predictions are then passed
            through ``to_float32`` (not defined in this block).
        amp_level: ``level`` argument for ``auto_cast``.
        amp_custom_black_list: ``custom_black_list`` argument for
            ``auto_cast``. The list default is never mutated here.
        amp_custom_white_list: Accepted for signature compatibility; this
            function does not read it.
        amp_dtype: ``dtype`` argument for ``auto_cast``.

    Returns:
        The dict returned by ``eval_class.get_metric()`` with an extra
        ``"fps"`` entry (frames per second over forward + numpy conversion
        time). ``"fps"`` is ``0.0`` when no time was measured, e.g. when no
        batch was processed.
    """
    model.eval()
    total_frame = 0.0
    total_time = 0.0
    num_batches = len(valid_dataloader)
    # On Windows the last batch is skipped.
    max_iter = num_batches - 1 if platform.system() == "Windows" else num_batches
    pbar = tqdm(total=num_batches, desc="eval model:", position=0, leave=True)
    try:
        with paddle.no_grad():
            for idx, batch in enumerate(valid_dataloader):
                if idx >= max_iter:
                    break
                images = batch[0]
                start = time.time()

                if scaler:
                    # use amp
                    with paddle.amp.auto_cast(
                        level=amp_level,
                        custom_black_list=amp_custom_black_list,
                        dtype=amp_dtype,
                    ):
                        preds = _eval_forward(
                            model, images, batch, model_type, extra_input
                        )
                    preds = to_float32(preds)
                else:
                    preds = _eval_forward(
                        model, images, batch, model_type, extra_input
                    )

                # Tensors are converted to numpy; other items pass through.
                batch_numpy = [
                    item.numpy() if isinstance(item, paddle.Tensor) else item
                    for item in batch
                ]
                total_time += time.time() - start

                # Evaluate the results of the current batch
                _eval_accumulate(
                    eval_class, post_process_class, preds, batch_numpy, model_type, idx
                )

                pbar.update(1)
                total_frame += len(images)
            # Get final metric, e.g. acc or hmean
            metric = eval_class.get_metric()
    finally:
        # Always release the progress bar and restore training mode.
        pbar.close()
        model.train()

    # Guard against a zero denominator (no batch processed, or the clock did
    # not advance between the two time.time() calls).
    metric["fps"] = total_frame / total_time if total_time > 0 else 0.0
    return metric
-												fix conflict

											
										
										
											2020-08-15 21:54:59 +08:00
-												update config

											
										
										
											2020-08-15 12:39:07 +08:00
-												update enhanced ctc loss (#4256)

* fix Focal-ctc bug
* add enhanced_ctc_loss.md
											
										
										
											2021-10-12 13:41:47 +08:00
def update_center(char_center, post_result, preds):
    """Update the running per-class feature means with one batch.

    Only samples whose first result field equals the first label field
    contribute. For each such sample, every position's feature vector is
    folded into the running mean stored under that position's argmax class.

    Args:
        char_center: Dict mapping class index to ``[mean_feature, count]``;
            updated in place.
        post_result: Pair ``(result, label)`` from the post-process step.
        preds: Pair ``(feats, logits)`` of tensors; ``logits`` is argmaxed
            over its last axis.  # assumes feats/logits share their leading
            # (sample, position) axes -- TODO confirm against the model head

    Returns:
        The same ``char_center`` dict.
    """
    decoded, targets = post_result
    features, scores = preds
    class_ids = paddle.argmax(scores, axis=-1).numpy()
    features = features.numpy()

    for sample_idx, target in enumerate(targets):
        if decoded[sample_idx][0] != target[0]:
            # Skip samples that were not recognised correctly.
            continue
        sample_feats = features[sample_idx]
        for step, class_id in enumerate(class_ids[sample_idx]):
            if class_id not in char_center:
                char_center[class_id] = [sample_feats[step], 1]
                continue
            entry = char_center[class_id]
            # Incremental mean: (mean * n + x) / (n + 1)
            entry[0] = (entry[0] * entry[1] + sample_feats[step]) / (entry[1] + 1)
            entry[1] += 1
    return char_center
 								def get_center(model, eval_dataloader, post_process_class):
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    pbar = tqdm(total=len(eval_dataloader), desc="get center:")
 								    max_iter = (
 								        len(eval_dataloader) - 1
 								        if platform.system() == "Windows"
 								        else len(eval_dataloader)
 								    )
-												update enhanced ctc loss (#4256)

* fix Focal-ctc bug
* add enhanced_ctc_loss.md
											
										
										
											2021-10-12 13:41:47 +08:00
+								    char_center = dict()
 								    for idx, batch in enumerate(eval_dataloader):
 								        if idx >= max_iter:
 								            break
 								        images = batch[0]
 								        start = time.time()
 								        preds = model(images)
 								        batch = [item.numpy() for item in batch]
 								        # Obtain usable results from post-processing methods
 								        post_result = post_process_class(preds, batch[1])
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        # update char_center
-												update enhanced ctc loss (#4256)

* fix Focal-ctc bug
* add enhanced_ctc_loss.md
											
										
										
											2021-10-12 13:41:47 +08:00
+								        char_center = update_center(char_center, post_result, preds)
 								        pbar.update(1)
 								    pbar.close()
 								    for key in char_center.keys():
 								        char_center[key] = char_center[key][0]
 								    return char_center
-												Save configuration files and logs only during training

											
										
										
											2020-12-18 18:51:19 +08:00
+								def preprocess(is_train=False):
-												fix conflict

											
										
										
											2020-08-15 21:54:59 +08:00
+								    FLAGS = ArgsParser().parse_args()
-												add profile

											
										
										
											2021-09-28 10:01:37 +08:00
+								    profiler_options = FLAGS.profiler_options
-												fix conflict

											
										
										
											2020-08-15 21:54:59 +08:00
+								    config = load_config(FLAGS.config)
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								    config = merge_config(config, FLAGS.opt)
-												fix profile_options

											
										
										
											2021-09-29 09:59:43 +08:00
+								    profile_dic = {"profiler_options": FLAGS.profiler_options}
-												vqa code integrated into ppocr training system

											
										
										
											2022-01-05 19:03:45 +08:00
+								    config = merge_config(config, profile_dic)
-												fix conflict

											
										
										
											2020-08-15 21:54:59 +08:00
-												add pse to windows_not_support_list

											
										
										
											2021-09-27 19:43:36 +08:00
+								    if is_train:
 								        # save_config
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        save_model_dir = config["Global"]["save_model_dir"]
-												add pse to windows_not_support_list

											
										
										
											2021-09-27 19:43:36 +08:00
+								        os.makedirs(save_model_dir, exist_ok=True)
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        with open(os.path.join(save_model_dir, "config.yml"), "w") as f:
 								            yaml.dump(dict(config), f, default_flow_style=False, sort_keys=False)
 								        log_file = "{}/train.log".format(save_model_dir)
-												add pse to windows_not_support_list

											
										
										
											2021-09-27 19:43:36 +08:00
+								    else:
 								        log_file = None
-												fix log print twice error (#5743)


											
										
										
											2022-03-22 12:48:50 +08:00
+								    logger = get_logger(log_file=log_file)
-												fix conflict

											
										
										
											2020-08-15 21:54:59 +08:00
 								    # check if set use_gpu=True in paddlepaddle cpu version
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    use_gpu = config["Global"].get("use_gpu", False)
 								    use_xpu = config["Global"].get("use_xpu", False)
 								    use_npu = config["Global"].get("use_npu", False)
 								    use_mlu = config["Global"].get("use_mlu", False)
-												add use_xpu config for det_mv3_db.yml

											
										
										
											2022-02-23 16:31:16 +08:00
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    alg = config["Architecture"]["algorithm"]
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								    assert alg in [
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        "EAST",
 								        "DB",
 								        "SAST",
 								        "Rosetta",
 								        "CRNN",
 								        "STARNet",
 								        "RARE",
 								        "SRN",
 								        "CLS",
 								        "PGNet",
 								        "Distillation",
 								        "NRTR",
 								        "TableAttn",
 								        "SAR",
 								        "PSE",
 								        "SEED",
 								        "SDMGR",
 								        "LayoutXLM",
 								        "LayoutLM",
 								        "LayoutLMv2",
 								        "PREN",
 								        "FCE",
 								        "SVTR",
 								        "SVTR_LCNet",
 								        "ViTSTR",
 								        "ABINet",
 								        "DB++",
 								        "TableMaster",
 								        "SPIN",
 								        "VisionLAN",
 								        "Gestalt",
 								        "SLANet",
 								        "RobustScanner",
 								        "CT",
 								        "RFL",
 								        "DRRG",
 								        "CAN",
 								        "Telescope",
 								        "SATRN",
 								        "SVTR_HGNet",
 								        "ParseQ",
 								        "CPPD",
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								    ]
-												fix conflict

											
										
										
											2020-08-15 21:54:59 +08:00
-												add use_xpu config for det_mv3_db.yml

											
										
										
											2022-02-23 16:31:16 +08:00
+								    if use_xpu:
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        device = "xpu:{0}".format(os.getenv("FLAGS_selected_xpus", 0))
-												[TIPC] add scripts for NPU and XPU, test=develop

											
										
										
											2022-09-13 14:11:21 +08:00
+								    elif use_npu:
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        device = "npu:{0}".format(os.getenv("FLAGS_selected_npus", 0))
-												[MLU]adapt mlu device for running dbnet network

											
										
										
											2022-10-08 16:46:41 +08:00
+								    elif use_mlu:
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        device = "mlu:{0}".format(os.getenv("FLAGS_selected_mlus", 0))
-												Cherry pick  to dygraph (#6383)

* add starnet doc & test=document_fix

* add starnet doc & test=document_fix

* Update algorithm_rec_starnet.md

* support xpu
											
										
										
											2022-05-23 23:38:33 +08:00
+								    else:
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        device = "gpu:{}".format(dist.ParallelEnv().dev_id) if use_gpu else "cpu"
-												[MLU]adapt mlu device for running dbnet network

											
										
										
											2022-10-08 16:46:41 +08:00
+								    check_device(use_gpu, use_xpu, use_npu, use_mlu)
-												Cherry pick  to dygraph (#6383)

* add starnet doc & test=document_fix

* add starnet doc & test=document_fix

* Update algorithm_rec_starnet.md

* support xpu
											
										
										
											2022-05-23 23:38:33 +08:00
-												dygraph first commit

											
										
										
											2020-10-13 17:13:33 +08:00
+								    device = paddle.set_device(device)
-												trans to paddle-rc

											
										
										
											2020-11-05 15:13:36 +08:00
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    config["Global"]["distributed"] = dist.get_world_size() != 1
-												add pse to windows_not_support_list

											
										
										
											2021-09-27 19:43:36 +08:00
-												Added functionality to use multiple loggers simultaneously and addded the english documentation on how to use them

											
										
										
											2022-04-15 14:44:22 +08:00
+								    loggers = []
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    if "use_visualdl" in config["Global"] and config["Global"]["use_visualdl"]:
-												【OCR Issue No.9】以可选形式支持Visualdl (#11947)

* delete visual dl

* totally delete visual

* delete vdl file

* fix codestyle
											
										
										
											2024-04-25 17:37:27 +08:00
+								        logger.warning(
 								            "You are using VisualDL, the VisualDL is deprecated and "
 								            "removed in ppocr!"
 								        )
 								        log_writer = None
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    if (
 								        "use_wandb" in config["Global"] and config["Global"]["use_wandb"]
 								    ) or "wandb" in config:
 								        save_dir = config["Global"]["save_model_dir"]
-												Integration of the WandbLogger with the latest changes in the PaddleOCR integrations

											
										
										
											2022-04-12 23:47:54 +08:00
+								        wandb_writer_path = "{}/wandb".format(save_dir)
 								        if "wandb" in config:
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								            wandb_params = config["wandb"]
-												Integration of the WandbLogger with the latest changes in the PaddleOCR integrations

											
										
										
											2022-04-12 23:47:54 +08:00
+								        else:
 								            wandb_params = dict()
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								        wandb_params.update({"save_dir": save_dir})
-												Integration of the WandbLogger with the latest changes in the PaddleOCR integrations

											
										
										
											2022-04-12 23:47:54 +08:00
+								        log_writer = WandbLogger(**wandb_params, config=config)
-												Added functionality to use multiple loggers simultaneously and added the English documentation on how to use them

											
										
										
											2022-04-15 14:44:22 +08:00
+								        loggers.append(log_writer)
-												update structure of dygraph

											
										
										
											2020-11-04 20:43:27 +08:00
+								    else:
-												Integration of the WandbLogger with the latest changes in the PaddleOCR integrations

											
										
										
											2022-04-12 23:47:54 +08:00
+								        log_writer = None
-												update structure of dygraph

											
										
										
											2020-11-04 20:43:27 +08:00
+								    print_dict(config, logger)
-												Added functionality to use multiple loggers simultaneously and added the English documentation on how to use them

											
										
										
											2022-04-15 14:44:22 +08:00
 								    if loggers:
 								        log_writer = Loggers(loggers)
 								    else:
 								        log_writer = None
-												add pre-commit workflow (#11973)

* add pre-commit workflow

* run 'pre-commit run --all-files'

* setup python version
											
										
										
											2024-04-21 21:46:20 +08:00
+								    logger.info("train with paddle {} and device {}".format(paddle.__version__, device))
-												Integration of the WandbLogger with the latest changes in the PaddleOCR integrations

											
										
										
											2022-04-12 23:47:54 +08:00
+								    return config, device, logger, log_writer