PaddleClas/ppcls/engine/evaluation/classification.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import platform
import paddle

from ...utils.misc import AverageMeter
from ...utils import logger
from ...data import build_dataloader
from ...loss import build_loss
from ...metric import build_metrics


class ClassEval(object):
    def __init__(self, config, mode, model):
        self.config = config
        self.model = model
        self.use_dali = self.config["Global"].get("use_dali", False)
        self.eval_metric_func = build_metrics(config, "eval")
        self.eval_dataloader = build_dataloader(config, "eval")
        self.eval_loss_func = build_loss(config, "eval")
        self.output_info = dict()

    @paddle.no_grad()
    def __call__(self, epoch_id=0):
        self.model.eval()

        if hasattr(self.eval_metric_func, "reset"):
            self.eval_metric_func.reset()

        time_info = {
            "batch_cost": AverageMeter(
                "batch_cost", '.5f', postfix=" s,"),
            "reader_cost": AverageMeter(
                "reader_cost", ".5f", postfix=" s,"),
        }
        print_batch_step = self.config["Global"]["print_batch_step"]

        tic = time.time()
        total_samples = self.eval_dataloader["Eval"].total_samples
        accum_samples = 0
        max_iter = self.eval_dataloader["Eval"].max_iter
        for iter_id, batch in enumerate(self.eval_dataloader["Eval"]):
            if iter_id >= max_iter:
                break
            if iter_id == 5:
                for key in time_info:
                    time_info[key].reset()

            time_info["reader_cost"].update(time.time() - tic)
            batch_size = batch[0].shape[0]
            batch[0] = paddle.to_tensor(batch[0])
            if not self.config["Global"].get("use_multilabel", False):
                batch[1] = batch[1].reshape([-1, 1]).astype("int64")

            # image input
            # if engine.amp and engine.amp_eval:
            #     with paddle.amp.auto_cast(
            #             custom_black_list={
            #                 "flatten_contiguous_range", "greater_than"
            #             },
            #             level=engine.amp_level):
            #         out = engine.model(batch)
            # else:
            #     out = self.model(batch)
            out = self.model(batch)

            # just for DistributedBatchSampler issue: repeat sampling
            current_samples = batch_size * paddle.distributed.get_world_size()
            accum_samples += current_samples

            if isinstance(out, dict) and "Student" in out:
                out = out["Student"]
            if isinstance(out, dict) and "logits" in out:
                out = out["logits"]

            # gather Tensor when distributed
            if paddle.distributed.get_world_size() > 1:
                label_list = []
                device_id = paddle.distributed.ParallelEnv().device_id
                label = batch[1].cuda(device_id) if self.config["Global"][
                    "device"] == "gpu" else batch[1]
                paddle.distributed.all_gather(label_list, label)
                labels = paddle.concat(label_list, 0)

                if isinstance(out, list):
                    preds = []
                    for x in out:
                        pred_list = []
                        paddle.distributed.all_gather(pred_list, x)
                        pred_x = paddle.concat(pred_list, 0)
                        preds.append(pred_x)
                else:
                    pred_list = []
                    paddle.distributed.all_gather(pred_list, out)
                    preds = paddle.concat(pred_list, 0)

                if accum_samples > total_samples and not self.use_dali:
                    if isinstance(preds, list):
                        preds = [
                            pred[:total_samples + current_samples -
                                 accum_samples] for pred in preds
                        ]
                    else:
                        preds = preds[:total_samples + current_samples -
                                      accum_samples]
                    labels = labels[:total_samples + current_samples -
                                    accum_samples]
                    current_samples = total_samples + current_samples - accum_samples
            else:
                labels = batch[1]
                preds = out

            # calc loss
            if self.eval_loss_func is not None:
                # if self.amp and self.amp_eval:
                #     with paddle.amp.auto_cast(
                #             custom_black_list={
                #                 "flatten_contiguous_range", "greater_than"
                #             },
                #             level=engine.amp_level):
                #         loss_dict = engine.eval_loss_func(preds, labels)
                # else:
                loss_dict = self.eval_loss_func(preds, labels)

                for key in loss_dict:
                    if key not in self.output_info:
                        self.output_info[key] = AverageMeter(key, '7.5f')
                    self.output_info[key].update(
                        float(loss_dict[key]), current_samples)

            #  calc metric
            if self.eval_metric_func is not None:
                self.eval_metric_func(preds, labels)
            time_info["batch_cost"].update(time.time() - tic)

            if iter_id % print_batch_step == 0:
                time_msg = "s, ".join([
                    "{}: {:.5f}".format(key, time_info[key].avg)
                    for key in time_info
                ])

                ips_msg = "ips: {:.5f} images/sec".format(
                    batch_size / time_info["batch_cost"].avg)

                if "ATTRMetric" in self.config["Metric"]["Eval"][0]:
                    metric_msg = ""
                else:
                    metric_msg = ", ".join([
                        "{}: {:.5f}".format(key, self.output_info[key].val)
                        for key in self.output_info
                    ])
                    metric_msg += ", {}".format(self.eval_metric_func.avg_info)
                logger.info("[Eval][Epoch {}][Iter: {}/{}]{}, {}, {}".format(
                    epoch_id, iter_id, max_iter, metric_msg, time_msg,
                    ips_msg))

            tic = time.time()
        if self.use_dali:
            self.eval_dataloader["Eval"].reset()

        if "ATTRMetric" in self.config["Metric"]["Eval"][0]:
            metric_msg = ", ".join([
                "evalres: ma: {:.5f} label_f1: {:.5f} label_pos_recall: {:.5f} label_neg_recall: {:.5f} instance_f1: {:.5f} instance_acc: {:.5f} instance_prec: {:.5f} instance_recall: {:.5f}".
                format(*self.eval_metric_func.attr_res())
            ])
            logger.info("[Eval][Epoch {}][Avg]{}".format(epoch_id, metric_msg))

            # do not try to save best eval.model
            if self.eval_metric_func is None:
                return -1
            # return 1st metric in the dict
            return self.eval_metric_func.attr_res()[0]
        else:
            metric_msg = ", ".join([
                "{}: {:.5f}".format(key, self.output_info[key].avg)
                for key in self.output_info
            ])
            metric_msg += ", {}".format(self.eval_metric_func.avg_info)
            logger.info("[Eval][Epoch {}][Avg]{}".format(epoch_id, metric_msg))

            # do not try to save best eval.model
            if self.eval_metric_func is None:
                return -1
            # return 1st metric in the dict
            return self.eval_metric_func.avg
        self.model.train()
        return eval_result
refactor trainer 2021-08-22 15:10:23 +00:00			`# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`from __future__ import absolute_import`
			`from __future__ import division`
			`from __future__ import print_function`
			`import time`
			`import platform`
			`import paddle`

refactor: rm train and eval from engine 2023-03-05 12:53:36 +00:00			`from ...utils.misc import AverageMeter`
			`from ...utils import logger`
			`from ...data import build_dataloader`
			`from ...loss import build_loss`
			`from ...metric import build_metrics`


			`class ClassEval(object):`
			`def __init__(self, config, mode, model):`
			`self.config = config`
			`self.model = model`
			`self.use_dali = self.config["Global"].get("use_dali", False)`
Revert "refactor" This reverts commit 32593b63751b922e17b59384ed64654e6fcef42d. 2023-03-14 16:16:40 +08:00			`self.eval_metric_func = build_metrics(config, "eval")`
			`self.eval_dataloader = build_dataloader(config, "eval")`
			`self.eval_loss_func = build_loss(config, "eval")`
refactor: rm train and eval from engine 2023-03-05 12:53:36 +00:00			`self.output_info = dict()`

			`@paddle.no_grad()`
			`def __call__(self, epoch_id=0):`
			`self.model.eval()`

			`if hasattr(self.eval_metric_func, "reset"):`
			`self.eval_metric_func.reset()`

			`time_info = {`
			`"batch_cost": AverageMeter(`
			`"batch_cost", '.5f', postfix=" s,"),`
			`"reader_cost": AverageMeter(`
			`"reader_cost", ".5f", postfix=" s,"),`
			`}`
Revert "refactor" This reverts commit 32593b63751b922e17b59384ed64654e6fcef42d. 2023-03-14 16:16:40 +08:00			`print_batch_step = self.config["Global"]["print_batch_step"]`
refactor: rm train and eval from engine 2023-03-05 12:53:36 +00:00
			`tic = time.time()`
Revert "refactor" This reverts commit 32593b63751b922e17b59384ed64654e6fcef42d. 2023-03-14 16:16:40 +08:00			`total_samples = self.eval_dataloader["Eval"].total_samples`
refactor: rm train and eval from engine 2023-03-05 12:53:36 +00:00			`accum_samples = 0`
Revert "refactor" This reverts commit 32593b63751b922e17b59384ed64654e6fcef42d. 2023-03-14 16:16:40 +08:00			`max_iter = self.eval_dataloader["Eval"].max_iter`
			`for iter_id, batch in enumerate(self.eval_dataloader["Eval"]):`
refactor: rm train and eval from engine 2023-03-05 12:53:36 +00:00			`if iter_id >= max_iter:`
			`break`
			`if iter_id == 5:`
			`for key in time_info:`
			`time_info[key].reset()`

			`time_info["reader_cost"].update(time.time() - tic)`
			`batch_size = batch[0].shape[0]`
			`batch[0] = paddle.to_tensor(batch[0])`
			`if not self.config["Global"].get("use_multilabel", False):`
			`batch[1] = batch[1].reshape([-1, 1]).astype("int64")`

Revert "rm amp code from train and eval & use decorator for amp training" This reverts commit d3941dc1e9628fa7cc83de7c3a6da3dfcd03b5de. 2023-03-14 16:16:40 +08:00			`# image input`
			`# if engine.amp and engine.amp_eval:`
			`# with paddle.amp.auto_cast(`
			`# custom_black_list={`
			`# "flatten_contiguous_range", "greater_than"`
			`# },`
			`# level=engine.amp_level):`
			`# out = engine.model(batch)`
			`# else:`
			`# out = self.model(batch)`
Revert "revert for running" This reverts commit 392b75b1acac742b74e808353059d0281df26dcc. 2023-03-14 16:16:40 +08:00			`out = self.model(batch)`
refactor: rm train and eval from engine 2023-03-05 12:53:36 +00:00
			`# just for DistributedBatchSampler issue: repeat sampling`
			`current_samples = batch_size * paddle.distributed.get_world_size()`
			`accum_samples += current_samples`

			`if isinstance(out, dict) and "Student" in out:`
			`out = out["Student"]`
			`if isinstance(out, dict) and "logits" in out:`
			`out = out["logits"]`

			`# gather Tensor when distributed`
			`if paddle.distributed.get_world_size() > 1:`
			`label_list = []`
			`device_id = paddle.distributed.ParallelEnv().device_id`
			`label = batch[1].cuda(device_id) if self.config["Global"][`
			`"device"] == "gpu" else batch[1]`
			`paddle.distributed.all_gather(label_list, label)`
			`labels = paddle.concat(label_list, 0)`

			`if isinstance(out, list):`
			`preds = []`
			`for x in out:`
			`pred_list = []`
			`paddle.distributed.all_gather(pred_list, x)`
			`pred_x = paddle.concat(pred_list, 0)`
			`preds.append(pred_x)`
fix classification bug 2022-07-06 20:38:40 +08:00			`else:`
refactor: rm train and eval from engine 2023-03-05 12:53:36 +00:00			`pred_list = []`
			`paddle.distributed.all_gather(pred_list, out)`
			`preds = paddle.concat(pred_list, 0)`

			`if accum_samples > total_samples and not self.use_dali:`
			`if isinstance(preds, list):`
			`preds = [`
			`pred[:total_samples + current_samples -`
			`accum_samples] for pred in preds`
			`]`
			`else:`
			`preds = preds[:total_samples + current_samples -`
			`accum_samples]`
			`labels = labels[:total_samples + current_samples -`
			`accum_samples]`
			`current_samples = total_samples + current_samples - accum_samples`
fix clas distributed eval bug 2021-10-20 11:22:37 +00:00			`else:`
refactor: rm train and eval from engine 2023-03-05 12:53:36 +00:00			`labels = batch[1]`
			`preds = out`

			`# calc loss`
			`if self.eval_loss_func is not None:`
Revert "rm amp code from train and eval & use decorator for amp training" This reverts commit d3941dc1e9628fa7cc83de7c3a6da3dfcd03b5de. 2023-03-14 16:16:40 +08:00			`# if self.amp and self.amp_eval:`
			`# with paddle.amp.auto_cast(`
			`# custom_black_list={`
			`# "flatten_contiguous_range", "greater_than"`
			`# },`
			`# level=engine.amp_level):`
			`# loss_dict = engine.eval_loss_func(preds, labels)`
			`# else:`
refactor: rm train and eval from engine 2023-03-05 12:53:36 +00:00			`loss_dict = self.eval_loss_func(preds, labels)`

			`for key in loss_dict:`
			`if key not in self.output_info:`
			`self.output_info[key] = AverageMeter(key, '7.5f')`
			`self.output_info[key].update(`
			`float(loss_dict[key]), current_samples)`

			`# calc metric`
			`if self.eval_metric_func is not None:`
			`self.eval_metric_func(preds, labels)`
			`time_info["batch_cost"].update(time.time() - tic)`

Revert "refactor" This reverts commit 32593b63751b922e17b59384ed64654e6fcef42d. 2023-03-14 16:16:40 +08:00			`if iter_id % print_batch_step == 0:`
refactor: rm train and eval from engine 2023-03-05 12:53:36 +00:00			`time_msg = "s, ".join([`
			`"{}: {:.5f}".format(key, time_info[key].avg)`
			`for key in time_info`
			`])`
refactor trainer 2021-08-22 15:10:23 +00:00
refactor: rm train and eval from engine 2023-03-05 12:53:36 +00:00			`ips_msg = "ips: {:.5f} images/sec".format(`
			`batch_size / time_info["batch_cost"].avg)`
refactor trainer 2021-08-22 15:10:23 +00:00
refactor: rm train and eval from engine 2023-03-05 12:53:36 +00:00			`if "ATTRMetric" in self.config["Metric"]["Eval"][0]:`
			`metric_msg = ""`
			`else:`
			`metric_msg = ", ".join([`
			`"{}: {:.5f}".format(key, self.output_info[key].val)`
			`for key in self.output_info`
			`])`
			`metric_msg += ", {}".format(self.eval_metric_func.avg_info)`
			`logger.info("[Eval][Epoch {}][Iter: {}/{}]{}, {}, {}".format(`
			`epoch_id, iter_id, max_iter, metric_msg, time_msg,`
			`ips_msg))`

			`tic = time.time()`
			`if self.use_dali:`
Revert "refactor" This reverts commit 32593b63751b922e17b59384ed64654e6fcef42d. 2023-03-14 16:16:40 +08:00			`self.eval_dataloader["Eval"].reset()`
refactor: rm train and eval from engine 2023-03-05 12:53:36 +00:00
			`if "ATTRMetric" in self.config["Metric"]["Eval"][0]:`
			`metric_msg = ", ".join([`
			`"evalres: ma: {:.5f} label_f1: {:.5f} label_pos_recall: {:.5f} label_neg_recall: {:.5f} instance_f1: {:.5f} instance_acc: {:.5f} instance_prec: {:.5f} instance_recall: {:.5f}".`
			`format(*self.eval_metric_func.attr_res())`
			`])`
			`logger.info("[Eval][Epoch {}][Avg]{}".format(epoch_id, metric_msg))`
refactor trainer 2021-08-22 15:10:23 +00:00
refactor: rm train and eval from engine 2023-03-05 12:53:36 +00:00			`# do not try to save best eval.model`
			`if self.eval_metric_func is None:`
			`return -1`
			`# return 1st metric in the dict`
			`return self.eval_metric_func.attr_res()[0]`
			`else:`
			`metric_msg = ", ".join([`
			`"{}: {:.5f}".format(key, self.output_info[key].avg)`
			`for key in self.output_info`
			`])`
			`metric_msg += ", {}".format(self.eval_metric_func.avg_info)`
			`logger.info("[Eval][Epoch {}][Avg]{}".format(epoch_id, metric_msg))`

			`# do not try to save best eval.model`
			`if self.eval_metric_func is None:`
			`return -1`
			`# return 1st metric in the dict`
			`return self.eval_metric_func.avg`
			`self.model.train()`
			`return eval_result`