fix quant logic (#1941)
* fix quant logic
* add support for trt+int8 inference
* add support for swin quant
* fix swin and quant
* fix assert info
* fix assert info
* fix log
parent 821e550900
commit 6dac07f68d
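For orientation before the diffs: every change below hangs off one new `Global` switch, `use_int8`, used alongside the existing `use_fp16`. A minimal sketch of the precision-selection pattern the PR repeats in several places (the config literal is a made-up example, not a file from the repo):

config = {"Global": {"use_int8": True, "use_fp16": False}}

# Same precedence as in the hunks below: int8 first, then fp16, default fp32.
if config["Global"].get("use_int8", False):
    precision = "int8"
elif config["Global"].get("use_fp16", False):
    precision = "fp16"
else:
    precision = "fp32"
print(precision)  # -> int8
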
@@ -49,10 +49,15 @@ class ClsPredictor(Predictor):
             pid = os.getpid()
             size = config["PreProcess"]["transform_ops"][1]["CropImage"][
                 "size"]
+            if config["Global"].get("use_int8", False):
+                precision = "int8"
+            elif config["Global"].get("use_fp16", False):
+                precision = "fp16"
+            else:
+                precision = "fp32"
             self.auto_logger = auto_log.AutoLogger(
                 model_name=config["Global"].get("model_name", "cls"),
-                model_precision='fp16'
-                if config["Global"]["use_fp16"] else 'fp32',
+                model_precision=precision,
                 batch_size=config["Global"].get("batch_size", 1),
                 data_shape=[3, size, size],
                 save_path=config["Global"].get("save_log_path",

@@ -42,8 +42,22 @@ class Predictor(object):
     def create_paddle_predictor(self, args, inference_model_dir=None):
         if inference_model_dir is None:
             inference_model_dir = args.inference_model_dir
-        params_file = os.path.join(inference_model_dir, "inference.pdiparams")
-        model_file = os.path.join(inference_model_dir, "inference.pdmodel")
+        if "inference_int8.pdiparams" in os.listdir(inference_model_dir):
+            params_file = os.path.join(inference_model_dir,
+                                       "inference_int8.pdiparams")
+            model_file = os.path.join(inference_model_dir,
+                                      "inference_int8.pdmodel")
+            assert args.get(
+                "use_fp16", False
+            ) is False, "fp16 mode is not supported for int8 model inference, please set use_fp16 as False during inference."
+        else:
+            params_file = os.path.join(inference_model_dir,
+                                       "inference.pdiparams")
+            model_file = os.path.join(inference_model_dir, "inference.pdmodel")
+            assert args.get(
+                "use_int8", False
+            ) is False, "int8 mode is not supported for fp32 model inference, please set use_int8 as False during inference."
+
         config = Config(model_file, params_file)

         if args.use_gpu:

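Read on its own, the branch above means: if an `inference_int8.*` pair exists in the model directory it is preferred, and `use_fp16` must then stay off; otherwise the plain `inference.*` pair is used and `use_int8` must stay off. A standalone sketch of the same selection (the helper name and flag arguments are illustrative, not part of the PR):

import os

def select_model_files(inference_model_dir, use_fp16=False, use_int8=False):
    # Hypothetical helper mirroring the branch above.
    files = os.listdir(inference_model_dir)
    if "inference_int8.pdiparams" in files:
        assert not use_fp16, "fp16 is not supported for int8 model inference"
        prefix = "inference_int8"
    else:
        assert not use_int8, "int8 requested, but only an fp32 model was found"
        prefix = "inference"
    return (os.path.join(inference_model_dir, prefix + ".pdmodel"),
            os.path.join(inference_model_dir, prefix + ".pdiparams"))
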
@@ -63,12 +77,18 @@ class Predictor(object):
         config.disable_glog_info()
         config.switch_ir_optim(args.ir_optim)  # default true
         if args.use_tensorrt:
+            precision = Config.Precision.Float32
+            if args.get("use_int8", False):
+                precision = Config.Precision.Int8
+            elif args.get("use_fp16", False):
+                precision = Config.Precision.Half
+
             config.enable_tensorrt_engine(
-                precision_mode=Config.Precision.Half
-                if args.use_fp16 else Config.Precision.Float32,
+                precision_mode=precision,
                 max_batch_size=args.batch_size,
                 workspace_size=1 << 30,
-                min_subgraph_size=30)
+                min_subgraph_size=30,
+                use_calib_mode=False)

         config.enable_memory_optim()
         # use zero copy

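Two details in this hunk are worth calling out: the TensorRT precision now honours `use_int8` ahead of `use_fp16`, and `use_calib_mode=False` is passed explicitly, presumably because a QAT-exported int8 model already carries its quantization scales, so no TensorRT calibration pass is wanted. A standalone sketch of the assembled call (file paths and the GPU memory/ID values are placeholders):

from paddle.inference import Config

config = Config("inference_int8.pdmodel", "inference_int8.pdiparams")
config.enable_use_gpu(8000, 0)  # memory pool size in MB, GPU id

use_int8, use_fp16 = True, False
precision = Config.Precision.Float32
if use_int8:
    precision = Config.Precision.Int8
elif use_fp16:
    precision = Config.Precision.Half

config.enable_tensorrt_engine(
    precision_mode=precision,
    max_batch_size=1,
    workspace_size=1 << 30,
    min_subgraph_size=30,
    use_calib_mode=False)  # scales come from QAT export, not a TRT calibration run
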
@@ -33,7 +33,7 @@ from ppcls.arch.distill.afd_attention import LinearTransformStudent, LinearTransformTeacher
 __all__ = ["build_model", "RecModel", "DistillationModel", "AttentionModel"]


-def build_model(config):
+def build_model(config, mode="train"):
     arch_config = copy.deepcopy(config["Arch"])
     model_type = arch_config.pop("name")
     use_sync_bn = arch_config.pop("use_sync_bn", False)

@@ -44,7 +44,7 @@ def build_model(config):

     if isinstance(arch, TheseusLayer):
         prune_model(config, arch)
-        quantize_model(config, arch)
+        quantize_model(config, arch, mode)

     logger.info("The FLOPs and Params of Arch:")
     try:

@@ -52,7 +52,7 @@ from ppcls.arch.backbone.model_zoo.darknet import DarkNet53
 from ppcls.arch.backbone.model_zoo.regnet import RegNetX_200MF, RegNetX_4GF, RegNetX_32GF, RegNetY_200MF, RegNetY_4GF, RegNetY_32GF
 from ppcls.arch.backbone.model_zoo.vision_transformer import ViT_small_patch16_224, ViT_base_patch16_224, ViT_base_patch16_384, ViT_base_patch32_384, ViT_large_patch16_224, ViT_large_patch16_384, ViT_large_patch32_384
 from ppcls.arch.backbone.model_zoo.distilled_vision_transformer import DeiT_tiny_patch16_224, DeiT_small_patch16_224, DeiT_base_patch16_224, DeiT_tiny_distilled_patch16_224, DeiT_small_distilled_patch16_224, DeiT_base_distilled_patch16_224, DeiT_base_patch16_384, DeiT_base_distilled_patch16_384
-from ppcls.arch.backbone.model_zoo.swin_transformer import SwinTransformer_tiny_patch4_window7_224, SwinTransformer_small_patch4_window7_224, SwinTransformer_base_patch4_window7_224, SwinTransformer_base_patch4_window12_384, SwinTransformer_large_patch4_window7_224, SwinTransformer_large_patch4_window12_384
+from ppcls.arch.backbone.legendary_models.swin_transformer import SwinTransformer_tiny_patch4_window7_224, SwinTransformer_small_patch4_window7_224, SwinTransformer_base_patch4_window7_224, SwinTransformer_base_patch4_window12_384, SwinTransformer_large_patch4_window7_224, SwinTransformer_large_patch4_window12_384
 from ppcls.arch.backbone.model_zoo.cswin_transformer import CSWinTransformer_tiny_224, CSWinTransformer_small_224, CSWinTransformer_base_224, CSWinTransformer_large_224, CSWinTransformer_base_384, CSWinTransformer_large_384
 from ppcls.arch.backbone.model_zoo.mixnet import MixNet_S, MixNet_M, MixNet_L
 from ppcls.arch.backbone.model_zoo.rexnet import ReXNet_1_0, ReXNet_1_3, ReXNet_1_5, ReXNet_2_0, ReXNet_3_0

@@ -21,8 +21,8 @@ import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle.nn.initializer import TruncatedNormal, Constant

-from .vision_transformer import trunc_normal_, zeros_, ones_, to_2tuple, DropPath, Identity
-
+from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
+from ppcls.arch.backbone.model_zoo.vision_transformer import trunc_normal_, zeros_, ones_, to_2tuple, DropPath, Identity
 from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

 MODEL_URLS = {

@@ -589,7 +589,7 @@ class PatchEmbed(nn.Layer):
         return flops


-class SwinTransformer(nn.Layer):
+class SwinTransformer(TheseusLayer):
     """ Swin Transformer
     A PaddlePaddle impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows` -
       https://arxiv.org/pdf/2103.14030

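Re-basing SwinTransformer on TheseusLayer (and importing it from legendary_models) is what actually enables "swin quant": build_model only prunes/quantizes architectures that pass the `isinstance(arch, TheseusLayer)` check shown earlier. A small check of that assumption (the `pretrained=False` keyword follows the usual PaddleClas constructor convention; treat the snippet as illustrative):

from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
from ppcls.arch.backbone.legendary_models.swin_transformer import (
    SwinTransformer_tiny_patch4_window7_224)

arch = SwinTransformer_tiny_patch4_window7_224(pretrained=False)
# After this change the Swin models satisfy the quantize/prune gate in build_model.
print(isinstance(arch, TheseusLayer))  # -> True
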
@@ -40,12 +40,14 @@ QUANT_CONFIG = {
 }


-def quantize_model(config, model):
+def quantize_model(config, model, mode="train"):
     if config.get("Slim", False) and config["Slim"].get("quant", False):
         from paddleslim.dygraph.quant import QAT
         assert config["Slim"]["quant"]["name"].lower(
         ) == 'pact', 'Only PACT quantization method is supported now'
         QUANT_CONFIG["activation_preprocess_type"] = "PACT"
+        if mode in ["infer", "export"]:
+            QUANT_CONFIG['activation_preprocess_type'] = None
         model.quanter = QAT(config=QUANT_CONFIG)
         model.quanter.quantize(model)
         logger.info("QAT model summary:")

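The new `mode` argument matters because PACT adds a learnable activation-clipping preprocess that only exists to be trained; for `infer`/`export` it is switched off so the exported graph keeps only the plain fake-quant/dequant ops. A minimal sketch of that branch against a toy layer (the partial QUANT_CONFIG keys follow paddleslim's QAT config convention and the toy model is not from the repo; treat both as assumptions):

import paddle.nn as nn
from paddleslim.dygraph.quant import QAT

QUANT_CONFIG = {
    "weight_quantize_type": "channel_wise_abs_max",
    "activation_quantize_type": "moving_average_abs_max",
    "quantizable_layer_type": ["Conv2D", "Linear"],
    "activation_preprocess_type": "PACT",  # set for training, as above
}

mode = "export"
if mode in ["infer", "export"]:
    # Drop the learnable PACT clipping before export, as in the hunk above.
    QUANT_CONFIG["activation_preprocess_type"] = None

model = nn.Sequential(
    nn.Conv2D(3, 8, 3), nn.AdaptiveAvgPool2D(1), nn.Flatten(), nn.Linear(8, 10))
quanter = QAT(config=QUANT_CONFIG)
quanter.quantize(model)  # wraps Conv2D/Linear sublayers with quant-aware versions
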
@@ -189,7 +189,7 @@ class Engine(object):
             self.eval_metric_func = None

         # build model
-        self.model = build_model(self.config)
+        self.model = build_model(self.config, self.mode)
         # set @to_static for benchmark, skip this by default.
         apply_to_static(self.config, self.model)

@@ -472,23 +472,19 @@ class Engine(object):

         save_path = os.path.join(self.config["Global"]["save_inference_dir"],
                                  "inference")
-        if model.quanter:
-            model.quanter.save_quantized_model(
-                model.base_model,
-                save_path,
-                input_spec=[
-                    paddle.static.InputSpec(
-                        shape=[None] + self.config["Global"]["image_shape"],
-                        dtype='float32')
-                ])
+
+        model = paddle.jit.to_static(
+            model,
+            input_spec=[
+                paddle.static.InputSpec(
+                    shape=[None] + self.config["Global"]["image_shape"],
+                    dtype='float32')
+            ])
+        if hasattr(model.base_model,
+                   "quanter") and model.base_model.quanter is not None:
+            model.base_model.quanter.save_quantized_model(model,
+                                                          save_path + "_int8")
         else:
-            model = paddle.jit.to_static(
-                model,
-                input_spec=[
-                    paddle.static.InputSpec(
-                        shape=[None] + self.config["Global"]["image_shape"],
-                        dtype='float32')
-                ])
             paddle.jit.save(model, save_path)
         logger.info(
             f"Export succeeded! The inference model exported has been saved in \"{self.config['Global']['save_inference_dir']}\"."

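End to end, this export branch and the predictor change at the top fit together: a quantized run ends with `save_quantized_model(...)` writing an `inference_int8.*` pair into `save_inference_dir`, which `create_paddle_predictor()` then picks up automatically (and insists on `use_fp16=False`); a non-quantized run keeps the usual `inference.*` pair. A small sketch of checking which flavour a directory holds (the path is only an example):

import os

save_inference_dir = "./inference"  # example path
if "inference_int8.pdiparams" in os.listdir(save_inference_dir):
    print("QAT-exported int8 model: infer with use_int8=True, use_fp16=False")
else:
    print("fp32/fp16 model: infer with use_int8=False")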