Mirror of https://github.com/PaddlePaddle/PaddleOCR.git
Synced 2025-06-03 21:53:39 +08:00
update common pre-commit configs and commit the results of running pre-commit run -a (#12516)
This commit is contained in:
parent 6e7a1b871d
commit 24f06d1a1b
@@ -1,26 +1,22 @@
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: a11d9314b22d8f8c7556443875b731ef05965464
+    rev: v4.6.0
     hooks:
+      - id: check-added-large-files
+        args: ['--maxkb=512']
+      - id: check-case-conflict
       - id: check-merge-conflict
       - id: check-symlinks
      - id: detect-private-key
-        files: (?!.*paddle)^.*$
       - id: end-of-file-fixer
-        files: \.md$
       - id: trailing-whitespace
-        files: \.md$
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|py|md)$
   - repo: https://github.com/Lucas-C/pre-commit-hooks
-    rev: v1.0.1
+    rev: v1.5.1
     hooks:
-      - id: forbid-crlf
-        files: \.md$
       - id: remove-crlf
-        files: \.md$
-      - id: forbid-tabs
-        files: \.md$
       - id: remove-tabs
-        files: \.md$
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|py|md)$
   - repo: local
     hooks:
       - id: clang-format
@@ -31,7 +27,7 @@ repos:
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
 # For Python files
   - repo: https://github.com/psf/black.git
-    rev: 23.3.0
+    rev: 24.4.2
     hooks:
       - id: black
         files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$

@@ -47,4 +43,3 @@ repos:
         - --show-source
         - --statistics
         exclude: ^benchmark/|^test_tipc/
-
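Most of the Python churn in the hunks that follow comes from the black bump above (23.3.0 to 24.4.2): the newer stable style parenthesizes multi-line conditional expressions and the wrapped right-hand side of subscript assignments instead of letting them dangle as bare continuation lines. A minimal, self-contained sketch of the two patterns follows; every name in it is made up for illustration and is not taken from the repository.

```python
# Illustrative only: the two wrapping patterns black 24.x applies in this diff.
condition = True
first_branch_value = "value used when the condition holds"
second_branch_value = "value used otherwise"

# 1) A conditional expression that has to span several lines is wrapped in
#    parentheses as a unit.
result = (
    first_branch_value
    if condition
    else second_branch_value
)

# 2) When the right-hand side of a subscript assignment has to wrap, the RHS is
#    parenthesized rather than splitting inside the subscript brackets.
loss_dict = {}
loss_dict["{}_{}".format("dml", 0)] = (
    result
)

print(loss_dict)
```

Black itself would keep lines this short unwrapped; the explicit wrapping above only mirrors the shape of the longer real lines reformatted below.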
@@ -546,7 +546,7 @@ python3 tools/infer/predict_system.py \
     --use_gpu=True
 ```

 The results are saved as follows: the text detection/recognition visualizations are stored in the `det_rec_infer/` directory, and the predictions are saved to `det_rec_infer/system_results.txt` in this format: `0018.jpg [{"transcription": "E295", "points": [[88, 33], [137, 33], [137, 40], [88, 40]]}]`

 2) Then convert the data saved in step 1 into the format required by the end-to-end evaluation: modify the code in `tools/end2end/convert_ppocr_label.py`, setting the input label path, Mode, output label path, etc. in the convert_label function, so that both the ground-truth labels of the test data and the predicted labels are converted.
 ```
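For reference, a line of `system_results.txt` in the format quoted above (an image name followed by a JSON list of `transcription`/`points` entries) can be consumed with standard-library Python. This is a hedged sketch, not a helper shipped with PaddleOCR; the tab separator is an assumption, so adjust it if your file uses a plain space.

```python
import json

def parse_system_results(path, sep="\t"):
    """Parse lines like: 0018.jpg<sep>[{"transcription": "E295", "points": [[88, 33], ...]}]."""
    results = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            image_name, json_part = line.split(sep, 1)
            results[image_name] = json.loads(json_part)
    return results

# Example usage (the path is hypothetical):
# for name, items in parse_system_results("det_rec_infer/system_results.txt").items():
#     for item in items:
#         print(name, item["transcription"], item["points"])
```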
@@ -456,7 +456,7 @@ display(HTML('<html><body><table><tr><td colspan="5">alleadersh</td><td rowspan=

 The prediction result is as follows:
 ```
 val_9.jpg: {'attributes': ['Scanned', 'Little', 'Black-and-White', 'Clear', 'Without-Obstacles', 'Horizontal'], 'output': [1, 1, 1, 1, 1, 1]}
 ```


@@ -466,7 +466,7 @@ val_9.jpg: {'attributes': ['Scanned', 'Little', 'Black-and-White', 'Clear', 'Wi

 The prediction result is as follows:
 ```
 val_3253.jpg: {'attributes': ['Photo', 'Little', 'Black-and-White', 'Blurry', 'Without-Obstacles', 'Tilted'], 'output': [0, 1, 1, 0, 1, 0]}
 ```

 Comparing the two images: the first one is fairly clear and its predicted table attributes suggest it is easy to recognize, so we can place more trust in its table recognition result; the second one is blurry and tilted, so its table recognition result may contain errors and needs further manual verification. Table attribute recognition thus lets "manual review" and "automation" complement each other, safeguarding the accuracy of table recognition in production.
@@ -110,12 +110,12 @@ tar -xf XFUND.tar

 ```bash
 /home/aistudio/PaddleOCR/ppstructure/vqa/XFUND
 └─ zh_train/                        training set
     ├── image/                      image folder
     ├── xfun_normalize_train.json   annotation file
 └─ zh_val/                          validation set
     ├── image/                      image folder
     ├── xfun_normalize_val.json     annotation file

 ```

|
|||||||
最终会在config.Global.save_res_path字段所配置的目录下保存预测结果可视化图像以及预测结果文本文件,预测结果文本文件名为infer_results.txt, 每一行表示一张图片的结果,每张图片的结果如下所示,前面表示测试图片路径,后面为测试结果:key字段及对应的value字段。
|
最终会在config.Global.save_res_path字段所配置的目录下保存预测结果可视化图像以及预测结果文本文件,预测结果文本文件名为infer_results.txt, 每一行表示一张图片的结果,每张图片的结果如下所示,前面表示测试图片路径,后面为测试结果:key字段及对应的value字段。
|
||||||
|
|
||||||
```
|
```
|
||||||
test_imgs/t131.jpg {"政治面税": "群众", "性别": "男", "籍贯": "河北省邯郸市", "婚姻状况": "亏末婚口已婚口已娇", "通讯地址": "邯郸市阳光苑7号楼003", "民族": "汉族", "毕业院校": "河南工业大学", "户口性质": "口农村城镇", "户口地址": "河北省邯郸市", "联系电话": "13288888888", "健康状况": "健康", "姓名": "小六", "好高cm": "180", "出生年月": "1996年8月9日", "文化程度": "本科", "身份证号码": "458933777777777777"}
|
test_imgs/t131.jpg {"政治面税": "群众", "性别": "男", "籍贯": "河北省邯郸市", "婚姻状况": "亏末婚口已婚口已娇", "通讯地址": "邯郸市阳光苑7号楼003", "民族": "汉族", "毕业院校": "河南工业大学", "户口性质": "口农村城镇", "户口地址": "河北省邯郸市", "联系电话": "13288888888", "健康状况": "健康", "姓名": "小六", "好高cm": "180", "出生年月": "1996年8月9日", "文化程度": "本科", "身份证号码": "458933777777777777"}
|
||||||
````
|
````
|
||||||
|
|
||||||
展示预测结果
|
展示预测结果
|
||||||
|
@@ -194,7 +194,9 @@ class InferenceEngine(object):
                 box_list = [box_list[i] for i, v in enumerate(idx) if v]
                 score_list = [score_list[i] for i, v in enumerate(idx) if v]
             else:
-                idx = box_list.reshape(box_list.shape[0], -1).sum(axis=1) > 0  # drop boxes that are all zeros
+                idx = (
+                    box_list.reshape(box_list.shape[0], -1).sum(axis=1) > 0
+                )  # drop boxes that are all zeros
                 box_list, score_list = box_list[idx], score_list[idx]
         else:
             box_list, score_list = [], []
@@ -59,4 +59,3 @@ source ${BENCHMARK_ROOT}/scripts/run_model.sh # 在该脚本中会对符合
 _set_params $@
 #_train        # uncomment if you only want to produce the training log without parsing it
 _run           # defined in run_model.sh; it calls _train when executed. Comment this line out if you only want the training log without the full integration, but re-enable it before submitting.
-
@@ -34,6 +34,3 @@ for model_mode in ${model_mode_list[@]}; do
         done
     done
 done
-
-
-

@@ -173,5 +173,3 @@ Eval:
     drop_last: False
     batch_size_per_card: 8
     num_workers: 8
-
-

@@ -198,9 +198,9 @@ class ArgsParser(ArgumentParser):
         lang = "cyrillic"
     elif lang in devanagari_lang:
         lang = "devanagari"
-    global_config["Global"][
-        "character_dict_path"
-    ] = "ppocr/utils/dict/{}_dict.txt".format(lang)
+    global_config["Global"]["character_dict_path"] = (
+        "ppocr/utils/dict/{}_dict.txt".format(lang)
+    )
     global_config["Global"]["save_model_dir"] = "./output/rec_{}_lite".format(lang)
     global_config["Train"]["dataset"]["label_file_list"] = [
         "train_data/{}_train.txt".format(lang)

@@ -114,4 +114,3 @@ Eval:
     batch_size_per_card: 128
     num_workers: 4
     use_shared_memory: False
-

@@ -81,4 +81,3 @@ Eval:
     drop_last: False
     batch_size_per_card: 16
     num_workers: 4
-

@@ -82,4 +82,3 @@ Eval:
     drop_last: False
     batch_size_per_card: 16
     num_workers: 4
-
deploy/android_demo/.gitignore (vendored, 1 line changed)
@@ -6,4 +6,3 @@
 /build
 /captures
 .externalNativeBuild
-

@@ -17,4 +17,3 @@
     <string name="DET_LONG_SIZE_DEFAULT">960</string>
     <string name="SCORE_THRESHOLD_DEFAULT">0.1</string>
 </resources>
-

@@ -11,4 +11,3 @@ FetchContent_Declare(
     GIT_TAG main
 )
 FetchContent_MakeAvailable(extern_Autolog)
-

@@ -3,4 +3,3 @@
     <item android:state_pressed="true" android:drawable="@drawable/btn_settings_pressed"/>
     <item android:drawable="@drawable/btn_settings_default"/>
 </selector>
-
@@ -13,4 +13,3 @@
     "use_multiprocess": false,
     "workers": 2
 }
-

@@ -13,4 +13,3 @@
     "use_multiprocess": false,
     "workers": 2
 }
-

@@ -13,4 +13,3 @@
     "use_multiprocess": false,
     "workers": 2
 }
-

@@ -13,4 +13,3 @@
     "use_multiprocess": false,
     "workers": 2
 }
-

@@ -13,4 +13,3 @@
     "use_multiprocess": false,
     "workers": 2
 }
-

@@ -13,4 +13,3 @@
     "use_multiprocess": false,
     "workers": 2
 }
-
@@ -13,7 +13,7 @@ with open(annotation_file, "r") as f:
     lines = f.readlines()

 for i, line in enumerate(lines):
     image_name = line.split(" ")[0]

     image_path = os.path.join(dataset_path, image_name)

@@ -188,7 +188,7 @@ A:可以看下训练的尺度和预测的尺度是否相同,如果训练的

 #### Q: Text in an upright image is recognized fine, but after the image is rotated by 90 degrees the results become much worse. Is there any way to optimize this?

 **A**: It is indeed possible for accuracy to drop after rotating the whole image by 90 degrees, because PP-OCR currently assumes the input image is upright. You can train a whole-image orientation classifier and put it at the very front of the prediction pipeline (following the approach of the existing direction classifier), or apply some rule-based preprocessing, for example checking the width/height ratio.

 #### Q: How can ancient text on bamboo slips be recognized?

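As a concrete illustration of the rule-based preprocessing suggested in the answer above (checking width against height before recognition), here is a hedged OpenCV sketch; the function name, the 1.5 ratio threshold and the clockwise rotation direction are assumptions to be tuned per use case.

```python
import cv2

def maybe_upright(image, ratio_threshold=1.5):
    """Heuristically rotate an image that looks like a 90-degree-rotated page.

    If the image is much taller than it is wide, assume it was rotated and turn
    it 90 degrees clockwise before feeding it to PP-OCR. Purely illustrative.
    """
    h, w = image.shape[:2]
    if h > w * ratio_threshold:
        image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
    return image

# img = cv2.imread("rotated_doc.jpg")
# img = maybe_upright(img)
```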
@@ -439,9 +439,9 @@ class DRRGTargets(object):
         )

         inner_center_sample_mask = np.zeros_like(center_sample_mask)
-        inner_center_sample_mask[
-            margin : h - margin, margin : w - margin
-        ] = center_sample_mask[margin : h - margin, margin : w - margin]
+        inner_center_sample_mask[margin : h - margin, margin : w - margin] = (
+            center_sample_mask[margin : h - margin, margin : w - margin]
+        )
         kernel_size = int(np.clip(max_rand_half_height, 7, 21))
         inner_center_sample_mask = cv2.erode(
             inner_center_sample_mask, np.ones((kernel_size, kernel_size), np.uint8)
@@ -48,10 +48,14 @@ class GenTableMask(object):
         in_text = False  # whether we are currently inside a text region
         box_list = []
         for i in range(len(project_val_array)):
-            if in_text == False and project_val_array[i] > spilt_threshold:  # entered a text region
+            if (
+                in_text == False and project_val_array[i] > spilt_threshold
+            ):  # entered a text region
                 in_text = True
                 start_idx = i
-            elif project_val_array[i] <= spilt_threshold and in_text == True:  # entered a blank region
+            elif (
+                project_val_array[i] <= spilt_threshold and in_text == True
+            ):  # entered a blank region
                 end_idx = i
                 in_text = False
                 if end_idx - start_idx <= 2:
@@ -95,10 +99,14 @@ class GenTableMask(object):
         box_list = []
         spilt_threshold = 0
         for i in range(len(project_val_array)):
-            if in_text == False and project_val_array[i] > spilt_threshold:  # entered a text region
+            if (
+                in_text == False and project_val_array[i] > spilt_threshold
+            ):  # entered a text region
                 in_text = True
                 start_idx = i
-            elif project_val_array[i] <= spilt_threshold and in_text == True:  # entered a blank region
+            elif (
+                project_val_array[i] <= spilt_threshold and in_text == True
+            ):  # entered a blank region
                 end_idx = i
                 in_text = False
                 if end_idx - start_idx <= 2:
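The loops reformatted in the two hunks above walk a 1-D projection profile and record the index ranges whose values exceed `spilt_threshold` (the comments mark entering a text region and entering a blank region). A simplified, standalone sketch of that idea follows; `split_projection` is a made-up helper, and very short runs are simply skipped here because their handling in the real code is cut off by the hunk.

```python
def split_projection(project_val_array, spilt_threshold=0, min_len=3):
    """Collect (start, end) index ranges whose projection value exceeds the threshold."""
    boxes = []
    in_text = False  # whether we are currently inside a text region
    start_idx = 0
    for i, val in enumerate(project_val_array):
        if not in_text and val > spilt_threshold:  # entered a text region
            in_text = True
            start_idx = i
        elif in_text and val <= spilt_threshold:  # entered a blank region
            if i - start_idx >= min_len:
                boxes.append((start_idx, i))
            in_text = False
    if in_text:
        boxes.append((start_idx, len(project_val_array)))
    return boxes

print(split_projection([0, 0, 3, 4, 5, 0, 0, 2, 2, 2, 2, 0]))  # [(2, 5), (7, 11)]
```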
@@ -231,9 +231,7 @@ class DKDLoss(nn.Layer):
         pred_student = self._cat_mask(pred_student, gt_mask, other_mask)
         pred_teacher = self._cat_mask(pred_teacher, gt_mask, other_mask)
         log_pred_student = paddle.log(pred_student)
-        tckd_loss = self._kl_div(log_pred_student, pred_teacher) * (
-            self.temperature**2
-        )
+        tckd_loss = self._kl_div(log_pred_student, pred_teacher) * (self.temperature**2)
         pred_teacher_part2 = F.softmax(
             logits_teacher / self.temperature - 1000.0 * gt_mask, axis=1
         )
@@ -113,9 +113,9 @@ class DistillationDMLLoss(DMLLoss):
                 loss = super().forward(out1, out2)
                 if isinstance(loss, dict):
                     for key in loss:
-                        loss_dict[
-                            "{}_{}_{}_{}".format(key, pair[0], pair[1], idx)
-                        ] = loss[key]
+                        loss_dict["{}_{}_{}_{}".format(key, pair[0], pair[1], idx)] = (
+                            loss[key]
+                        )
                 else:
                     loss_dict["{}_{}".format(self.name, idx)] = loss
             else:

@@ -218,9 +218,9 @@ class DistillationKLDivLoss(KLDivLoss):
                 loss = super().forward(out1, out2)
                 if isinstance(loss, dict):
                     for key in loss:
-                        loss_dict[
-                            "{}_{}_{}_{}".format(key, pair[0], pair[1], idx)
-                        ] = loss[key]
+                        loss_dict["{}_{}_{}_{}".format(key, pair[0], pair[1], idx)] = (
+                            loss[key]
+                        )
                 else:
                     loss_dict["{}_{}".format(self.name, idx)] = loss
             else:

@@ -329,9 +329,9 @@ class DistillationDKDLoss(DKDLoss):
                 loss = super().forward(out1, out2)
                 if isinstance(loss, dict):
                     for key in loss:
-                        loss_dict[
-                            "{}_{}_{}_{}".format(key, pair[0], pair[1], idx)
-                        ] = loss[key]
+                        loss_dict["{}_{}_{}_{}".format(key, pair[0], pair[1], idx)] = (
+                            loss[key]
+                        )
                 else:
                     loss_dict["{}_{}".format(self.name, idx)] = loss
             else:

@@ -472,9 +472,9 @@ class DistillationKLDivLoss(KLDivLoss):
                 loss = super().forward(out1, out2)
                 if isinstance(loss, dict):
                     for key in loss:
-                        loss_dict[
-                            "{}_{}_{}_{}".format(key, pair[0], pair[1], idx)
-                        ] = loss[key]
+                        loss_dict["{}_{}_{}_{}".format(key, pair[0], pair[1], idx)] = (
+                            loss[key]
+                        )
                 else:
                     loss_dict["{}_{}".format(self.name, idx)] = loss
             else:

@@ -583,9 +583,9 @@ class DistillationDKDLoss(DKDLoss):
                 loss = super().forward(out1, out2)
                 if isinstance(loss, dict):
                     for key in loss:
-                        loss_dict[
-                            "{}_{}_{}_{}".format(key, pair[0], pair[1], idx)
-                        ] = loss[key]
+                        loss_dict["{}_{}_{}_{}".format(key, pair[0], pair[1], idx)] = (
+                            loss[key]
+                        )
                 else:
                     loss_dict["{}_{}".format(self.name, idx)] = loss
             else:
@@ -185,9 +185,11 @@ class ResNet(nn.Layer):
                 bottleneck_block = self.add_sublayer(
                     conv_name,
                     BottleneckBlock(
-                        num_channels=num_channels[block]
-                        if i == 0
-                        else num_filters[block] * 4,
+                        num_channels=(
+                            num_channels[block]
+                            if i == 0
+                            else num_filters[block] * 4
+                        ),
                         num_filters=num_filters[block],
                         stride=2 if i == 0 and block != 0 else 1,
                         shortcut=shortcut,

@@ -208,9 +210,9 @@ class ResNet(nn.Layer):
                 basic_block = self.add_sublayer(
                     conv_name,
                     BasicBlock(
-                        num_channels=num_channels[block]
-                        if i == 0
-                        else num_filters[block],
+                        num_channels=(
+                            num_channels[block] if i == 0 else num_filters[block]
+                        ),
                         num_filters=num_filters[block],
                         stride=2 if i == 0 and block != 0 else 1,
                         shortcut=shortcut,
@@ -316,9 +316,11 @@ class ResNet_vd(nn.Layer):
                 bottleneck_block = self.add_sublayer(
                     "bb_%d_%d" % (block, i),
                     BottleneckBlock(
-                        in_channels=num_channels[block]
-                        if i == 0
-                        else num_filters[block] * 4,
+                        in_channels=(
+                            num_channels[block]
+                            if i == 0
+                            else num_filters[block] * 4
+                        ),
                         out_channels=num_filters[block],
                         stride=2 if i == 0 and block != 0 else 1,
                         shortcut=shortcut,

@@ -339,9 +341,9 @@ class ResNet_vd(nn.Layer):
                 basic_block = self.add_sublayer(
                     "bb_%d_%d" % (block, i),
                     BasicBlock(
-                        in_channels=num_channels[block]
-                        if i == 0
-                        else num_filters[block],
+                        in_channels=(
+                            num_channels[block] if i == 0 else num_filters[block]
+                        ),
                         out_channels=num_filters[block],
                         stride=2 if i == 0 and block != 0 else 1,
                         shortcut=shortcut,
@@ -261,9 +261,11 @@ class ResNet_SAST(nn.Layer):
                 bottleneck_block = self.add_sublayer(
                     "bb_%d_%d" % (block, i),
                     BottleneckBlock(
-                        in_channels=num_channels[block]
-                        if i == 0
-                        else num_filters[block] * 4,
+                        in_channels=(
+                            num_channels[block]
+                            if i == 0
+                            else num_filters[block] * 4
+                        ),
                         out_channels=num_filters[block],
                         stride=2 if i == 0 and block != 0 else 1,
                         shortcut=shortcut,

@@ -284,9 +286,9 @@ class ResNet_SAST(nn.Layer):
                 basic_block = self.add_sublayer(
                     "bb_%d_%d" % (block, i),
                     BasicBlock(
-                        in_channels=num_channels[block]
-                        if i == 0
-                        else num_filters[block],
+                        in_channels=(
+                            num_channels[block] if i == 0 else num_filters[block]
+                        ),
                         out_channels=num_filters[block],
                         stride=2 if i == 0 and block != 0 else 1,
                         shortcut=shortcut,
@@ -241,9 +241,11 @@ class ResNet(nn.Layer):
                 bottleneck_block = self.add_sublayer(
                     "bb_%d_%d" % (block, i),
                     BottleneckBlock(
-                        in_channels=num_channels[block]
-                        if i == 0
-                        else num_filters[block] * 4,
+                        in_channels=(
+                            num_channels[block]
+                            if i == 0
+                            else num_filters[block] * 4
+                        ),
                         out_channels=num_filters[block],
                         stride=2 if i == 0 and block != 0 else 1,
                         shortcut=shortcut,

@@ -264,9 +266,9 @@ class ResNet(nn.Layer):
                 basic_block = self.add_sublayer(
                     "bb_%d_%d" % (block, i),
                     BasicBlock(
-                        in_channels=num_channels[block]
-                        if i == 0
-                        else num_filters[block],
+                        in_channels=(
+                            num_channels[block] if i == 0 else num_filters[block]
+                        ),
                         out_channels=num_filters[block],
                         stride=2 if i == 0 and block != 0 else 1,
                         shortcut=shortcut,
@@ -364,114 +364,136 @@ class DYMicroBlock(nn.Layer):
         if gs1[0] == 0:
             self.layers = nn.Sequential(
                 DepthSpatialSepConv(inp, t1, kernel_size, stride),
-                DYShiftMax(
-                    hidden_dim2,
-                    hidden_dim2,
-                    act_max=2.0,
-                    act_relu=True if y2 == 2 else False,
-                    init_a=init_a,
-                    reduction=act_reduction,
-                    init_b=init_b,
-                    g=gs1,
-                    expansion=False,
-                )
-                if y2 > 0
-                else nn.ReLU6(),
+                (
+                    DYShiftMax(
+                        hidden_dim2,
+                        hidden_dim2,
+                        act_max=2.0,
+                        act_relu=True if y2 == 2 else False,
+                        init_a=init_a,
+                        reduction=act_reduction,
+                        init_b=init_b,
+                        g=gs1,
+                        expansion=False,
+                    )
+                    if y2 > 0
+                    else nn.ReLU6()
+                ),
                 ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
-                ChannelShuffle(hidden_dim2 // 2)
-                if shuffle and y2 != 0
-                else nn.Sequential(),
+                (
+                    ChannelShuffle(hidden_dim2 // 2)
+                    if shuffle and y2 != 0
+                    else nn.Sequential()
+                ),
                 GroupConv(hidden_dim2, oup, (g1, g2)),
-                DYShiftMax(
-                    oup,
-                    oup,
-                    act_max=2.0,
-                    act_relu=False,
-                    init_a=[1.0, 0.0],
-                    reduction=act_reduction // 2,
-                    init_b=[0.0, 0.0],
-                    g=(g1, g2),
-                    expansion=False,
-                )
-                if y3 > 0
-                else nn.Sequential(),
+                (
+                    DYShiftMax(
+                        oup,
+                        oup,
+                        act_max=2.0,
+                        act_relu=False,
+                        init_a=[1.0, 0.0],
+                        reduction=act_reduction // 2,
+                        init_b=[0.0, 0.0],
+                        g=(g1, g2),
+                        expansion=False,
+                    )
+                    if y3 > 0
+                    else nn.Sequential()
+                ),
                 ChannelShuffle(g2) if shuffle else nn.Sequential(),
-                ChannelShuffle(oup // 2)
-                if shuffle and oup % 2 == 0 and y3 != 0
-                else nn.Sequential(),
+                (
+                    ChannelShuffle(oup // 2)
+                    if shuffle and oup % 2 == 0 and y3 != 0
+                    else nn.Sequential()
+                ),
             )
         elif g2 == 0:
             self.layers = nn.Sequential(
                 GroupConv(inp, hidden_dim2, gs1),
-                DYShiftMax(
-                    hidden_dim2,
-                    hidden_dim2,
-                    act_max=2.0,
-                    act_relu=False,
-                    init_a=[1.0, 0.0],
-                    reduction=act_reduction,
-                    init_b=[0.0, 0.0],
-                    g=gs1,
-                    expansion=False,
-                )
-                if y3 > 0
-                else nn.Sequential(),
+                (
+                    DYShiftMax(
+                        hidden_dim2,
+                        hidden_dim2,
+                        act_max=2.0,
+                        act_relu=False,
+                        init_a=[1.0, 0.0],
+                        reduction=act_reduction,
+                        init_b=[0.0, 0.0],
+                        g=gs1,
+                        expansion=False,
+                    )
+                    if y3 > 0
+                    else nn.Sequential()
+                ),
             )
         else:
             self.layers = nn.Sequential(
                 GroupConv(inp, hidden_dim2, gs1),
-                DYShiftMax(
-                    hidden_dim2,
-                    hidden_dim2,
-                    act_max=2.0,
-                    act_relu=True if y1 == 2 else False,
-                    init_a=init_a,
-                    reduction=act_reduction,
-                    init_b=init_b,
-                    g=gs1,
-                    expansion=False,
-                )
-                if y1 > 0
-                else nn.ReLU6(),
+                (
+                    DYShiftMax(
+                        hidden_dim2,
+                        hidden_dim2,
+                        act_max=2.0,
+                        act_relu=True if y1 == 2 else False,
+                        init_a=init_a,
+                        reduction=act_reduction,
+                        init_b=init_b,
+                        g=gs1,
+                        expansion=False,
+                    )
+                    if y1 > 0
+                    else nn.ReLU6()
+                ),
                 ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
-                DepthSpatialSepConv(hidden_dim2, (1, 1), kernel_size, stride)
-                if depthsep
-                else DepthConv(hidden_dim2, hidden_dim2, kernel_size, stride),
+                (
+                    DepthSpatialSepConv(hidden_dim2, (1, 1), kernel_size, stride)
+                    if depthsep
+                    else DepthConv(hidden_dim2, hidden_dim2, kernel_size, stride)
+                ),
                 nn.Sequential(),
-                DYShiftMax(
-                    hidden_dim2,
-                    hidden_dim2,
-                    act_max=2.0,
-                    act_relu=True if y2 == 2 else False,
-                    init_a=init_a,
-                    reduction=act_reduction,
-                    init_b=init_b,
-                    g=gs1,
-                    expansion=True,
-                )
-                if y2 > 0
-                else nn.ReLU6(),
-                ChannelShuffle(hidden_dim2 // 4)
-                if shuffle and y1 != 0 and y2 != 0
-                else nn.Sequential()
-                if y1 == 0 and y2 == 0
-                else ChannelShuffle(hidden_dim2 // 2),
+                (
+                    DYShiftMax(
+                        hidden_dim2,
+                        hidden_dim2,
+                        act_max=2.0,
+                        act_relu=True if y2 == 2 else False,
+                        init_a=init_a,
+                        reduction=act_reduction,
+                        init_b=init_b,
+                        g=gs1,
+                        expansion=True,
+                    )
+                    if y2 > 0
+                    else nn.ReLU6()
+                ),
+                (
+                    ChannelShuffle(hidden_dim2 // 4)
+                    if shuffle and y1 != 0 and y2 != 0
+                    else (
+                        nn.Sequential()
+                        if y1 == 0 and y2 == 0
+                        else ChannelShuffle(hidden_dim2 // 2)
+                    )
+                ),
                 GroupConv(hidden_dim2, oup, (g1, g2)),
-                DYShiftMax(
-                    oup,
-                    oup,
-                    act_max=2.0,
-                    act_relu=False,
-                    init_a=[1.0, 0.0],
-                    reduction=act_reduction // 2
-                    if oup < hidden_dim2
-                    else act_reduction,
-                    init_b=[0.0, 0.0],
-                    g=(g1, g2),
-                    expansion=False,
-                )
-                if y3 > 0
-                else nn.Sequential(),
+                (
+                    DYShiftMax(
+                        oup,
+                        oup,
+                        act_max=2.0,
+                        act_relu=False,
+                        init_a=[1.0, 0.0],
+                        reduction=(
+                            act_reduction // 2 if oup < hidden_dim2 else act_reduction
+                        ),
+                        init_b=[0.0, 0.0],
+                        g=(g1, g2),
+                        expansion=False,
+                    )
+                    if y3 > 0
+                    else nn.Sequential()
+                ),
                 ChannelShuffle(g2) if shuffle else nn.Sequential(),
                 ChannelShuffle(oup // 2) if shuffle and y3 != 0 else nn.Sequential(),
             )
@@ -35,9 +35,7 @@ class MTB(nn.Layer):
                 ),
             )
             self.block.add_sublayer("relu_{}".format(i), nn.ReLU())
-            self.block.add_sublayer(
-                "bn_{}".format(i), nn.BatchNorm2D(32 * (2**i))
-            )
+            self.block.add_sublayer("bn_{}".format(i), nn.BatchNorm2D(32 * (2**i)))

     def forward(self, images):
         x = self.block(images)
@@ -259,9 +259,11 @@ class ResNet(nn.Layer):
                 bottleneck_block = self.add_sublayer(
                     "bb_%d_%d" % (block, i),
                     BottleneckBlock(
-                        in_channels=num_channels[block]
-                        if i == 0
-                        else num_filters[block] * 4,
+                        in_channels=(
+                            num_channels[block]
+                            if i == 0
+                            else num_filters[block] * 4
+                        ),
                         out_channels=num_filters[block],
                         stride=stride,
                         shortcut=shortcut,

@@ -285,9 +287,9 @@ class ResNet(nn.Layer):
                 basic_block = self.add_sublayer(
                     "bb_%d_%d" % (block, i),
                     BasicBlock(
-                        in_channels=num_channels[block]
-                        if i == 0
-                        else num_filters[block],
+                        in_channels=(
+                            num_channels[block] if i == 0 else num_filters[block]
+                        ),
                         out_channels=num_filters[block],
                         stride=stride,
                         shortcut=shortcut,
@@ -141,8 +141,7 @@ class GNNLayer(nn.Layer):
             )
             residuals.append(
                 (
-                    residual
-                    * cat_nodes[start : start + num**2].reshape([num, num, -1])
+                    residual * cat_nodes[start : start + num**2].reshape([num, num, -1])
                 ).sum(1)
             )
             start += num**2
@@ -341,9 +341,11 @@ class PrePostProcessLayer(nn.Layer):
                 )
             elif cmd == "d":  # add dropout
                 self.functors.append(
-                    lambda x: F.dropout(x, p=dropout_rate, mode="downscale_in_infer")
-                    if dropout_rate
-                    else x
+                    lambda x: (
+                        F.dropout(x, p=dropout_rate, mode="downscale_in_infer")
+                        if dropout_rate
+                        else x
+                    )
                 )

     def forward(self, x, residual=None):
@@ -150,7 +150,7 @@ class TBSRN(nn.Layer):
         upsample_block_num = int(math.log(scale_factor, 2))
         self.block1 = nn.Sequential(
             nn.Conv2D(in_planes, 2 * hidden_units, kernel_size=9, padding=4),
-            nn.PReLU()
+            nn.PReLU(),
             # nn.ReLU()
         )
         self.srb_nums = srb_nums
@@ -1133,9 +1133,11 @@ class VLLabelDecode(BaseRecLabelDecode):
             preds_idx = net_out[start_idx:end_idx].topk(1)[1][:, 0].tolist()
             preds_text = "".join(
                 [
-                    self.character[idx - 1]
-                    if idx > 0 and idx <= len(self.character)
-                    else ""
+                    (
+                        self.character[idx - 1]
+                        if idx > 0 and idx <= len(self.character)
+                        else ""
+                    )
                     for idx in preds_idx
                 ]
             )
@@ -24,9 +24,9 @@ PP-StructureV2在PP-StructureV1的基础上进一步改进,主要有以下3个

 * **System capability upgrades**: new image rectification and layout recovery modules; image-to-word/pdf conversion and key information extraction are now fully covered!
 * **System performance optimizations**:
   * Layout analysis: released a lightweight layout analysis model that is **11x** faster, with an average CPU latency of only **41ms**!
   * Table recognition: designed 3 optimization strategies that improve model accuracy by **6%** while keeping inference time unchanged.
   * Key information extraction: designed a vision-independent model structure; semantic entity recognition accuracy improved by **2.8%** and relation extraction accuracy by **9.1%**.
 * **Chinese-scenario adaptation**: completed the adaptation of layout analysis and table recognition to Chinese scenarios and open-sourced an **out-of-the-box** Chinese layout structuring model!

 The PP-StructureV2 pipeline is shown below. A document image first passes through the image rectification module, which determines the orientation of the whole image and straightens it; the system can then perform two kinds of tasks, layout analysis and key information extraction. In the layout analysis task, the image first goes through the layout analysis model, which divides it into text, table, image and other regions; these regions are then recognized separately, e.g. table regions are sent to the table recognition module for structured recognition and text regions are sent to the OCR engine for text recognition, and finally the layout recovery module restores everything into a word or pdf file whose layout matches the original image. In the key information extraction task, the OCR engine first extracts the text content, the semantic entity recognition module then obtains the semantic entities in the image, and finally the relation extraction module obtains the relations between the entities, thereby extracting the required key information.
@@ -39,18 +39,18 @@ PP-StructureV2系统流程图如下所示,文档图像首先经过图像矫正
 In terms of algorithm improvements, 8 improvements were made across the 3 key sub-modules of the system.

 * Layout analysis
   * PP-PicoDet: a lightweight layout analysis model
   * FGD: a model distillation algorithm that balances global and local features

 * Table recognition
   * PP-LCNet: a CPU-friendly lightweight backbone network
   * CSP-PAN: a lightweight module for fusing high- and low-level features
   * SLAHead: a feature decoding module that aligns structure and location information

 * Key information extraction
   * VI-LayoutXLM: a visual-feature-independent multimodal pre-trained model structure
   * TB-YX: a text-line sorting logic that takes reading order into account
   * UDML: a unified deep mutual learning knowledge distillation strategy

 Finally, compared with PP-StructureV1:

@@ -87,8 +87,8 @@ PP-StructureV2系统流程图如下所示,文档图像首先经过图像矫正
 | 5 | PP-PicoDet-LCNet1.0x(800*608) + FGD | 9.7 | 94.00% |41.20|

 * Test conditions
   * paddle version: 2.3.0
   * CPU: Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz, with mkldnn enabled and 10 threads

 On the PubLayNet dataset, a performance comparison with other methods is shown in the table below. Compared with layoutparser, a layout analysis tool based on Detectron2, our model is about 5% more accurate and about 69 times faster at inference.

@@ -167,11 +167,11 @@ FGD(Focal and Global Knowledge Distillation for Detectors),是一种兼顾

 |Strategy|Acc|TEDS|Inference time (CPU+MKLDNN)|Model size|
 |---|---|---|---|---|
 |TableRec-RARE| 71.73% | 93.88% |779ms |6.8M|
 |+PP-LCNet| 74.71% |94.37% |778ms| 8.7M|
 |+CSP-PAN| 75.68%| 94.72% |708ms| 9.3M|
 |+SLAHead| 77.70%|94.85%| 766ms| 9.2M|
 |+MergeToken| 76.31%| 95.89%|766ms| 9.2M|

 * Test environment
   * paddle version: 2.3.1
@@ -182,8 +182,8 @@ FGD(Focal and Global Knowledge Distillation for Detectors),是一种兼顾
 |Strategy|Acc|TEDS|Inference time (CPU+MKLDNN)|Model size|
 |---|---|---|---|---|
 |TableMaster|77.90%|96.12%|2144ms|253.0M|
 |TableRec-RARE| 71.73% | 93.88% |779ms |6.8M|
 |SLANet|76.31%| 95.89%|766ms|9.2M|

 #### 4.2.1 Optimization strategies

@@ -283,9 +283,9 @@ XFUND-zh数据集上,SER任务的消融实验如下所示。
 | 5 | Experiment 3 + UDML distillation | 1.1 | **93.19%** | **15.49** | **675.58** |

 * Test conditions
   * paddle version: 2.3.0
   * GPU: V100. The GPU inference time of experiment 5 was measured with `trt+fp16` under cuda10.2 + cudnn8.1.1 + trt7.2.3.4; TRT was not used for the inference-time statistics of the other experiments.
   * CPU: Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz, with mkldnn enabled and 10 threads

 On the XFUND dataset, a comparison with other methods is shown below.

|
|||||||
|
|
||||||
```py
|
```py
|
||||||
def order_by_tbyx(ocr_info, th=20):
|
def order_by_tbyx(ocr_info, th=20):
|
||||||
"""
|
"""
|
||||||
ocr_info: a list of dict, which contains bbox information([x1, y1, x2, y2])
|
ocr_info: a list of dict, which contains bbox information([x1, y1, x2, y2])
|
||||||
th: threshold of the position threshold
|
th: threshold of the position threshold
|
||||||
"""
|
"""
|
||||||
res = sorted(ocr_info, key=lambda r: (r["bbox"][1], r["bbox"][0])) # sort using y1 first and then x1
|
res = sorted(ocr_info, key=lambda r: (r["bbox"][1], r["bbox"][0])) # sort using y1 first and then x1
|
||||||
for i in range(len(res) - 1):
|
for i in range(len(res) - 1):
|
||||||
for j in range(i, 0, -1):
|
for j in range(i, 0, -1):
|
||||||
|
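Based on the docstring above, `order_by_tbyx` first sorts boxes by (y1, x1) and then treats boxes whose y1 values differ by less than `th` pixels as one text line, moving the leftmost ones forward. A hedged usage sketch follows; the boxes and transcriptions are made up, and the stated ordering describes the intent of TB-YX sorting rather than a verified output.

```python
# Two boxes on the same visual line (their y1 values differ by less than th=20)
# plus one box on a second line; TB-YX ordering is meant to yield reading order:
# "hello", "world", "second line".
ocr_info = [
    {"bbox": [120, 48, 200, 76], "transcription": "world"},
    {"bbox": [10, 52, 100, 80], "transcription": "hello"},
    {"bbox": [15, 120, 90, 150], "transcription": "second line"},
]
# ordered = order_by_tbyx(ocr_info, th=20)
# print([item["transcription"] for item in ordered])
```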
@@ -40,7 +40,7 @@
 | --- | --- | --- | ---|
 | EDD<sup>[2]</sup> |x| 88.30% |x|
 | TableRec-RARE(ours) | 71.73%| 93.88% |779ms|
 | SLANet(ours) |76.31%| 95.89%|766ms|

 The performance metrics are explained as follows:
 - Acc: the accuracy of the model in recognizing the table structure of each image; a single wrong token counts as an error.
@@ -45,12 +45,16 @@ def build_pre_process_list(args):
     pad_op = {"PaddingTableImage": {"size": [args.table_max_len, args.table_max_len]}}
     normalize_op = {
         "NormalizeImage": {
-            "std": [0.229, 0.224, 0.225]
-            if args.table_algorithm not in ["TableMaster"]
-            else [0.5, 0.5, 0.5],
-            "mean": [0.485, 0.456, 0.406]
-            if args.table_algorithm not in ["TableMaster"]
-            else [0.5, 0.5, 0.5],
+            "std": (
+                [0.229, 0.224, 0.225]
+                if args.table_algorithm not in ["TableMaster"]
+                else [0.5, 0.5, 0.5]
+            ),
+            "mean": (
+                [0.485, 0.456, 0.406]
+                if args.table_algorithm not in ["TableMaster"]
+                else [0.5, 0.5, 0.5]
+            ),
             "scale": "1./255.",
             "order": "hwc",
         }
@@ -65,4 +65,3 @@ function status_check(){
         echo -e "\033[33m Run failed with command - ${model_name} - ${run_command} - ${log_path} \033[0m" | tee -a ${run_log}
     fi
 }
-

@@ -49,4 +49,3 @@ inference:tools/infer/predict_det.py
 null:null
 --benchmark:True
 null:null
-

@@ -108,4 +108,3 @@ Eval:
     drop_last: False
     batch_size_per_card: 1 # must be 1
     num_workers: 2
-

@@ -51,4 +51,3 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/dict90.t
 null:null
 ===========================infer_benchmark_params==========================
 random_infer_input:[{float32,[3,48,160]}]
-

@@ -81,4 +81,3 @@ Eval:
     drop_last: False
     batch_size_per_card: 16
     num_workers: 4
-
@@ -59,18 +59,18 @@ train_log/
 |Model name|Config file|Large dataset float32 fps|Small dataset float32 fps|diff|Large dataset float16 fps|Small dataset float16 fps|diff|Large dataset size|Small dataset size|
 |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
 | ch_ppocr_mobile_v2.0_det |[config](../configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt) | 53.836 | 53.343 / 53.914 / 52.785 |0.020940758 | 45.574 | 45.57 / 46.292 / 46.213 | 0.015596647 | 10,000| 2,000|
 | ch_ppocr_mobile_v2.0_rec |[config](../configs/ch_ppocr_mobile_v2.0_rec/train_infer_python.txt) | 2083.311 | 2043.194 / 2066.372 / 2093.317 |0.023944295 | 2153.261 | 2167.561 / 2165.726 / 2155.614| 0.005511725 | 600,000| 160,000|
 | ch_ppocr_server_v2.0_det |[config](../configs/ch_ppocr_server_v2.0_det/train_infer_python.txt) | 20.716 | 20.739 / 20.807 / 20.755 |0.003268131 | 20.592 | 20.498 / 20.993 / 20.75| 0.023579288 | 10,000| 2,000|
 | ch_ppocr_server_v2.0_rec |[config](../configs/ch_ppocr_server_v2.0_rec/train_infer_python.txt) | 528.56 | 528.386 / 528.991 / 528.391 |0.001143687 | 1189.788 | 1190.007 / 1176.332 / 1192.084| 0.013213834 | 600,000| 160,000|
 | ch_PP-OCRv2_det |[config](../configs/ch_PP-OCRv2_det/train_infer_python.txt) | 13.87 | 13.386 / 13.529 / 13.428 |0.010569887 | 17.847 | 17.746 / 17.908 / 17.96| 0.011915367 | 10,000| 2,000|
 | ch_PP-OCRv2_rec |[config](../configs/ch_PP-OCRv2_rec/train_infer_python.txt) | 109.248 | 106.32 / 106.318 / 108.587 |0.020895687 | 117.491 | 117.62 / 117.757 / 117.726| 0.001163413 | 140,000| 40,000|
 | det_mv3_db_v2.0 |[config](../configs/det_mv3_db_v2_0/train_infer_python.txt) | 61.802 | 62.078 / 61.802 / 62.008 |0.00444602 | 82.947 | 84.294 / 84.457 / 84.005| 0.005351836 | 10,000| 2,000|
 | det_r50_vd_db_v2.0 |[config](../configs/det_r50_vd_db_v2.0/train_infer_python.txt) | 29.955 | 29.092 / 29.31 / 28.844 |0.015899011 | 51.097 |50.367 / 50.879 / 50.227| 0.012814717 | 10,000| 2,000|
 | det_r50_vd_east_v2.0 |[config](../configs/det_r50_vd_east_v2.0/train_infer_python.txt) | 42.485 | 42.624 / 42.663 / 42.561 |0.00239083 | 67.61 |67.825/ 68.299/ 68.51| 0.00999854 | 10,000| 2,000|
 | det_r50_vd_pse_v2.0 |[config](../configs/det_r50_vd_pse_v2.0/train_infer_python.txt) | 16.455 | 16.517 / 16.555 / 16.353 |0.012201752 | 27.02 |27.288 / 27.152 / 27.408| 0.009340339 | 10,000| 2,000|
 | rec_mv3_none_bilstm_ctc_v2.0 |[config](../configs/rec_mv3_none_bilstm_ctc_v2.0/train_infer_python.txt) | 2288.358 | 2291.906 / 2293.725 / 2290.05 |0.001602197 | 2336.17 |2327.042 / 2328.093 / 2344.915| 0.007622025 | 600,000| 160,000|
 | layoutxlm_ser |[config](../configs/layoutxlm/train_infer_python.txt) | 18.001 | 18.114 / 18.107 / 18.307 |0.010924783 | 21.982 | 21.507 / 21.116 / 21.406| 0.018180127 | 1490 | 1490|
 | PP-Structure-table |[config](../configs/en_table_structure/train_infer_python.txt) | 14.151 | 14.077 / 14.23 / 14.25 |0.012140351 | 16.285 | 16.595 / 16.878 / 16.531 | 0.020559308 | 20,000| 5,000|
 | det_r50_dcn_fce_ctw_v2.0 |[config](../configs/det_r50_dcn_fce_ctw_v2.0/train_infer_python.txt) | 14.057 | 14.029 / 14.02 / 14.014 |0.001069214 | 18.298 |18.411 / 18.376 / 18.331| 0.004345228 | 10,000| 2,000|
 | ch_PP-OCRv3_det |[config](../configs/ch_PP-OCRv3_det/train_infer_python.txt) | 8.622 | 8.431 / 8.423 / 8.479|0.006604552 | 14.203 |14.346 14.468 14.23| 0.016450097 | 10,000| 2,000|
 | ch_PP-OCRv3_rec |[config](../configs/ch_PP-OCRv3_rec/train_infer_python.txt) | 90.239 | 90.077 / 91.513 / 91.325|0.01569176 | | | | 160,000| 40,000|
@@ -1 +0,0 @@
-

@@ -28,4 +28,3 @@ TRAIN:
 VALID:
     batch_size: 64
     num_workers: 4
-

@@ -46,4 +46,3 @@ TRAIN:
 VALID:
     batch_size: 64
     num_workers: 4
-

@@ -112,7 +112,3 @@ if [ ${MODE} = "lite_train_lite_infer" ] || [ ${MODE} = "whole_train_whole_infer
         done
     done
 fi
-
-
-
-

@@ -167,5 +167,3 @@ if [ ${MODE} = "whole_infer" ]; then
         Count=$(($Count + 1))
     done
 fi
-
-

@@ -155,4 +155,3 @@ if [ ${MODE} = "whole_infer" ]; then
         Count=$(($Count + 1))
     done
 fi
-