add PP-Structurev2 to hubserving
parent
4ac17fca55
commit
73ca6c2e7f
|
@ -59,6 +59,7 @@ pip3 install paddlehub==2.1.0 --upgrade -i https://mirror.baidu.com/pypi/simple
|
||||||
检测模型:./inference/ch_PP-OCRv3_det_infer/
|
检测模型:./inference/ch_PP-OCRv3_det_infer/
|
||||||
识别模型:./inference/ch_PP-OCRv3_rec_infer/
|
识别模型:./inference/ch_PP-OCRv3_rec_infer/
|
||||||
方向分类器:./inference/ch_ppocr_mobile_v2.0_cls_infer/
|
方向分类器:./inference/ch_ppocr_mobile_v2.0_cls_infer/
|
||||||
|
版面分析模型:
|
||||||
表格结构识别模型:./inference/en_ppocr_mobile_v2.0_table_structure_infer/
|
表格结构识别模型:./inference/en_ppocr_mobile_v2.0_table_structure_infer/
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -172,7 +173,7 @@ hub serving start -c deploy/hubserving/ocr_system/config.json
|
||||||
## 3. 发送预测请求
|
## 3. 发送预测请求
|
||||||
配置好服务端,可使用以下命令发送预测请求,获取预测结果:
|
配置好服务端,可使用以下命令发送预测请求,获取预测结果:
|
||||||
|
|
||||||
```python tools/test_hubserving.py server_url image_path```
|
```python tools/test_hubserving.py --server_url=server_url --image_dir=image_path```
|
||||||
|
|
||||||
需要给脚本传递2个参数:
|
需要给脚本传递2个参数:
|
||||||
- **server_url**:服务地址,格式为
|
- **server_url**:服务地址,格式为
|
||||||
|
|
|
@ -61,6 +61,7 @@ Before installing the service module, you need to prepare the inference model an
|
||||||
text detection model: ./inference/ch_PP-OCRv3_det_infer/
|
text detection model: ./inference/ch_PP-OCRv3_det_infer/
|
||||||
text recognition model: ./inference/ch_PP-OCRv3_rec_infer/
|
text recognition model: ./inference/ch_PP-OCRv3_rec_infer/
|
||||||
text angle classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/
|
text angle classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/
|
||||||
|
layout analysis model:
|
||||||
table recognition: ./inference/en_ppocr_mobile_v2.0_table_structure_infer/
|
table recognition: ./inference/en_ppocr_mobile_v2.0_table_structure_infer/
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -177,7 +178,7 @@ hub serving start -c deploy/hubserving/ocr_system/config.json
|
||||||
## 3. Send prediction requests
|
## 3. Send prediction requests
|
||||||
After the service starts, you can use the following command to send a prediction request to obtain the prediction result:
|
After the service starts, you can use the following command to send a prediction request to obtain the prediction result:
|
||||||
```shell
|
```shell
|
||||||
python tools/test_hubserving.py server_url image_path
|
python tools/test_hubserving.py --server_url=server_url --image_dir=image_path
|
||||||
```
|
```
|
||||||
|
|
||||||
Two parameters need to be passed to the script:
|
Two parameters need to be passed to the script:
|
||||||
|
|
|
@ -119,7 +119,7 @@ class StructureSystem(hub.Module):
|
||||||
all_results.append([])
|
all_results.append([])
|
||||||
continue
|
continue
|
||||||
starttime = time.time()
|
starttime = time.time()
|
||||||
res = self.table_sys(img)
|
res, _ = self.table_sys(img)
|
||||||
elapse = time.time() - starttime
|
elapse = time.time() - starttime
|
||||||
logger.info("Predict time: {}".format(elapse))
|
logger.info("Predict time: {}".format(elapse))
|
||||||
|
|
||||||
|
@ -144,6 +144,6 @@ class StructureSystem(hub.Module):
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
structure_system = StructureSystem()
|
structure_system = StructureSystem()
|
||||||
structure_system._initialize()
|
structure_system._initialize()
|
||||||
image_path = ['./doc/table/1.png']
|
image_path = ['./ppstructure/docs/table/1.png']
|
||||||
res = structure_system.predict(paths=image_path)
|
res = structure_system.predict(paths=image_path)
|
||||||
print(res)
|
print(res)
|
||||||
|
|
|
@ -23,8 +23,10 @@ def read_params():
|
||||||
cfg = table_read_params()
|
cfg = table_read_params()
|
||||||
|
|
||||||
# params for layout parser model
|
# params for layout parser model
|
||||||
cfg.layout_path_model = 'lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config'
|
cfg.layout_model_dir = ''
|
||||||
cfg.layout_label_map = None
|
cfg.layout_dict_path = './ppocr/utils/dict/layout_publaynet_dict.txt'
|
||||||
|
cfg.layout_score_threshold = 0.5
|
||||||
|
cfg.layout_nms_threshold = 0.5
|
||||||
|
|
||||||
cfg.mode = 'structure'
|
cfg.mode = 'structure'
|
||||||
cfg.output = './output'
|
cfg.output = './output'
|
||||||
|
|
|
@ -118,11 +118,11 @@ class TableSystem(hub.Module):
|
||||||
all_results.append([])
|
all_results.append([])
|
||||||
continue
|
continue
|
||||||
starttime = time.time()
|
starttime = time.time()
|
||||||
pred_html = self.table_sys(img)
|
res, _ = self.table_sys(img)
|
||||||
elapse = time.time() - starttime
|
elapse = time.time() - starttime
|
||||||
logger.info("Predict time: {}".format(elapse))
|
logger.info("Predict time: {}".format(elapse))
|
||||||
|
|
||||||
all_results.append({'html': pred_html})
|
all_results.append({'html': res['html']})
|
||||||
return all_results
|
return all_results
|
||||||
|
|
||||||
@serving
|
@serving
|
||||||
|
@ -138,6 +138,6 @@ class TableSystem(hub.Module):
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
table_system = TableSystem()
|
table_system = TableSystem()
|
||||||
table_system._initialize()
|
table_system._initialize()
|
||||||
image_path = ['./doc/table/table.jpg']
|
image_path = ['./ppstructure/docs/table/table.jpg']
|
||||||
res = table_system.predict(paths=image_path)
|
res = table_system.predict(paths=image_path)
|
||||||
print(res)
|
print(res)
|
||||||
|
|
|
@ -51,12 +51,6 @@ pip install "paddleocr>=2.0.1" # Recommend to use version 2.0.1+
|
||||||
|
|
||||||
Reference: [Solve shapely installation on windows](https://stackoverflow.com/questions/44398265/install-shapely-oserror-winerror-126-the-specified-module-could-not-be-found)
|
Reference: [Solve shapely installation on windows](https://stackoverflow.com/questions/44398265/install-shapely-oserror-winerror-126-the-specified-module-could-not-be-found)
|
||||||
|
|
||||||
- **For layout analysis users**, run the following command to install **Layout-Parser**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip3 install -U https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
|
|
||||||
```
|
|
||||||
|
|
||||||
<a name="2-easy-to-use"></a>
|
<a name="2-easy-to-use"></a>
|
||||||
|
|
||||||
## 2. Easy-to-Use
|
## 2. Easy-to-Use
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
- [快速安装](#快速安装)
|
- [快速安装](#快速安装)
|
||||||
- [1. PaddlePaddle 和 PaddleOCR](#1-paddlepaddle-和-paddleocr)
|
- [1. PaddlePaddle 和 PaddleOCR](#1-paddlepaddle-和-paddleocr)
|
||||||
- [2. 安装其他依赖](#2-安装其他依赖)
|
- [2. 安装其他依赖](#2-安装其他依赖)
|
||||||
- [2.1 版面分析所需 Layout-Parser](#21-版面分析所需--layout-parser)
|
- [2.1 VQA所需依赖](#21--vqa所需依赖)
|
||||||
- [2.2 VQA所需依赖](#22--vqa所需依赖)
|
|
||||||
|
|
||||||
# 快速安装
|
# 快速安装
|
||||||
|
|
||||||
|
@ -12,14 +11,7 @@
|
||||||
|
|
||||||
## 2. 安装其他依赖
|
## 2. 安装其他依赖
|
||||||
|
|
||||||
### 2.1 版面分析所需 Layout-Parser
|
### 2.1 VQA所需依赖
|
||||||
|
|
||||||
Layout-Parser 可通过如下命令安装
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip3 install -U https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
|
|
||||||
```
|
|
||||||
### 2.2 VQA所需依赖
|
|
||||||
* paddleocr
|
* paddleocr
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
|
@ -1,21 +1,21 @@
|
||||||
# PP-Structure 快速开始
|
# PP-Structure 快速开始
|
||||||
|
|
||||||
- [1. 安装依赖包](#1)
|
- [1. 安装依赖包](#1-安装依赖包)
|
||||||
- [2. 便捷使用](#2)
|
- [2. 便捷使用](#2-便捷使用)
|
||||||
- [2.1 命令行使用](#21)
|
- [2.1 命令行使用](#21-命令行使用)
|
||||||
- [2.1.1 版面分析+表格识别](#211)
|
- [2.1.1 版面分析+表格识别](#211-版面分析表格识别)
|
||||||
- [2.1.2 版面分析](#212)
|
- [2.1.2 版面分析](#212-版面分析)
|
||||||
- [2.1.3 表格识别](#213)
|
- [2.1.3 表格识别](#213-表格识别)
|
||||||
- [2.1.4 DocVQA](#214)
|
- [2.1.4 DocVQA](#214-docvqa)
|
||||||
- [2.2 代码使用](#22)
|
- [2.2 代码使用](#22-代码使用)
|
||||||
- [2.2.1 版面分析+表格识别](#221)
|
- [2.2.1 版面分析+表格识别](#221-版面分析表格识别)
|
||||||
- [2.2.2 版面分析](#222)
|
- [2.2.2 版面分析](#222-版面分析)
|
||||||
- [2.2.3 表格识别](#223)
|
- [2.2.3 表格识别](#223-表格识别)
|
||||||
- [2.2.4 DocVQA](#224)
|
- [2.2.4 DocVQA](#224-docvqa)
|
||||||
- [2.3 返回结果说明](#23)
|
- [2.3 返回结果说明](#23-返回结果说明)
|
||||||
- [2.3.1 版面分析+表格识别](#231)
|
- [2.3.1 版面分析+表格识别](#231-版面分析表格识别)
|
||||||
- [2.3.2 DocVQA](#232)
|
- [2.3.2 DocVQA](#232-docvqa)
|
||||||
- [2.4 参数说明](#24)
|
- [2.4 参数说明](#24-参数说明)
|
||||||
|
|
||||||
|
|
||||||
<a name="1"></a>
|
<a name="1"></a>
|
||||||
|
@ -24,8 +24,6 @@
|
||||||
```bash
|
```bash
|
||||||
# 安装 paddleocr,推荐使用2.5+版本
|
# 安装 paddleocr,推荐使用2.5+版本
|
||||||
pip3 install "paddleocr>=2.5"
|
pip3 install "paddleocr>=2.5"
|
||||||
# 安装 版面分析依赖包layoutparser(如不需要版面分析功能,可跳过)
|
|
||||||
pip3 install -U https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
|
|
||||||
# 安装 DocVQA依赖包paddlenlp(如不需要DocVQA功能,可跳过)
|
# 安装 DocVQA依赖包paddlenlp(如不需要DocVQA功能,可跳过)
|
||||||
pip install paddlenlp
|
pip install paddlenlp
|
||||||
|
|
||||||
|
|
|
@ -1,21 +1,21 @@
|
||||||
# PP-Structure Quick Start
|
# PP-Structure Quick Start
|
||||||
|
|
||||||
- [1. Install package](#1)
|
- [1. Install package](#1-install-package)
|
||||||
- [2. Use](#2)
|
- [2. Use](#2-use)
|
||||||
- [2.1 Use by command line](#21)
|
- [2.1 Use by command line](#21-use-by-command-line)
|
||||||
- [2.1.1 layout analysis + table recognition](#211)
|
- [2.1.1 layout analysis + table recognition](#211-layout-analysis--table-recognition)
|
||||||
- [2.1.2 layout analysis](#212)
|
- [2.1.2 layout analysis](#212-layout-analysis)
|
||||||
- [2.1.3 table recognition](#213)
|
- [2.1.3 table recognition](#213-table-recognition)
|
||||||
- [2.1.4 DocVQA](#214)
|
- [2.1.4 DocVQA](#214-docvqa)
|
||||||
- [2.2 Use by code](#22)
|
- [2.2 Use by code](#22-use-by-code)
|
||||||
- [2.2.1 layout analysis + table recognition](#221)
|
- [2.2.1 layout analysis + table recognition](#221-layout-analysis--table-recognition)
|
||||||
- [2.2.2 layout analysis](#222)
|
- [2.2.2 layout analysis](#222-layout-analysis)
|
||||||
- [2.2.3 table recognition](#223)
|
- [2.2.3 table recognition](#223-table-recognition)
|
||||||
- [2.2.4 DocVQA](#224)
|
- [2.2.4 DocVQA](#224-docvqa)
|
||||||
- [2.3 Result description](#23)
|
- [2.3 Result description](#23-result-description)
|
||||||
- [2.3.1 layout analysis + table recognition](#231)
|
- [2.3.1 layout analysis + table recognition](#231-layout-analysis--table-recognition)
|
||||||
- [2.3.2 DocVQA](#232)
|
- [2.3.2 DocVQA](#232-docvqa)
|
||||||
- [2.4 Parameter Description](#24)
|
- [2.4 Parameter Description](#24-parameter-description)
|
||||||
|
|
||||||
|
|
||||||
<a name="1"></a>
|
<a name="1"></a>
|
||||||
|
@ -24,8 +24,6 @@
|
||||||
```bash
|
```bash
|
||||||
# Install paddleocr, version 2.5+ is recommended
|
# Install paddleocr, version 2.5+ is recommended
|
||||||
pip3 install "paddleocr>=2.5"
|
pip3 install "paddleocr>=2.5"
|
||||||
# Install layoutparser (if you do not use the layout analysis, you can skip it)
|
|
||||||
pip3 install -U https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
|
|
||||||
# Install the DocVQA dependency package paddlenlp (if you do not use the DocVQA, you can skip it)
|
# Install the DocVQA dependency package paddlenlp (if you do not use the DocVQA, you can skip it)
|
||||||
pip install paddlenlp
|
pip install paddlenlp
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,7 @@ logger = get_logger()
|
||||||
class StructureSystem(object):
|
class StructureSystem(object):
|
||||||
def __init__(self, args):
|
def __init__(self, args):
|
||||||
self.mode = args.mode
|
self.mode = args.mode
|
||||||
|
self.recovery = args.recovery
|
||||||
if self.mode == 'structure':
|
if self.mode == 'structure':
|
||||||
if not args.show_log:
|
if not args.show_log:
|
||||||
logger.setLevel(logging.INFO)
|
logger.setLevel(logging.INFO)
|
||||||
|
@ -110,7 +111,7 @@ class StructureSystem(object):
|
||||||
time_dict['rec'] += table_time_dict['rec']
|
time_dict['rec'] += table_time_dict['rec']
|
||||||
else:
|
else:
|
||||||
if self.text_system is not None:
|
if self.text_system is not None:
|
||||||
if args.recovery:
|
if self.recovery:
|
||||||
wht_im = np.ones(ori_im.shape, dtype=ori_im.dtype)
|
wht_im = np.ones(ori_im.shape, dtype=ori_im.dtype)
|
||||||
wht_im[y1:y2, x1:x2, :] = roi_img
|
wht_im[y1:y2, x1:x2, :] = roi_img
|
||||||
filter_boxes, filter_rec_res, ocr_time_dict = self.text_system(
|
filter_boxes, filter_rec_res, ocr_time_dict = self.text_system(
|
||||||
|
@ -133,7 +134,7 @@ class StructureSystem(object):
|
||||||
for token in style_token:
|
for token in style_token:
|
||||||
if token in rec_str:
|
if token in rec_str:
|
||||||
rec_str = rec_str.replace(token, '')
|
rec_str = rec_str.replace(token, '')
|
||||||
if not args.recovery:
|
if not self.recovery:
|
||||||
box += [x1, y1]
|
box += [x1, y1]
|
||||||
res.append({
|
res.append({
|
||||||
'text': rec_str,
|
'text': rec_str,
|
||||||
|
|
|
@ -101,7 +101,7 @@ class TableSystem(object):
|
||||||
start = time.time()
|
start = time.time()
|
||||||
|
|
||||||
structure_res, elapse = self._structure(copy.deepcopy(img))
|
structure_res, elapse = self._structure(copy.deepcopy(img))
|
||||||
result['cell_bbox'] = structure_res[1]
|
result['cell_bbox'] = structure_res[1].tolist()
|
||||||
time_dict['table'] = elapse
|
time_dict['table'] = elapse
|
||||||
|
|
||||||
dt_boxes, rec_res, det_elapse, rec_elapse = self._ocr(
|
dt_boxes, rec_res, det_elapse, rec_elapse = self._ocr(
|
||||||
|
|
|
@ -38,14 +38,17 @@ def init_args():
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--layout_dict_path",
|
"--layout_dict_path",
|
||||||
type=str,
|
type=str,
|
||||||
default="../ppocr/utils/dict/layout_pubalynet_dict.txt")
|
default="../ppocr/utils/dict/layout_publaynet_dict.txt")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--layout_score_threshold",
|
"--layout_score_threshold",
|
||||||
type=float,
|
type=float,
|
||||||
default=0.5,
|
default=0.5,
|
||||||
help="Threshold of score.")
|
help="Threshold of score.")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--layout_nms_threshold", type=float, default=0.5, help="Threshold of nms.")
|
"--layout_nms_threshold",
|
||||||
|
type=float,
|
||||||
|
default=0.5,
|
||||||
|
help="Threshold of nms.")
|
||||||
# params for vqa
|
# params for vqa
|
||||||
parser.add_argument("--vqa_algorithm", type=str, default='LayoutXLM')
|
parser.add_argument("--vqa_algorithm", type=str, default='LayoutXLM')
|
||||||
parser.add_argument("--ser_model_dir", type=str)
|
parser.add_argument("--ser_model_dir", type=str)
|
||||||
|
|
Loading…
Reference in New Issue