From b25dcaae0e86f07dbf77ae216a718ed3f14aaa8b Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Mon, 19 May 2025 03:01:27 +0800 Subject: [PATCH] Add deployment docs and enhance CLI (#15117) * Add serving and hpi docs * Optimize CLI logging info * Update interface * Add on-device deployment and onnx model conversion docs * Enhance CLI * _gen->_iter * Fix CLI help message * Update table_recognition_v2 and PP-StructureV3 interfaces * Update installation doc * Update interface * Update interface * Add logging doc * Update default values --------- Co-authored-by: cuicheng01 <45199522+cuicheng01@users.noreply.github.com> --- docs/deployment/high_performance_inference.md | 91 +++++++++ docs/deployment/obtaining_onnx_models.md | 48 +++++ docs/deployment/on_device_deployment.md | 3 + docs/deployment/serving.md | 93 +++++++++ docs/logging.md | 22 +++ docs/paddleocr_and_paddlex.md | 69 +++++++ docs/ppocr/installation.en.md | 85 --------- docs/ppocr/installation.md | 74 +++----- paddleocr/__init__.py | 2 + paddleocr/_cli.py | 6 +- paddleocr/_common_args.py | 2 +- paddleocr/_models/_image_classification.py | 2 +- paddleocr/_models/_object_detection.py | 2 +- paddleocr/_models/base.py | 9 +- paddleocr/_models/doc_vlm.py | 9 +- paddleocr/_models/formula_recognition.py | 2 +- .../_models/table_structure_recognition.py | 2 +- paddleocr/_models/text_detection.py | 2 +- paddleocr/_models/text_image_unwarping.py | 2 +- paddleocr/_models/text_recognition.py | 2 +- paddleocr/_pipelines/base.py | 16 +- paddleocr/_pipelines/doc_preprocessor.py | 30 ++- paddleocr/_pipelines/doc_understanding.py | 20 +- paddleocr/_pipelines/formula_recognition.py | 59 ++++-- paddleocr/_pipelines/ocr.py | 62 ++++-- paddleocr/_pipelines/pp_chatocrv4_doc.py | 75 ++++++-- paddleocr/_pipelines/pp_structurev3.py | 176 +++++++++++++++--- paddleocr/_pipelines/seal_recognition.py | 82 +++++--- paddleocr/_pipelines/table_recognition_v2.py | 103 +++++++--- paddleocr/{utils => _utils}/__init__.py | 0 paddleocr/{utils => 
_utils}/cli.py | 21 ++- paddleocr/{utils => _utils}/deprecation.py | 0 paddleocr/{utils => _utils}/logging.py | 0 tests/pipelines/test_pp_chatocrv4_doc.py | 1 - tests/pipelines/test_pp_structurev3.py | 1 - 35 files changed, 873 insertions(+), 300 deletions(-) create mode 100644 docs/deployment/high_performance_inference.md create mode 100644 docs/deployment/obtaining_onnx_models.md create mode 100644 docs/deployment/on_device_deployment.md create mode 100644 docs/deployment/serving.md create mode 100644 docs/logging.md create mode 100644 docs/paddleocr_and_paddlex.md delete mode 100644 docs/ppocr/installation.en.md rename paddleocr/{utils => _utils}/__init__.py (100%) rename paddleocr/{utils => _utils}/cli.py (72%) rename paddleocr/{utils => _utils}/deprecation.py (100%) rename paddleocr/{utils => _utils}/logging.py (100%) diff --git a/docs/deployment/high_performance_inference.md b/docs/deployment/high_performance_inference.md new file mode 100644 index 0000000000..a6d4245816 --- /dev/null +++ b/docs/deployment/high_performance_inference.md @@ -0,0 +1,91 @@ +# 高性能推理 + +在实际生产环境中,许多应用对部署策略的性能指标(尤其是响应速度)有着较严苛的标准,以确保系统的高效运行与用户体验的流畅性。PaddleOCR 提供高性能推理能力,让用户无需关注复杂的配置和底层细节,一键提升模型的推理速度。具体而言,PaddleOCR 的高性能推理功能能够: + +- 结合先验知识自动选择合适的推理后端(Paddle Inference、OpenVINO、ONNX Runtime、TensorRT等),并配置加速策略(如增大推理线程数、设置 FP16 精度推理); +- 根据需要自动将飞桨静态图模型转换为 ONNX 格式,以使用更优的推理后端实现加速。 + +本文档主要介绍高性能推理功能的安装与使用方法。 + +## 1. 
前置条件 + +### 1.1 安装高性能推理依赖 + +通过 PaddleOCR CLI 安装高性能推理所需依赖: + +```bash +paddleocr install_hpi_deps {设备类型} +``` + +支持的设备类型包括: + +- `cpu`:仅使用 CPU 推理。目前支持 Linux 系统、x86-64 架构处理器、Python 3.8-3.12。 +- `gpu`:使用 CPU 或 NVIDIA GPU 推理。目前支持 Linux 系统、x86-64 架构处理器、Python 3.8-3.12。请查看下一小节的详细说明。 + +同一环境中只应该存在一种设备类型的依赖。对于 Windows 系统,目前建议在 Docker 容器或者 [WSL](https://learn.microsoft.com/zh-cn/windows/wsl/install) 环境中安装。 + +**推荐使用飞桨官方 Docker 镜像安装高性能推理依赖。** 各设备类型对应的镜像如下: + +- `cpu`:`ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:3.0.0` +- `gpu`: + - CUDA 11.8:`ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:3.0.0-gpu-cuda11.8-cudnn8.9-trt8.6` + +### 1.2 GPU 环境详细说明 + +首先,需要确保环境中安装有符合要求的 CUDA 与 cuDNN。目前 PaddleOCR 仅支持与 CUDA 11.8 + cuDNN 8.9 兼容的 CUDA 和 cuDNN 版本。以下分别是 CUDA 11.8 和 cuDNN 8.9 的安装说明文档: + +- [安装 CUDA 11.8](https://developer.nvidia.com/cuda-11-8-0-download-archive) +- [安装 cuDNN 8.9](https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-890/install-guide/index.html) + +如果使用飞桨官方镜像,则镜像中的 CUDA 和 cuDNN 版本已经是满足要求的,无需额外安装。 + +如果通过 pip 安装飞桨,通常 CUDA、cuDNN 的相关 Python 包将被自动安装。在这种情况下,**仍需要安装非 Python 专用的 CUDA 与 cuDNN**。同时,建议安装的 CUDA 和 cuDNN 版本与环境中存在的 Python 包版本保持一致,以避免不同版本的库共存导致的潜在问题。可以通过如下方式查看 CUDA 和 cuDNN 相关 Python 包的版本: + +```bash +# CUDA 相关 Python 包版本 +pip list | grep nvidia-cuda +# cuDNN 相关 Python 包版本 +pip list | grep nvidia-cudnn +``` + +其次,需确保环境中安装有符合要求的 TensorRT。目前 PaddleOCR 仅支持 TensorRT 8.6.1.6。如果使用飞桨官方镜像,可执行如下命令安装 TensorRT wheel 包: + +```bash +python -m pip install /usr/local/TensorRT-*/python/tensorrt-*-cp310-none-linux_x86_64.whl +``` + +对于其他环境,请参考 [TensorRT 文档](https://docs.nvidia.com/deeplearning/tensorrt/archives/index.html) 安装 TensorRT。示例如下: + +```bash +# 下载 TensorRT tar 文件 +wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.6.1/tars/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz +# 解压 TensorRT tar 文件 +tar xvf TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz +# 安装 TensorRT wheel 包 +python -m pip install
TensorRT-8.6.1.6/python/tensorrt-8.6.1-cp310-none-linux_x86_64.whl +# 添加 TensorRT 的 `lib` 目录的绝对路径到 LD_LIBRARY_PATH 中 +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$(pwd)/TensorRT-8.6.1.6/lib" +``` + +## 2. 执行高性能推理 + +对于 PaddleOCR CLI,指定 `--enable_hpi` 为 `True` 即可执行高性能推理。例如: + +```bash +paddleocr ocr --enable_hpi True ... +``` + +对于 PaddleOCR Python API,在初始化产线对象或者模块对象时,设置 `enable_hpi` 为 `True` 即可在调用推理方法时执行高性能推理。例如: + +```python +from paddleocr import PaddleOCR +pipeline = PaddleOCR(enable_hpi=True) +result = pipeline.predict(...) +``` + +## 3. 说明 + +1. 对于部分模型,在首次执行高性能推理时,可能需要花费较长时间完成推理引擎的构建。推理引擎相关信息将在第一次构建完成后被缓存在模型目录,后续可复用缓存中的内容以提升初始化速度。 +2. 目前,由于使用的不是静态图格式模型、存在不支持算子等原因,部分模型可能无法获得推理加速。 +3. 在进行高性能推理时,PaddleOCR 会自动处理模型格式的转换,并尽可能选择最优的推理后端。同时,PaddleOCR 也支持用户指定 ONNX 模型。有关如何将飞桨静态图模型转换为 ONNX 格式,可参考 [获取 ONNX 模型](./obtaining_onnx_models.md)。 +4. PaddleOCR 的高性能推理能力依托于 PaddleX 及其高性能推理插件。通过传入自定义 PaddleX 产线配置文件,可以对推理后端等进行配置。请参考 [使用 PaddleX 产线配置文件](../paddleocr_and_paddlex.md#3-使用-paddlex-产线配置文件) 和 [PaddleX 高性能推理指南](https://paddlepaddle.github.io/PaddleX/3.0/pipeline_deploy/high_performance_inference.html#22) 了解如何调整高性能推理配置。 diff --git a/docs/deployment/obtaining_onnx_models.md b/docs/deployment/obtaining_onnx_models.md new file mode 100644 index 0000000000..6291b0871e --- /dev/null +++ b/docs/deployment/obtaining_onnx_models.md @@ -0,0 +1,48 @@ +# 获取 ONNX 模型 + +PaddleOCR 提供了丰富的预训练模型,这些模型均采用飞桨的静态图格式进行存储。若需在部署阶段使用 ONNX 格式的模型,可借助 PaddleX 提供的 Paddle2ONNX 插件进行转换。关于 PaddleX 及其与 PaddleOCR 之间的关系,请参考 [PaddleOCR 与 PaddleX 的区别与联系](../paddleocr_and_paddlex.md#1-paddleocr-与-paddlex-的区别与联系)。 + +首先,执行如下命令,通过 PaddleX CLI 安装 PaddleX 的 Paddle2ONNX 插件: + +```bash +paddlex --install paddle2onnx +``` + +然后,执行如下命令完成模型转换: + +```bash +paddlex \ + --paddle2onnx \ # 使用paddle2onnx功能 + --paddle_model_dir /your/paddle_model/dir \ # 指定 Paddle 模型所在的目录 + --onnx_model_dir /your/onnx_model/output/dir \ # 指定转换后 ONNX 模型的输出目录 + --opset_version 7 # 指定要使用的 ONNX opset 版本 +``` + +参数说明如下: + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+| 参数 | 类型 | 描述 |
+| --- | --- | --- |
+| `paddle_model_dir` | `str` | 包含 Paddle 模型的目录。 |
+| `onnx_model_dir` | `str` | ONNX 模型的输出目录,可以与 Paddle 模型目录相同。默认为 `onnx`。 |
+| `opset_version` | `int` | 使用的 ONNX opset 版本。当使用低版本 opset 无法完成转换时,将自动选择更高版本的 opset 进行转换。默认为 `7`。 |
diff --git a/docs/deployment/on_device_deployment.md b/docs/deployment/on_device_deployment.md new file mode 100644 index 0000000000..650c418de8 --- /dev/null +++ b/docs/deployment/on_device_deployment.md @@ -0,0 +1,3 @@ +# 端侧部署 + +PaddleOCR 模型可通过 [PaddleX 端侧部署方案](https://paddlepaddle.github.io/PaddleX/3.0/pipeline_deploy/edge_deploy.html) 实现端侧部署。关于 PaddleX 及其与 PaddleOCR 之间的关系,请参考 [PaddleOCR 与 PaddleX 的区别与联系](../paddleocr_and_paddlex.md#1-paddleocr-与-paddlex-的区别与联系)。 diff --git a/docs/deployment/serving.md b/docs/deployment/serving.md new file mode 100644 index 0000000000..91a0902a8b --- /dev/null +++ b/docs/deployment/serving.md @@ -0,0 +1,93 @@ +# 服务化部署 + +服务化部署是实际生产环境中常见的一种部署形式。通过将推理功能封装为服务,客户端可以通过网络请求来访问这些服务,以获取推理结果。PaddleOCR 推荐用户使用 [PaddleX](https://github.com/PaddlePaddle/PaddleX) 进行服务化部署。请阅读 [PaddleOCR 与 PaddleX 的区别与联系](../paddleocr_and_paddlex.md#1-paddleocr-与-paddlex-的区别与联系) 了解 PaddleOCR 与 PaddleX 的关系。 + +PaddleX 提供以下服务化部署方案: + +- **基础服务化部署**:简单易用的服务化部署方案,开发成本低。 +- **高稳定性服务化部署**:基于 [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) 打造。与基础服务化部署相比,该方案提供更高的稳定性,并允许用户调整配置以优化性能。 + +**建议首先使用基础服务化部署方案进行快速验证**,然后根据实际需要,评估是否尝试更复杂的方案。 + +## 1. 基础服务化部署 + +### 1.1 安装依赖 + +执行如下命令,通过 PaddleX CLI 安装 PaddleX 服务化部署插件: + +```bash +paddlex --install serving +``` + +### 1.2 运行服务器 + +通过 PaddleX CLI 运行服务器: + +```bash +paddlex --serve --pipeline {PaddleX 产线注册名或产线配置文件路径} [{其他命令行选项}] +``` + +以通用 OCR 产线为例: + +```bash +paddlex --serve --pipeline OCR +``` + +可以看到类似以下展示的信息: + +```text +INFO: Started server process [63108] +INFO: Waiting for application startup. +INFO: Application startup complete. +INFO: Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit) +``` + +如需调整配置(如模型路径、batch size、部署设备等),可指定 `--pipeline` 为自定义配置文件。请参考 [PaddleOCR 与 PaddleX](../paddleocr_and_paddlex.md) 了解 PaddleOCR 产线与 PaddleX 产线注册名的对应关系,以及 PaddleX 产线配置文件的获取与修改方式。 + +与服务化部署相关的命令行选项如下: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+| 名称 | 说明 |
+| --- | --- |
+| `--pipeline` | PaddleX 产线注册名或产线配置文件路径。 |
+| `--device` | 产线部署设备。默认为 `cpu`(如 GPU 不可用)或 `gpu`(如 GPU 可用)。 |
+| `--host` | 服务器绑定的主机名或 IP 地址。默认为 `0.0.0.0`。 |
+| `--port` | 服务器监听的端口号。默认为 `8080`。 |
+| `--use_hpip` | 如果指定,则使用高性能推理。 |
+| `--hpi_config` | 高性能推理配置。请参考 PaddleX 高性能推理指南 了解更多信息。 |
+ + +### 1.3 调用服务 + +PaddleX 产线使用教程中的 “开发集成/部署” 部分提供了服务的 API 参考与多语言调用示例。在 [PaddleX模型产线使用概览](https://paddlepaddle.github.io/PaddleX/3.0/pipeline_usage/pipeline_develop_guide.html) 中可以找到各产线的使用教程。 + +## 2. 高稳定性服务化部署 + +请参考 [PaddleX 服务化部署指南](https://paddlepaddle.github.io/PaddleX/3.0/pipeline_deploy/serving.html#2)。在 [使用 PaddleX 产线配置文件](../paddleocr_and_paddlex.md#3-使用-paddlex-产线配置文件) 中,可以了解关于 PaddleX 产线配置文件的更多信息。 + +需要说明的是,由于缺乏细粒度优化等原因,当前 PaddleOCR 提供的高稳定性服务化部署方案在性能上可能不及 2.x 版本基于 PaddleServing 的方案;但该新方案已对飞桨 3.0 框架提供了全面支持,我们也将持续优化,后续考虑推出性能更优的部署方案。 diff --git a/docs/logging.md b/docs/logging.md new file mode 100644 index 0000000000..fa1927de8a --- /dev/null +++ b/docs/logging.md @@ -0,0 +1,22 @@ +# 日志 + +本文档主要介绍如何配置 PaddleOCR 推理包的日志系统。需要注意的是,PaddleOCR 推理包与训练脚本使用的是不同的日志系统,本文档不涉及训练脚本所使用的日志系统的配置方法。 + +PaddleOCR 构建了一个基于 Python [`logging` 标准库](https://docs.python.org/zh-cn/3/library/logging.html#module-logging) 的集中式日志系统。换言之,PaddleOCR 使用唯一的日志记录器(logger),可通过 `paddleocr.logger` 访问和配置。 + +默认情况下,PaddleOCR 的日志级别设为 `ERROR`,这意味着仅当日志级别为 `ERROR` 或更高(如 `CRITICAL`)时,日志信息才会输出。PaddleOCR 同时为该日志记录器配置了一个 `StreamHandler`,将日志输出到标准错误流,并将记录器的 `propagate` 属性设为 `False`,以避免日志信息传递到其父记录器。 + +若希望禁止 PaddleOCR 对日志系统的自动配置行为,可将环境变量 `DISABLE_AUTO_LOGGING_CONFIG` 设为 `1`。此时,PaddleOCR 将不会对日志记录器进行任何额外配置。 + +如需更灵活地定制日志行为,可参考 `logging` 标准库的相关文档。以下是一个将日志写入文件的示例: + +```python +import logging +from paddleocr import logger + +# 将日志写入文件 `paddleocr.log` +fh = logging.FileHandler("paddleocr.log") +logger.addHandler(fh) +``` + +请注意,PaddleOCR 依赖的其他库(如 [PaddleX](./paddleocr_and_paddlex.md))拥有各自独立的日志系统,以上配置不会影响这些库的日志输出。 diff --git a/docs/paddleocr_and_paddlex.md b/docs/paddleocr_and_paddlex.md new file mode 100644 index 0000000000..8f19bf7fdc --- /dev/null +++ b/docs/paddleocr_and_paddlex.md @@ -0,0 +1,69 @@ +# PaddleOCR 与 PaddleX + +[PaddleX](https://github.com/PaddlePaddle/PaddleX) 是一款基于飞桨框架构建的低代码开发工具,集成了众多开箱即用的预训练模型,支持模型从训练到推理的全流程开发,兼容多款国内外主流硬件,助力 AI 开发者在产业实践中高效落地。 + +PaddleOCR 在推理部署方面基于 PaddleX 构建,二者在该环节可实现无缝协同。在安装 
PaddleOCR 时,PaddleX 也将作为其依赖一并安装。此外,PaddleOCR 与 PaddleX 在产线名称等方面也保持一致。对于快速体验,如果只使用基础配置,用户通常无需了解 PaddleX 的具体概念;但在涉及高级配置、服务化部署等使用场景时,了解 PaddleX 的相关知识将有所帮助。 + +本文档将介绍 PaddleOCR 与 PaddleX 之间的关系,并说明如何协同使用这两个工具。 + +## 1. PaddleOCR 与 PaddleX 的区别与联系 + +PaddleOCR 与 PaddleX 在定位和功能上各有侧重:PaddleOCR 专注于 OCR 相关任务,而 PaddleX 则覆盖了包括时序预测、人脸识别等在内的多种任务类型。此外,PaddleX 提供了丰富的基础设施,具备多模型组合推理的底层能力,能够以统一且灵活的方式接入不同模型,支持构建复杂的模型产线。 + +PaddleOCR 在推理部署环节充分复用了 PaddleX 的能力,具体包括: + +- PaddleOCR 在模型推理、前后处理及多模型组合等底层能力上,主要依赖于 PaddleX。 +- PaddleOCR 的高性能推理能力通过 PaddleX 的 Paddle2ONNX 插件及高性能推理插件实现。 +- PaddleOCR 的服务化部署方案基于 PaddleX 的实现。 + +## 2. PaddleOCR 产线与 PaddleX 产线注册名的对应关系 + +| PaddleOCR 产线 | PaddleX 产线注册名 | +| --- | --- | +| 通用 OCR | `OCR` | +| 通用版面解析 v3 | `PP-StructureV3` | +| 文档场景信息抽取 v4 | `PP-ChatOCRv4-doc` | +| 通用表格识别 v2 | `table_recognition_v2` | +| 公式识别 | `formula_recognition` | +| 印章文本识别 | `seal_recognition` | +| 文档图像预处理 | `doc_preprocessor` | +| 文档理解 | `doc_understanding` | + +## 3. 使用 PaddleX 产线配置文件 + +在推理部署阶段,PaddleOCR 支持导出和加载 PaddleX 的产线配置文件。用户可通过编辑配置文件,对推理部署相关参数进行深度配置。 + +### 3.1 导出产线配置文件 + +可调用 PaddleOCR 产线对象的 `export_paddlex_config_to_yaml` 方法,将当前产线配置导出为 YAML 文件。示例如下: + +```python +from paddleocr import PaddleOCR + +pipeline = PaddleOCR() +pipeline.export_paddlex_config_to_yaml("ocr_config.yaml") +``` + +上述代码会在工作目录下生成名为 `ocr_config.yaml` 的产线配置文件。 + +### 3.2 编辑产线配置文件 + +导出的 PaddleX 产线配置文件不仅包含 PaddleOCR CLI 和 Python API 支持的参数,还可进行更多高级配置。请在 [PaddleX模型产线使用概览](https://paddlepaddle.github.io/PaddleX/3.0/pipeline_usage/pipeline_develop_guide.html) 中找到对应的产线使用教程,参考其中的详细说明,根据需求调整各项配置。 + +### 3.3 在 CLI 中加载产线配置文件 + +通过 `--paddlex_config` 参数指定 PaddleX 产线配置文件的路径,PaddleOCR 会读取其中的内容作为产线的默认配置。示例如下: + +```bash +paddleocr ocr --paddlex_config ocr_config.yaml ... 
+``` + +### 3.4 在 Python API 中加载产线配置文件 + +初始化产线对象时,可通过 `paddlex_config` 参数传入 PaddleX 产线配置文件路径或配置字典,PaddleOCR 会将其作为默认配置。示例如下: + +```python +from paddleocr import PaddleOCR + +pipeline = PaddleOCR(paddlex_config="ocr_config.yaml") +``` diff --git a/docs/ppocr/installation.en.md b/docs/ppocr/installation.en.md deleted file mode 100644 index 244e88dffa..0000000000 --- a/docs/ppocr/installation.en.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -comments: true ---- - -## Quick Installation - -After testing, PaddleOCR can run on glibc 2.23. You can also test other glibc versions or install glibc 2.23 for the best compatibility. - -PaddleOCR working environment: - -- PaddlePaddle > 2.0.0 -- Python 3 -- glibc 2.23 - -It is recommended to use the docker provided by us to run PaddleOCR. Please refer to the docker tutorial [link](https://www.runoob.com/docker/docker-tutorial.html/). - -*If you want to directly run the prediction code on Mac or Windows, you can start from step 2.* - -### 1. (Recommended) Prepare a docker environment - -For the first time you use this docker image, it will be downloaded automatically. Please be patient. - -```bash linenums="1" -# Switch to the working directory -cd /home/Projects -# You need to create a docker container for the first run, and do not need to run the current command when you run it again -# Create a docker container named ppocr and map the current directory to the /paddle directory of the container - -#If using CPU, use docker instead of nvidia-docker to create docker -sudo docker run --name ppocr -v $PWD:/paddle --network=host -it paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82 /bin/bash -``` - -With CUDA10, please run the following command to create a container. 
-It is recommended to set a shared memory greater than or equal to 32G through the --shm-size parameter: - -```bash linenums="1" -sudo nvidia-docker run --name ppocr -v $PWD:/paddle --shm-size=64G --network=host -it paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82 /bin/bash -``` - -You can also visit [DockerHub](https://hub.docker.com/r/paddlepaddle/paddle/tags/) to get the image that fits your machine. - -```bash linenums="1" -# ctrl+P+Q to exit docker, to re-enter docker using the following command: -sudo docker container exec -it ppocr /bin/bash -``` - -### 2. Install PaddlePaddle 2.0 - -```bash linenums="1" -pip3 install --upgrade pip - -# If you have cuda9 or cuda10 installed on your machine, please run the following command to install -python3 -m pip install paddlepaddle-gpu==2.0.0 -i https://mirror.baidu.com/pypi/simple - -# If you only have cpu on your machine, please run the following command to install -python3 -m pip install paddlepaddle==2.0.0 -i https://mirror.baidu.com/pypi/simple -``` - -For more software version requirements, please refer to the instructions in [Installation Document](https://www.paddlepaddle.org.cn/install/quick) for operation. - -### 3. Clone PaddleOCR repo - -```bash linenums="1" -# Recommend -git clone https://github.com/PaddlePaddle/PaddleOCR - -# If you cannot pull successfully due to network problems, you can switch to the mirror hosted on Gitee: - -git clone https://gitee.com/paddlepaddle/PaddleOCR - -# Note: The mirror on Gitee may not keep in synchronization with the latest update with the project on GitHub. There might be a delay of 3-5 days. Please try GitHub at first. -``` - -### 4. Install third-party libraries - -```bash linenums="1" -cd PaddleOCR -pip3 install -r requirements.txt -``` - -If you getting this error `OSError: [WinError 126] The specified module could not be found` when you install shapely on windows. 
- -Please try to download Shapely whl file from [http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely](http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely). - -Reference: [Solve shapely installation on windows](https://stackoverflow.com/questions/44398265/install-shapely-oserror-winerror-126-the-specified-module-could-not-be-found) diff --git a/docs/ppocr/installation.md b/docs/ppocr/installation.md index 30e35795fb..74a3eeef65 100644 --- a/docs/ppocr/installation.md +++ b/docs/ppocr/installation.md @@ -2,75 +2,49 @@ comments: true --- -## 快速安装 +# 安装 -经测试PaddleOCR可在glibc 2.23上运行,您也可以测试其他glibc版本或安装glic 2.23 -PaddleOCR 工作环境 +# 1. 安装飞桨框架 -- PaddlePaddle 2.0.0 -- python3 -- glibc 2.23 -- cuDNN 7.6+ (GPU) +请参考 [飞桨官网](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/develop/install/pip/linux-pip.html) 安装 `3.0` 及以上版本的飞桨框架。**推荐使用飞桨官方 Docker 镜像。** -建议使用我们提供的docker运行PaddleOCR,有关docker、nvidia-docker使用请参考[链接](https://www.runoob.com/docker/docker-tutorial.html/)。 +# 2. 安装 PaddleOCR -*如您希望使用 mac 或 windows直接运行预测代码,可以从第2步开始执行。* +如果只希望使用 PaddleOCR 的推理功能,请参考 [安装推理包](#21-安装推理包);如果希望进行模型训练、导出等,请参考 [安装训练依赖](#22-安装训练依赖)。在同一环境中安装推理包和训练依赖是允许的,无需进行环境隔离。 -### 1. 
(建议)准备docker环境 +## 2.1 安装推理包 -第一次使用这个镜像,会自动下载该镜像,请耐心等待 +从 PyPI 安装最新版本 PaddleOCR 推理包: -```bash linenums="1" -# 切换到工作目录下 -cd /home/Projects -# 首次运行需创建一个docker容器,再次运行时不需要运行当前命令 -# 创建一个名字为ppocr的docker容器,并将当前目录映射到容器的/paddle目录下 - -如果您希望在CPU环境下使用docker,使用docker而不是nvidia-docker创建docker -sudo docker run --name ppocr -v $PWD:/paddle --network=host -it paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82 /bin/bash - -如果使用CUDA10,请运行以下命令创建容器,设置docker容器共享内存shm-size为64G,建议设置32G以上 -sudo nvidia-docker run --name ppocr -v $PWD:/paddle --shm-size=64G --network=host -it paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82 /bin/bash - -您也可以访问[DockerHub](https://hub.docker.com/r/paddlepaddle/paddle/tags/)获取与您机器适配的镜像。 - -# ctrl+P+Q可退出docker 容器,重新进入docker 容器使用如下命令 -sudo docker container exec -it ppocr /bin/bash +```bash +python -m pip install paddleocr ``` -### 2. 安装PaddlePaddle 2.0 +或者从源码安装(默认为开发分支): -```bash linenums="1" -pip3 install --upgrade pip - -# 如果您的机器安装的是CUDA9或CUDA10,请运行以下命令安装 -python3 -m pip install paddlepaddle-gpu==2.0.0 -i https://mirror.baidu.com/pypi/simple - -# 如果您的机器是CPU,请运行以下命令安装 -python3 -m pip install paddlepaddle==2.0.0 -i https://mirror.baidu.com/pypi/simple - -# 更多的版本需求,请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。 +```bash +python -m pip install "git+https://github.com/PaddlePaddle/PaddleOCR.git" ``` -### 3. 克隆PaddleOCR repo代码 +## 2.2 安装训练依赖 -```bash linenums="1" -#【推荐】 +要进行模型训练、导出等,需要首先将仓库克隆到本地: + +```bash +# 推荐方式 git clone https://github.com/PaddlePaddle/PaddleOCR -# 如果因为网络问题无法pull成功,也可选择使用码云上的托管: +# (可选)切换到指定分支 +git checkout release/3.0 +# 如果因为网络问题无法克隆成功,也可选择使用码云上的仓库: git clone https://gitee.com/paddlepaddle/PaddleOCR -# 注:码云托管代码可能无法实时同步本github项目更新,存在3~5天延时,请优先使用推荐方式。 +# 注:码云托管代码可能无法实时同步本 GitHub 项目更新,存在3~5天延时,请优先使用推荐方式。 ``` -### 4. 
安装第三方库 +执行如下命令安装依赖: -```bash linenums="1" -cd PaddleOCR -pip3 install -r requirements.txt +```bash +python -m pip install -r requirements.txt ``` - -注意,windows环境下,建议从[这里](https://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely)下载shapely安装包完成安装, -直接通过pip安装的shapely库可能出现`[winRrror 126] 找不到指定模块的问题`。 diff --git a/paddleocr/__init__.py b/paddleocr/__init__.py index 2cdb22985a..bd9280d970 100644 --- a/paddleocr/__init__.py +++ b/paddleocr/__init__.py @@ -36,6 +36,7 @@ from ._pipelines import ( SealRecognition, TableRecognitionPipelineV2, ) +from ._utils.logging import logger from ._version import version as __version__ __all__ = [ @@ -59,5 +60,6 @@ __all__ = [ "PPStructureV3", "SealRecognition", "TableRecognitionPipelineV2", + "logger", "__version__", ] diff --git a/paddleocr/_cli.py b/paddleocr/_cli.py index 3c644bfdd3..c707912eee 100644 --- a/paddleocr/_cli.py +++ b/paddleocr/_cli.py @@ -13,8 +13,10 @@ # limitations under the License. import argparse +import logging import subprocess import sys + import warnings from ._models import ( @@ -42,7 +44,8 @@ from ._pipelines import ( TableRecognitionPipelineV2, ) from ._version import version -from .utils.deprecation import CLIDeprecationWarning +from ._utils.deprecation import CLIDeprecationWarning +from ._utils.logging import logger def _register_pipelines(subparsers): @@ -110,6 +113,7 @@ def _execute(args): def main(): + logger.setLevel(logging.INFO) warnings.filterwarnings("default", category=CLIDeprecationWarning) args = _parse_args() _execute(args) diff --git a/paddleocr/_common_args.py b/paddleocr/_common_args.py index f34cf56350..50988c91bc 100644 --- a/paddleocr/_common_args.py +++ b/paddleocr/_common_args.py @@ -24,7 +24,7 @@ from ._constants import ( DEFAULT_USE_TENSORRT, SUPPORTED_PRECISION_LIST, ) -from .utils.cli import str2bool +from ._utils.cli import str2bool def parse_common_args(kwargs, *, default_enable_hpi): diff --git a/paddleocr/_models/_image_classification.py b/paddleocr/_models/_image_classification.py 
index e2e9ba2bf4..6bdccba3db 100644 --- a/paddleocr/_models/_image_classification.py +++ b/paddleocr/_models/_image_classification.py @@ -14,7 +14,7 @@ import abc -from ..utils.cli import ( +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, diff --git a/paddleocr/_models/_object_detection.py b/paddleocr/_models/_object_detection.py index 61d521c5ab..3570774810 100644 --- a/paddleocr/_models/_object_detection.py +++ b/paddleocr/_models/_object_detection.py @@ -14,7 +14,7 @@ import abc -from ..utils.cli import ( +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, diff --git a/paddleocr/_models/base.py b/paddleocr/_models/base.py index e57107d5b7..780eeb8757 100644 --- a/paddleocr/_models/base.py +++ b/paddleocr/_models/base.py @@ -23,7 +23,7 @@ from .._common_args import ( prepare_common_init_args, ) from .._mkldnn_blocklists import MODEL_MKLDNN_BLOCKLIST -from ..utils.logging import logger +from .._utils.logging import logger _DEFAULT_ENABLE_HPI = False @@ -59,10 +59,11 @@ class PaddleXPredictorWrapper(metaclass=abc.ABCMeta): def default_model_name(self): raise NotImplementedError + def predict_iter(self, *args, **kwargs): + return self.paddlex_predictor.predict(*args, **kwargs) + def predict(self, *args, **kwargs): - result = [] - for res in self.paddlex_predictor.predict(*args, **kwargs): - result.append(res) + result = list(self.predict_iter(*args, **kwargs)) return result @classmethod diff --git a/paddleocr/_models/doc_vlm.py b/paddleocr/_models/doc_vlm.py index 778147f9bb..c0df6bd697 100644 --- a/paddleocr/_models/doc_vlm.py +++ b/paddleocr/_models/doc_vlm.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ..utils.cli import ( +from paddlex.utils.pipeline_arguments import custom_type + +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, @@ -50,7 +52,10 @@ class DocVLMSubcommandExecutor(PredictorCLISubcommandExecutor): return "doc_vlm" def _update_subparser(self, subparser): - add_simple_inference_args(subparser) + add_simple_inference_args( + subparser, + input_help='Input dict, e.g. `{"image": "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/medal_table.png", "query": "Recognize this table"}`.', + ) def execute_with_args(self, args): params = get_subcommand_args(args) diff --git a/paddleocr/_models/formula_recognition.py b/paddleocr/_models/formula_recognition.py index 46367b88f9..62b940808a 100644 --- a/paddleocr/_models/formula_recognition.py +++ b/paddleocr/_models/formula_recognition.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ..utils.cli import ( +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, diff --git a/paddleocr/_models/table_structure_recognition.py b/paddleocr/_models/table_structure_recognition.py index 6489eade68..270bd7b27b 100644 --- a/paddleocr/_models/table_structure_recognition.py +++ b/paddleocr/_models/table_structure_recognition.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ..utils.cli import ( +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, diff --git a/paddleocr/_models/text_detection.py b/paddleocr/_models/text_detection.py index 2547539f71..084ca81273 100644 --- a/paddleocr/_models/text_detection.py +++ b/paddleocr/_models/text_detection.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ..utils.cli import ( +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, diff --git a/paddleocr/_models/text_image_unwarping.py b/paddleocr/_models/text_image_unwarping.py index b44f3b3686..6dbbe5698c 100644 --- a/paddleocr/_models/text_image_unwarping.py +++ b/paddleocr/_models/text_image_unwarping.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ..utils.cli import ( +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, diff --git a/paddleocr/_models/text_recognition.py b/paddleocr/_models/text_recognition.py index 835373a9b0..1d359ecb98 100644 --- a/paddleocr/_models/text_recognition.py +++ b/paddleocr/_models/text_recognition.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ..utils.cli import ( +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, diff --git a/paddleocr/_pipelines/base.py b/paddleocr/_pipelines/base.py index e6c55db770..3ac8e4c591 100644 --- a/paddleocr/_pipelines/base.py +++ b/paddleocr/_pipelines/base.py @@ -17,6 +17,7 @@ import abc import yaml from paddlex import create_pipeline from paddlex.inference import load_pipeline_config +from paddlex.utils.config import AttrDict from .._abstract import CLISubcommandExecutor from .._common_args import ( @@ -25,7 +26,7 @@ from .._common_args import ( prepare_common_init_args, ) from .._mkldnn_blocklists import PIPELINE_MKLDNN_BLOCKLIST -from ..utils.logging import logger +from .._utils.logging import logger _DEFAULT_ENABLE_HPI = None @@ -40,6 +41,14 @@ def _merge_dicts(d1, d2): return res +def _to_plain_dict(d): + res = d.copy() + for k, v in d.items(): + if isinstance(v, AttrDict): + res[k] = _to_plain_dict(v) + return res + + class PaddleXPipelineWrapper(metaclass=abc.ABCMeta): def __init__( 
self, @@ -70,7 +79,8 @@ class PaddleXPipelineWrapper(metaclass=abc.ABCMeta): def export_paddlex_config_to_yaml(self, yaml_path): with open(yaml_path, "w", encoding="utf-8") as f: - yaml.safe_dump(self._merged_paddlex_config, f) + config = _to_plain_dict(self._merged_paddlex_config) + yaml.safe_dump(config, f) @classmethod @abc.abstractmethod @@ -83,7 +93,7 @@ class PaddleXPipelineWrapper(metaclass=abc.ABCMeta): def _get_merged_paddlex_config(self): if self._paddlex_config is None: config = load_pipeline_config(self._paddlex_pipeline_name) - elif isinstance(self._config, str): + elif isinstance(self._paddlex_config, str): config = load_pipeline_config(self._paddlex_config) else: config = self._paddlex_config diff --git a/paddleocr/_pipelines/doc_preprocessor.py b/paddleocr/_pipelines/doc_preprocessor.py index 594ce61992..afb7dfea16 100644 --- a/paddleocr/_pipelines/doc_preprocessor.py +++ b/paddleocr/_pipelines/doc_preprocessor.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ..utils.cli import ( +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, @@ -48,6 +48,19 @@ class DocPreprocessor(PaddleXPipelineWrapper): def _paddlex_pipeline_name(self): return "doc_preprocessor" + def predict_iter( + self, + input, + *, + use_doc_orientation_classify=None, + use_doc_unwarping=None, + ): + return self.paddlex_pipeline.predict( + input, + use_doc_orientation_classify=use_doc_orientation_classify, + use_doc_unwarping=use_doc_unwarping, + ) + def predict( self, input, @@ -55,14 +68,13 @@ class DocPreprocessor(PaddleXPipelineWrapper): use_doc_orientation_classify=None, use_doc_unwarping=None, ): - result = [] - for res in self.paddlex_pipeline.predict( - input, - use_doc_orientation_classify=use_doc_orientation_classify, - use_doc_unwarping=use_doc_unwarping, - ): - result.append(res) - return result + return list( + self.predict_iter( + input, + use_doc_orientation_classify=use_doc_orientation_classify, + use_doc_unwarping=use_doc_unwarping, + ) + ) @classmethod def get_cli_subcommand_executor(cls): diff --git a/paddleocr/_pipelines/doc_understanding.py b/paddleocr/_pipelines/doc_understanding.py index 041e71e7da..0cde72fc97 100644 --- a/paddleocr/_pipelines/doc_understanding.py +++ b/paddleocr/_pipelines/doc_understanding.py @@ -12,15 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ..utils.cli import ( +from paddlex.utils.pipeline_arguments import custom_type + +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, - str2bool, ) from .base import PaddleXPipelineWrapper, PipelineCLISubcommandExecutor from .utils import create_config_from_structure -from paddlex.utils.pipeline_arguments import custom_type class DocUnderstanding(PaddleXPipelineWrapper): @@ -43,15 +43,15 @@ class DocUnderstanding(PaddleXPipelineWrapper): def _paddlex_pipeline_name(self): return "doc_understanding" + def predict_iter(self, input, **kwargs): + return self.paddlex_pipeline.predict(input, **kwargs) + def predict( self, input, **kwargs, ): - result = [] - for res in self.paddlex_pipeline.predict(input, **kwargs): - result.append(res) - return result + return list(self.predict_iter(input, **kwargs)) @classmethod def get_cli_subcommand_executor(cls): @@ -73,7 +73,6 @@ class DocUnderstanding(PaddleXPipelineWrapper): class DocUnderstandingCLISubcommandExecutor(PipelineCLISubcommandExecutor): - input_validator = staticmethod(custom_type(dict)) @property @@ -81,7 +80,10 @@ class DocUnderstandingCLISubcommandExecutor(PipelineCLISubcommandExecutor): return "doc_understanding" def _update_subparser(self, subparser): - add_simple_inference_args(subparser) + add_simple_inference_args( + subparser, + input_help='Input dict, e.g. `{"image": "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/medal_table.png", "query": "Recognize this table"}`.', + ) subparser.add_argument( "--doc_understanding_model_name", diff --git a/paddleocr/_pipelines/formula_recognition.py b/paddleocr/_pipelines/formula_recognition.py index 8e52450506..dbee291528 100644 --- a/paddleocr/_pipelines/formula_recognition.py +++ b/paddleocr/_pipelines/formula_recognition.py @@ -12,13 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ..utils.cli import ( +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, str2bool, ) -from ..utils.logging import logger from .base import PaddleXPipelineWrapper, PipelineCLISubcommandExecutor from .utils import create_config_from_structure @@ -58,6 +57,33 @@ class FormulaRecognitionPipeline(PaddleXPipelineWrapper): def _paddlex_pipeline_name(self): return "formula_recognition" + def predict_iter( + self, + input, + *, + use_layout_detection=None, + use_doc_orientation_classify=None, + use_doc_unwarping=None, + layout_det_res=None, + layout_threshold=None, + layout_nms=None, + layout_unclip_ratio=None, + layout_merge_bboxes_mode=None, + **kwargs, + ): + return self.paddlex_pipeline.predict( + input, + use_layout_detection=use_layout_detection, + use_doc_orientation_classify=use_doc_orientation_classify, + use_doc_unwarping=use_doc_unwarping, + layout_det_res=layout_det_res, + layout_threshold=layout_threshold, + layout_nms=layout_nms, + layout_unclip_ratio=layout_unclip_ratio, + layout_merge_bboxes_mode=layout_merge_bboxes_mode, + **kwargs, + ) + def predict( self, input, @@ -72,21 +98,20 @@ class FormulaRecognitionPipeline(PaddleXPipelineWrapper): layout_merge_bboxes_mode=None, **kwargs, ): - result = [] - for res in self.paddlex_pipeline.predict( - input, - use_layout_detection=use_layout_detection, - use_doc_orientation_classify=use_doc_orientation_classify, - use_doc_unwarping=use_doc_unwarping, - layout_det_res=layout_det_res, - layout_threshold=layout_threshold, - layout_nms=layout_nms, - layout_unclip_ratio=layout_unclip_ratio, - layout_merge_bboxes_mode=layout_merge_bboxes_mode, - **kwargs, - ): - result.append(res) - return result + return list( + self.predict_iter( + input, + use_layout_detection=use_layout_detection, + use_doc_orientation_classify=use_doc_orientation_classify, + use_doc_unwarping=use_doc_unwarping, + layout_det_res=layout_det_res, + layout_threshold=layout_threshold, + 
layout_nms=layout_nms, + layout_unclip_ratio=layout_unclip_ratio, + layout_merge_bboxes_mode=layout_merge_bboxes_mode, + **kwargs, + ) + ) @classmethod def get_cli_subcommand_executor(cls): diff --git a/paddleocr/_pipelines/ocr.py b/paddleocr/_pipelines/ocr.py index ad38918ea5..b38dc5b5b8 100644 --- a/paddleocr/_pipelines/ocr.py +++ b/paddleocr/_pipelines/ocr.py @@ -18,18 +18,18 @@ import sys -from ..utils.cli import ( +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, str2bool, ) -from ..utils.deprecation import ( +from .._utils.deprecation import ( DeprecatedOptionAction, deprecated, warn_deprecated_param, ) -from ..utils.logging import logger +from .._utils.logging import logger from .base import PaddleXPipelineWrapper, PipelineCLISubcommandExecutor from .utils import create_config_from_structure @@ -147,6 +147,33 @@ class PaddleOCR(PaddleXPipelineWrapper): def _paddlex_pipeline_name(self): return "OCR" + def predict_iter( + self, + input, + *, + use_doc_orientation_classify=None, + use_doc_unwarping=None, + use_textline_orientation=None, + text_det_limit_side_len=None, + text_det_limit_type=None, + text_det_thresh=None, + text_det_box_thresh=None, + text_det_unclip_ratio=None, + text_rec_score_thresh=None, + ): + return self.paddlex_pipeline.predict( + input, + use_doc_orientation_classify=use_doc_orientation_classify, + use_doc_unwarping=use_doc_unwarping, + use_textline_orientation=use_textline_orientation, + text_det_limit_side_len=text_det_limit_side_len, + text_det_limit_type=text_det_limit_type, + text_det_thresh=text_det_thresh, + text_det_box_thresh=text_det_box_thresh, + text_det_unclip_ratio=text_det_unclip_ratio, + text_rec_score_thresh=text_rec_score_thresh, + ) + def predict( self, input, @@ -161,21 +188,20 @@ class PaddleOCR(PaddleXPipelineWrapper): text_det_unclip_ratio=None, text_rec_score_thresh=None, ): - result = [] - for res in self.paddlex_pipeline.predict( - input, - 
use_doc_orientation_classify=use_doc_orientation_classify, - use_doc_unwarping=use_doc_unwarping, - use_textline_orientation=use_textline_orientation, - text_det_limit_side_len=text_det_limit_side_len, - text_det_limit_type=text_det_limit_type, - text_det_thresh=text_det_thresh, - text_det_box_thresh=text_det_box_thresh, - text_det_unclip_ratio=text_det_unclip_ratio, - text_rec_score_thresh=text_rec_score_thresh, - ): - result.append(res) - return result + return list( + self.predict_iter( + input, + use_doc_orientation_classify=use_doc_orientation_classify, + use_doc_unwarping=use_doc_unwarping, + use_textline_orientation=use_textline_orientation, + text_det_limit_side_len=text_det_limit_side_len, + text_det_limit_type=text_det_limit_type, + text_det_thresh=text_det_thresh, + text_det_box_thresh=text_det_box_thresh, + text_det_unclip_ratio=text_det_unclip_ratio, + text_rec_score_thresh=text_rec_score_thresh, + ) + ) @deprecated("Please use `predict` instead.") def ocr(self, img, **kwargs): diff --git a/paddleocr/_pipelines/pp_chatocrv4_doc.py b/paddleocr/_pipelines/pp_chatocrv4_doc.py index 91e47d037b..0b18115bc9 100644 --- a/paddleocr/_pipelines/pp_chatocrv4_doc.py +++ b/paddleocr/_pipelines/pp_chatocrv4_doc.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ..utils.cli import ( +from .._utils.cli import ( get_subcommand_args, str2bool, ) @@ -43,7 +43,6 @@ class PPChatOCRv4Doc(PaddleXPipelineWrapper): seal_text_recognition_batch_size=None, use_doc_orientation_classify=None, use_doc_unwarping=None, - use_general_ocr=None, use_seal_recognition=None, use_table_recognition=None, layout_threshold=None, @@ -78,13 +77,12 @@ class PPChatOCRv4Doc(PaddleXPipelineWrapper): def _paddlex_pipeline_name(self): return "PP-ChatOCRv4-doc" - def visual_predict( + def visual_predict_iter( self, input, *, use_doc_orientation_classify=None, use_doc_unwarping=None, - use_general_ocr=None, use_seal_recognition=None, use_table_recognition=None, layout_threshold=None, @@ -105,12 +103,10 @@ class PPChatOCRv4Doc(PaddleXPipelineWrapper): seal_rec_score_thresh=None, **kwargs, ): - result = [] - for res in self.paddlex_pipeline.visual_predict( + return self.paddlex_pipeline.visual_predict( input, use_doc_orientation_classify=use_doc_orientation_classify, use_doc_unwarping=use_doc_unwarping, - use_general_ocr=use_general_ocr, use_seal_recognition=use_seal_recognition, use_table_recognition=use_table_recognition, layout_threshold=layout_threshold, @@ -130,9 +126,60 @@ class PPChatOCRv4Doc(PaddleXPipelineWrapper): seal_det_unclip_ratio=seal_det_unclip_ratio, seal_rec_score_thresh=seal_rec_score_thresh, **kwargs, - ): - result.append(res) - return result + ) + + def visual_predict( + self, + input, + *, + use_doc_orientation_classify=None, + use_doc_unwarping=None, + use_seal_recognition=None, + use_table_recognition=None, + layout_threshold=None, + layout_nms=None, + layout_unclip_ratio=None, + layout_merge_bboxes_mode=None, + text_det_limit_side_len=None, + text_det_limit_type=None, + text_det_thresh=None, + text_det_box_thresh=None, + text_det_unclip_ratio=None, + text_rec_score_thresh=None, + seal_det_limit_side_len=None, + seal_det_limit_type=None, + seal_det_thresh=None, + seal_det_box_thresh=None, + seal_det_unclip_ratio=None, + 
seal_rec_score_thresh=None, + **kwargs, + ): + return list( + self.visual_predict_iter( + input, + use_doc_orientation_classify=use_doc_orientation_classify, + use_doc_unwarping=use_doc_unwarping, + use_seal_recognition=use_seal_recognition, + use_table_recognition=use_table_recognition, + layout_threshold=layout_threshold, + layout_nms=layout_nms, + layout_unclip_ratio=layout_unclip_ratio, + layout_merge_bboxes_mode=layout_merge_bboxes_mode, + text_det_limit_side_len=text_det_limit_side_len, + text_det_limit_type=text_det_limit_type, + text_det_thresh=text_det_thresh, + text_det_box_thresh=text_det_box_thresh, + text_det_unclip_ratio=text_det_unclip_ratio, + text_rec_score_thresh=text_rec_score_thresh, + seal_det_limit_side_len=seal_det_limit_side_len, + seal_det_limit_type=seal_det_limit_type, + seal_det_thresh=seal_det_thresh, + seal_det_box_thresh=seal_det_box_thresh, + seal_det_unclip_ratio=seal_det_unclip_ratio, + seal_rec_score_thresh=seal_rec_score_thresh, + **kwargs, + ) + ) def build_vector( self, @@ -269,9 +316,6 @@ class PPChatOCRv4Doc(PaddleXPipelineWrapper): "SubPipelines.LayoutParser.SubPipelines.DocPreprocessor.use_doc_unwarping": self._params[ "use_doc_unwarping" ], - "SubPipelines.LayoutParser.use_general_ocr": self._params[ - "use_general_ocr" - ], "SubPipelines.LayoutParser.use_seal_recognition": self._params[ "use_seal_recognition" ], @@ -468,11 +512,6 @@ class PPChatOCRv4DocCLISubcommandExecutor(PipelineCLISubcommandExecutor): type=str2bool, help="Whether to use the text image unwarping model.", ) - subparser.add_argument( - "--use_general_ocr", - type=str2bool, - help="Whether to use general OCR.", - ) subparser.add_argument( "--use_seal_recognition", type=str2bool, diff --git a/paddleocr/_pipelines/pp_structurev3.py b/paddleocr/_pipelines/pp_structurev3.py index 38c7432551..112c22e6cb 100644 --- a/paddleocr/_pipelines/pp_structurev3.py +++ b/paddleocr/_pipelines/pp_structurev3.py @@ -12,7 +12,7 @@ # See the License for the specific language 
governing permissions and # limitations under the License. -from ..utils.cli import ( +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, @@ -31,6 +31,11 @@ class PPStructureV3(PaddleXPipelineWrapper): layout_nms=None, layout_unclip_ratio=None, layout_merge_bboxes_mode=None, + chart_recognition_model_name=None, + chart_recognition_model_dir=None, + chart_recognition_batch_size=None, + region_detection_model_name=None, + region_detection_model_dir=None, doc_orientation_classify_model_name=None, doc_orientation_classify_model_dir=None, doc_unwarping_model_name=None, @@ -75,10 +80,11 @@ class PPStructureV3(PaddleXPipelineWrapper): formula_recognition_batch_size=None, use_doc_orientation_classify=None, use_doc_unwarping=None, - use_general_ocr=None, use_seal_recognition=None, use_table_recognition=None, use_formula_recognition=None, + use_chart_recognition=None, + use_region_detection=None, **kwargs, ): params = locals().copy() @@ -92,16 +98,17 @@ class PPStructureV3(PaddleXPipelineWrapper): def _paddlex_pipeline_name(self): return "PP-StructureV3" - def predict( + def predict_iter( self, input, - use_doc_orientation_classify=None, - use_doc_unwarping=None, + use_doc_orientation_classify=False, + use_doc_unwarping=False, use_textline_orientation=None, - use_general_ocr=None, use_seal_recognition=None, use_table_recognition=None, use_formula_recognition=None, + use_chart_recognition=False, + use_region_detection=None, layout_threshold=None, layout_nms=None, layout_unclip_ratio=None, @@ -118,21 +125,24 @@ class PPStructureV3(PaddleXPipelineWrapper): seal_det_box_thresh=None, seal_det_unclip_ratio=None, seal_rec_score_thresh=None, - use_table_cells_ocr_results=None, - use_e2e_wired_table_rec_model=None, - use_e2e_wireless_table_rec_model=None, + use_wired_table_cells_trans_to_html=False, + use_wireless_table_cells_trans_to_html=False, + use_table_orientation_classify=True, + use_ocr_results_with_table_cells=True, + 
use_e2e_wired_table_rec_model=False, + use_e2e_wireless_table_rec_model=True, **kwargs, ): - result = [] - for res in self.paddlex_pipeline.predict( + return self.paddlex_pipeline.predict( input, use_doc_orientation_classify=use_doc_orientation_classify, use_doc_unwarping=use_doc_unwarping, use_textline_orientation=use_textline_orientation, - use_general_ocr=use_general_ocr, use_seal_recognition=use_seal_recognition, use_table_recognition=use_table_recognition, use_formula_recognition=use_formula_recognition, + use_chart_recognition=use_chart_recognition, + use_region_detection=use_region_detection, layout_threshold=layout_threshold, layout_nms=layout_nms, layout_unclip_ratio=layout_unclip_ratio, @@ -149,13 +159,86 @@ class PPStructureV3(PaddleXPipelineWrapper): seal_det_box_thresh=seal_det_box_thresh, seal_det_unclip_ratio=seal_det_unclip_ratio, seal_rec_score_thresh=seal_rec_score_thresh, - use_table_cells_ocr_results=use_table_cells_ocr_results, + use_wired_table_cells_trans_to_html=use_wired_table_cells_trans_to_html, + use_wireless_table_cells_trans_to_html=use_wireless_table_cells_trans_to_html, + use_table_orientation_classify=use_table_orientation_classify, + use_ocr_results_with_table_cells=use_ocr_results_with_table_cells, use_e2e_wired_table_rec_model=use_e2e_wired_table_rec_model, use_e2e_wireless_table_rec_model=use_e2e_wireless_table_rec_model, **kwargs, - ): - result.append(res) - return result + ) + + def predict( + self, + input, + use_doc_orientation_classify=False, + use_doc_unwarping=False, + use_textline_orientation=None, + use_seal_recognition=None, + use_table_recognition=None, + use_formula_recognition=None, + use_chart_recognition=False, + use_region_detection=None, + layout_threshold=None, + layout_nms=None, + layout_unclip_ratio=None, + layout_merge_bboxes_mode=None, + text_det_limit_side_len=None, + text_det_limit_type=None, + text_det_thresh=None, + text_det_box_thresh=None, + text_det_unclip_ratio=None, + text_rec_score_thresh=None, + 
seal_det_limit_side_len=None, + seal_det_limit_type=None, + seal_det_thresh=None, + seal_det_box_thresh=None, + seal_det_unclip_ratio=None, + seal_rec_score_thresh=None, + use_wired_table_cells_trans_to_html=False, + use_wireless_table_cells_trans_to_html=False, + use_table_orientation_classify=True, + use_ocr_results_with_table_cells=True, + use_e2e_wired_table_rec_model=False, + use_e2e_wireless_table_rec_model=True, + **kwargs, + ): + return list( + self.predict_iter( + input, + use_doc_orientation_classify=use_doc_orientation_classify, + use_doc_unwarping=use_doc_unwarping, + use_textline_orientation=use_textline_orientation, + use_seal_recognition=use_seal_recognition, + use_table_recognition=use_table_recognition, + use_formula_recognition=use_formula_recognition, + use_chart_recognition=use_chart_recognition, + use_region_detection=use_region_detection, + layout_threshold=layout_threshold, + layout_nms=layout_nms, + layout_unclip_ratio=layout_unclip_ratio, + layout_merge_bboxes_mode=layout_merge_bboxes_mode, + text_det_limit_side_len=text_det_limit_side_len, + text_det_limit_type=text_det_limit_type, + text_det_thresh=text_det_thresh, + text_det_box_thresh=text_det_box_thresh, + text_det_unclip_ratio=text_det_unclip_ratio, + text_rec_score_thresh=text_rec_score_thresh, + seal_det_limit_side_len=seal_det_limit_side_len, + seal_det_limit_type=seal_det_limit_type, + seal_det_thresh=seal_det_thresh, + seal_det_box_thresh=seal_det_box_thresh, + seal_det_unclip_ratio=seal_det_unclip_ratio, + seal_rec_score_thresh=seal_rec_score_thresh, + use_wired_table_cells_trans_to_html=use_wired_table_cells_trans_to_html, + use_wireless_table_cells_trans_to_html=use_wireless_table_cells_trans_to_html, + use_table_orientation_classify=use_table_orientation_classify, + use_ocr_results_with_table_cells=use_ocr_results_with_table_cells, + use_e2e_wired_table_rec_model=use_e2e_wired_table_rec_model, + use_e2e_wireless_table_rec_model=use_e2e_wireless_table_rec_model, + **kwargs, + 
) + ) @classmethod def get_cli_subcommand_executor(cls): @@ -169,7 +252,6 @@ class PPStructureV3(PaddleXPipelineWrapper): "SubPipelines.DocPreprocessor.use_doc_unwarping": self._params[ "use_doc_unwarping" ], - "use_general_ocr": self._params["use_general_ocr"], "use_seal_recognition": self._params["use_seal_recognition"], "use_table_recognition": self._params["use_table_recognition"], "use_formula_recognition": self._params["use_formula_recognition"], @@ -187,6 +269,21 @@ class PPStructureV3(PaddleXPipelineWrapper): "SubModules.LayoutDetection.layout_merge_bboxes_mode": self._params[ "layout_merge_bboxes_mode" ], + "SubModules.ChartRecognition.model_name": self._params[ + "chart_recognition_model_name" + ], + "SubModules.ChartRecognition.model_dir": self._params[ + "chart_recognition_model_dir" + ], + "SubModules.ChartRecognition.batch_size": self._params[ + "chart_recognition_batch_size" + ], + "SubModules.RegionDetection.model_name": self._params[ + "region_detection_model_name" + ], + "SubModules.RegionDetection.model_dir": self._params[ + "region_detection_model_dir" + ], "SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify.model_name": self._params[ "doc_orientation_classify_model_name" ], @@ -353,6 +450,33 @@ class PPStructureV3CLISubcommandExecutor(PipelineCLISubcommandExecutor): help="Overlapping box filtering method.", ) + subparser.add_argument( + "--chart_recognition_model_name", + type=str, + help="Name of the chart recognition model.", + ) + subparser.add_argument( + "--chart_recognition_model_dir", + type=str, + help="Path to the chart recognition model directory.", + ) + subparser.add_argument( + "--chart_recognition_batch_size", + type=int, + help="Batch size for the chart recognition model.", + ) + + subparser.add_argument( + "--region_detection_model_name", + type=str, + help="Name of the region detection model.", + ) + subparser.add_argument( + "--region_detection_model_dir", + type=str, + help="Path to the region detection model 
directory.", + ) + subparser.add_argument( "--doc_orientation_classify_model_name", type=str, @@ -571,18 +695,15 @@ class PPStructureV3CLISubcommandExecutor(PipelineCLISubcommandExecutor): subparser.add_argument( "--use_doc_orientation_classify", type=str2bool, + default=False, help="Whether to use the document image orientation classification model.", ) subparser.add_argument( "--use_doc_unwarping", type=str2bool, + default=False, help="Whether to use the text image unwarping model.", ) - subparser.add_argument( - "--use_general_ocr", - type=str2bool, - help="Whether to use general OCR.", - ) subparser.add_argument( "--use_seal_recognition", type=str2bool, @@ -598,6 +719,17 @@ class PPStructureV3CLISubcommandExecutor(PipelineCLISubcommandExecutor): type=str2bool, help="Whether to use formula recognition.", ) + subparser.add_argument( + "--use_chart_recognition", + type=str2bool, + default=False, + help="Whether to use chart recognition.", + ) + subparser.add_argument( + "--use_region_detection", + type=str2bool, + help="Whether to use region detection.", + ) def execute_with_args(self, args): params = get_subcommand_args(args) diff --git a/paddleocr/_pipelines/seal_recognition.py b/paddleocr/_pipelines/seal_recognition.py index 0e02543a46..13b566684c 100644 --- a/paddleocr/_pipelines/seal_recognition.py +++ b/paddleocr/_pipelines/seal_recognition.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ..utils.cli import ( +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, @@ -84,6 +84,45 @@ class SealRecognition(PaddleXPipelineWrapper): def _paddlex_pipeline_name(self): return "seal_recognition" + def predict_iter( + self, + input, + *, + use_doc_orientation_classify=None, + use_doc_unwarping=None, + use_layout_detection=None, + layout_det_res=None, + layout_threshold=None, + layout_nms=None, + layout_unclip_ratio=None, + layout_merge_bboxes_mode=None, + seal_det_limit_side_len=None, + seal_det_limit_type=None, + seal_det_thresh=None, + seal_det_box_thresh=None, + seal_det_unclip_ratio=None, + seal_rec_score_thresh=None, + **kwargs, + ): + return self.paddlex_pipeline.predict( + input, + use_doc_orientation_classify=use_doc_orientation_classify, + use_doc_unwarping=use_doc_unwarping, + use_layout_detection=use_layout_detection, + layout_det_res=layout_det_res, + layout_threshold=layout_threshold, + layout_nms=layout_nms, + layout_unclip_ratio=layout_unclip_ratio, + layout_merge_bboxes_mode=layout_merge_bboxes_mode, + seal_det_limit_side_len=seal_det_limit_side_len, + seal_det_limit_type=seal_det_limit_type, + seal_det_thresh=seal_det_thresh, + seal_det_box_thresh=seal_det_box_thresh, + seal_det_unclip_ratio=seal_det_unclip_ratio, + seal_rec_score_thresh=seal_rec_score_thresh, + **kwargs, + ) + def predict( self, input, @@ -104,27 +143,26 @@ class SealRecognition(PaddleXPipelineWrapper): seal_rec_score_thresh=None, **kwargs, ): - result = [] - for res in self.paddlex_pipeline.predict( - input, - use_doc_orientation_classify=use_doc_orientation_classify, - use_doc_unwarping=use_doc_unwarping, - use_layout_detection=use_layout_detection, - layout_det_res=layout_det_res, - layout_threshold=layout_threshold, - layout_nms=layout_nms, - layout_unclip_ratio=layout_unclip_ratio, - layout_merge_bboxes_mode=layout_merge_bboxes_mode, - seal_det_limit_side_len=seal_det_limit_side_len, - 
seal_det_limit_type=seal_det_limit_type, - seal_det_thresh=seal_det_thresh, - seal_det_box_thresh=seal_det_box_thresh, - seal_det_unclip_ratio=seal_det_unclip_ratio, - seal_rec_score_thresh=seal_rec_score_thresh, - **kwargs, - ): - result.append(res) - return result + return list( + self.predict_iter( + input, + use_doc_orientation_classify=use_doc_orientation_classify, + use_doc_unwarping=use_doc_unwarping, + use_layout_detection=use_layout_detection, + layout_det_res=layout_det_res, + layout_threshold=layout_threshold, + layout_nms=layout_nms, + layout_unclip_ratio=layout_unclip_ratio, + layout_merge_bboxes_mode=layout_merge_bboxes_mode, + seal_det_limit_side_len=seal_det_limit_side_len, + seal_det_limit_type=seal_det_limit_type, + seal_det_thresh=seal_det_thresh, + seal_det_box_thresh=seal_det_box_thresh, + seal_det_unclip_ratio=seal_det_unclip_ratio, + seal_rec_score_thresh=seal_rec_score_thresh, + **kwargs, + ) + ) @classmethod def get_cli_subcommand_executor(cls): diff --git a/paddleocr/_pipelines/table_recognition_v2.py b/paddleocr/_pipelines/table_recognition_v2.py index c2ae4f1189..a9315a20db 100644 --- a/paddleocr/_pipelines/table_recognition_v2.py +++ b/paddleocr/_pipelines/table_recognition_v2.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ..utils.cli import ( +from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, perform_simple_inference, @@ -69,6 +69,52 @@ class TableRecognitionPipelineV2(PaddleXPipelineWrapper): def _paddlex_pipeline_name(self): return "table_recognition_v2" + def predict_iter( + self, + input, + use_doc_orientation_classify=None, + use_doc_unwarping=None, + use_layout_detection=None, + use_ocr_model=None, + overall_ocr_res=None, + layout_det_res=None, + text_det_limit_side_len=None, + text_det_limit_type=None, + text_det_thresh=None, + text_det_box_thresh=None, + text_det_unclip_ratio=None, + text_rec_score_thresh=None, + use_e2e_wired_table_rec_model=False, + use_e2e_wireless_table_rec_model=False, + use_wired_table_cells_trans_to_html=False, + use_wireless_table_cells_trans_to_html=False, + use_table_orientation_classify=True, + use_ocr_results_with_table_cells=True, + **kwargs, + ): + return self.paddlex_pipeline.predict( + input, + use_doc_orientation_classify=use_doc_orientation_classify, + use_doc_unwarping=use_doc_unwarping, + use_layout_detection=use_layout_detection, + use_ocr_model=use_ocr_model, + overall_ocr_res=overall_ocr_res, + layout_det_res=layout_det_res, + text_det_limit_side_len=text_det_limit_side_len, + text_det_limit_type=text_det_limit_type, + text_det_thresh=text_det_thresh, + text_det_box_thresh=text_det_box_thresh, + text_det_unclip_ratio=text_det_unclip_ratio, + text_rec_score_thresh=text_rec_score_thresh, + use_e2e_wired_table_rec_model=use_e2e_wired_table_rec_model, + use_e2e_wireless_table_rec_model=use_e2e_wireless_table_rec_model, + use_wired_table_cells_trans_to_html=use_wired_table_cells_trans_to_html, + use_wireless_table_cells_trans_to_html=use_wireless_table_cells_trans_to_html, + use_table_orientation_classify=use_table_orientation_classify, + use_ocr_results_with_table_cells=use_ocr_results_with_table_cells, + **kwargs, + ) + def predict( self, input, @@ -84,33 +130,38 @@ class 
TableRecognitionPipelineV2(PaddleXPipelineWrapper): text_det_box_thresh=None, text_det_unclip_ratio=None, text_rec_score_thresh=None, - use_table_cells_ocr_results=None, - use_e2e_wired_table_rec_model=None, - use_e2e_wireless_table_rec_model=None, + use_e2e_wired_table_rec_model=False, + use_e2e_wireless_table_rec_model=False, + use_wired_table_cells_trans_to_html=False, + use_wireless_table_cells_trans_to_html=False, + use_table_orientation_classify=True, + use_ocr_results_with_table_cells=True, **kwargs, ): - result = [] - for res in self.paddlex_pipeline.predict( - input, - use_doc_orientation_classify=use_doc_orientation_classify, - use_doc_unwarping=use_doc_unwarping, - use_layout_detection=use_layout_detection, - use_ocr_model=use_ocr_model, - overall_ocr_res=overall_ocr_res, - layout_det_res=layout_det_res, - text_det_limit_side_len=text_det_limit_side_len, - text_det_limit_type=text_det_limit_type, - text_det_thresh=text_det_thresh, - text_det_box_thresh=text_det_box_thresh, - text_det_unclip_ratio=text_det_unclip_ratio, - text_rec_score_thresh=text_rec_score_thresh, - use_table_cells_ocr_results=use_table_cells_ocr_results, - use_e2e_wired_table_rec_model=use_e2e_wired_table_rec_model, - use_e2e_wireless_table_rec_model=use_e2e_wireless_table_rec_model, - **kwargs, - ): - result.append(res) - return result + return list( + self.predict_iter( + input, + use_doc_orientation_classify=use_doc_orientation_classify, + use_doc_unwarping=use_doc_unwarping, + use_layout_detection=use_layout_detection, + use_ocr_model=use_ocr_model, + overall_ocr_res=overall_ocr_res, + layout_det_res=layout_det_res, + text_det_limit_side_len=text_det_limit_side_len, + text_det_limit_type=text_det_limit_type, + text_det_thresh=text_det_thresh, + text_det_box_thresh=text_det_box_thresh, + text_det_unclip_ratio=text_det_unclip_ratio, + text_rec_score_thresh=text_rec_score_thresh, + use_e2e_wired_table_rec_model=use_e2e_wired_table_rec_model, + 
use_e2e_wireless_table_rec_model=use_e2e_wireless_table_rec_model, + use_wired_table_cells_trans_to_html=use_wired_table_cells_trans_to_html, + use_wireless_table_cells_trans_to_html=use_wireless_table_cells_trans_to_html, + use_table_orientation_classify=use_table_orientation_classify, + use_ocr_results_with_table_cells=use_ocr_results_with_table_cells, + **kwargs, + ) + ) @classmethod def get_cli_subcommand_executor(cls): diff --git a/paddleocr/utils/__init__.py b/paddleocr/_utils/__init__.py similarity index 100% rename from paddleocr/utils/__init__.py rename to paddleocr/_utils/__init__.py diff --git a/paddleocr/utils/cli.py b/paddleocr/_utils/cli.py similarity index 72% rename from paddleocr/utils/cli.py rename to paddleocr/_utils/cli.py index 33e4fa5964..059a426c53 100644 --- a/paddleocr/utils/cli.py +++ b/paddleocr/_utils/cli.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import time + +from .logging import logger + def str2bool(v, /): return v.lower() in ("true", "yes", "t", "y", "1") @@ -24,9 +28,15 @@ def get_subcommand_args(args): return args -def add_simple_inference_args(subparser): +def add_simple_inference_args(subparser, *, input_help=None): + if input_help is None: + input_help = "Input path or URL." subparser.add_argument( - "-i", "--input", type=str, required=True, help="Input path or URL." 
+ "-i", + "--input", + type=str, + required=True, + help=input_help, ) subparser.add_argument( "--save_path", @@ -42,9 +52,12 @@ def perform_simple_inference(wrapper_cls, params): wrapper = wrapper_cls(**params) - result = wrapper.predict(input_) + result = wrapper.predict_iter(input_) - for res in result: + t1 = time.time() + for i, res in enumerate(result): + logger.info(f"Processed item {i} in {(time.time()-t1) * 1000} ms") + t1 = time.time() res.print() if save_path: res.save_all(save_path) diff --git a/paddleocr/utils/deprecation.py b/paddleocr/_utils/deprecation.py similarity index 100% rename from paddleocr/utils/deprecation.py rename to paddleocr/_utils/deprecation.py diff --git a/paddleocr/utils/logging.py b/paddleocr/_utils/logging.py similarity index 100% rename from paddleocr/utils/logging.py rename to paddleocr/_utils/logging.py diff --git a/tests/pipelines/test_pp_chatocrv4_doc.py b/tests/pipelines/test_pp_chatocrv4_doc.py index 047555cc28..6a92d45e83 100644 --- a/tests/pipelines/test_pp_chatocrv4_doc.py +++ b/tests/pipelines/test_pp_chatocrv4_doc.py @@ -33,7 +33,6 @@ def test_visual_predict(pp_chatocrv4_doc_pipeline, image_path): [ {"use_doc_orientation_classify": False}, {"use_doc_unwarping": False}, - {"use_general_ocr": False}, {"use_table_recognition": False}, {"layout_threshold": 0.88}, {"layout_threshold": [0.45, 0.4]}, diff --git a/tests/pipelines/test_pp_structurev3.py b/tests/pipelines/test_pp_structurev3.py index 9ede2257d2..c04543d588 100644 --- a/tests/pipelines/test_pp_structurev3.py +++ b/tests/pipelines/test_pp_structurev3.py @@ -36,7 +36,6 @@ def test_visual_predict(pp_structurev3_pipeline, image_path): [ {"use_doc_orientation_classify": False}, {"use_doc_unwarping": False}, - {"use_general_ocr": False}, {"use_table_recognition": False}, {"use_formula_recognition": False}, {"layout_threshold": 0.88},