remove some of the less common dependencies (#13461)

* remove some of the less common dependencies

* remove dependencies
Wang Xin 2024-07-24 19:29:58 +08:00 committed by GitHub
parent 9231cbe039
commit 428832f6ee
6 changed files with 15 additions and 6 deletions

View File

@@ -33,6 +33,11 @@
 ## 2. Environment Configuration
 Please refer to [Environment Preparation](./environment.md) to set up the PaddleOCR environment, and refer to [Project Clone](./clone.md) to clone the project code.
+In addition, extra dependencies need to be installed:
+```shell
+pip install "tokenizers==0.19.1" "imagesize"
+```
 <a name="3"></a>
 ## 3. Model Training, Evaluation, and Prediction

View File

@@ -31,6 +31,10 @@ Using LaTeX-OCR printed mathematical expression recognition datasets for training
 ## 2. Environment
 Please refer to ["Environment Preparation"](./environment_en.md) to configure the PaddleOCR environment, and refer to ["Project Clone"](./clone_en.md) to clone the project code.
+Furthermore, additional dependencies need to be installed:
+```shell
+pip install "tokenizers==0.19.1" "imagesize"
+```
 <a name="3"></a>
 ## 3. Model Training / Evaluation / Prediction

View File

@@ -26,7 +26,6 @@ import copy
 import random
 from random import sample
 from collections import defaultdict
-from tokenizers import Tokenizer as TokenizerFast
 from ppocr.utils.logging import get_logger
 from ppocr.data.imaug.vqa.augment import order_by_tbyx
@@ -1780,6 +1779,8 @@ class LatexOCRLabelEncode(object):
         rec_char_dict_path,
         **kwargs,
     ):
+        from tokenizers import Tokenizer as TokenizerFast
+
         self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
         self.model_input_names = ["input_ids", "token_type_ids", "attention_mask"]
         self.pad_token_id = 0
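
This hunk and the matching one in the postprocess module below apply the same deferral pattern: the `tokenizers` import moves from module scope into the constructor, so the package is only required when the LaTeX-OCR encoder is actually instantiated. A minimal standalone sketch of the pattern (the class name and error message are illustrative, not from the commit):

```python
class LatexEncoder:
    """Encodes LaTeX strings; needs `tokenizers` only when instantiated."""

    def __init__(self, tokenizer_file):
        # Deferred import: importing this module no longer pulls in
        # `tokenizers`; only constructing the encoder does.
        try:
            from tokenizers import Tokenizer as TokenizerFast
        except ImportError as e:
            raise ImportError(
                "This feature needs the optional dependency `tokenizers`; "
                'install it with: pip install "tokenizers==0.19.1"'
            ) from e
        self.tokenizer = TokenizerFast.from_file(tokenizer_file)
```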

View File

@@ -15,7 +15,6 @@
 import numpy as np
 import paddle
 from paddle.nn import functional as F
-from tokenizers import Tokenizer as TokenizerFast
 import re
@@ -1217,6 +1216,8 @@ class LaTeXOCRDecode(object):
     """Convert between latex-symbol and symbol-index"""

     def __init__(self, rec_char_dict_path, **kwargs):
+        from tokenizers import Tokenizer as TokenizerFast
+
         super(LaTeXOCRDecode, self).__init__()
         self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
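
For context, `Tokenizer.from_file` loads a tokenizer serialized as JSON (the `rec_char_dict_path` file above). A hedged round-trip example, assuming `tokenizers` is installed and `tokenizer.json` is a valid serialized tokenizer:

```python
from tokenizers import Tokenizer as TokenizerFast

tokenizer = TokenizerFast.from_file("tokenizer.json")  # illustrative path

# Encode a LaTeX string to token ids, then decode back to text.
encoding = tokenizer.encode(r"\frac{a}{b}")
print(encoding.ids)
print(tokenizer.decode(encoding.ids))
```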

View File

@@ -15,8 +15,7 @@
 import pickle
 from tqdm import tqdm
 import os
-import cv2
-import imagesize
+from paddle.utils import try_import
 from collections import defaultdict
 import glob
 from os.path import join
@@ -24,6 +23,7 @@ import argparse
 def txt2pickle(images, equations, save_dir):
+    imagesize = try_import("imagesize")
     save_p = os.path.join(save_dir, "latexocr_{}.pkl".format(images.split("/")[-1]))
     min_dimensions = (32, 32)
     max_dimensions = (672, 192)
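
`paddle.utils.try_import` resolves a module by name at call time and raises an ImportError with an install hint if it is absent, which is what lets `imagesize` drop out of the hard requirements. A small sketch of the same pattern in isolation (the function and path are illustrative):

```python
from paddle.utils import try_import

def image_dims(path):
    # `imagesize` is imported only when this function runs, so users who
    # never build the LaTeX-OCR dataset never need it installed.
    imagesize = try_import("imagesize")
    # imagesize.get reads just the image header, avoiding a full decode.
    return imagesize.get(path)  # (width, height)

print(image_dims("train/0001.png"))  # illustrative path
```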

View File

@@ -13,5 +13,3 @@ Pillow
 pyyaml
 requests
 albumentations==1.4.10
-tokenizers==0.19.1
-imagesize