remove some of the less common dependencies (#13461)

* remove some of the less common dependencies

* remove dependencies
Wang Xin 2024-07-24 19:29:58 +08:00 committed by GitHub
parent 9231cbe039
commit 428832f6ee
6 changed files with 15 additions and 6 deletions

View File

@@ -33,6 +33,11 @@
 ## 2. Environment Configuration
 Please refer to [Environment Preparation](./environment.md) to set up the PaddleOCR environment, and refer to [Project Clone](./clone.md) to clone the project code.
+In addition, extra dependencies need to be installed:
+```shell
+pip install "tokenizers==0.19.1" "imagesize"
+```
 <a name="3"></a>
 ## 3. Model Training, Evaluation, and Prediction

View File

@@ -31,6 +31,10 @@ Using LaTeX-OCR printed mathematical expression recognition datasets for training
 ## 2. Environment
 Please refer to ["Environment Preparation"](./environment_en.md) to configure the PaddleOCR environment, and refer to ["Project Clone"](./clone_en.md) to clone the project code.
+Furthermore, additional dependencies need to be installed:
+```shell
+pip install "tokenizers==0.19.1" "imagesize"
+```
 <a name="3"></a>
 ## 3. Model Training / Evaluation / Prediction

View File

@@ -26,7 +26,6 @@ import copy
 import random
 from random import sample
 from collections import defaultdict
-from tokenizers import Tokenizer as TokenizerFast
 from ppocr.utils.logging import get_logger
 from ppocr.data.imaug.vqa.augment import order_by_tbyx
@@ -1780,6 +1779,8 @@ class LatexOCRLabelEncode(object):
         rec_char_dict_path,
         **kwargs,
     ):
+        from tokenizers import Tokenizer as TokenizerFast
+
         self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
         self.model_input_names = ["input_ids", "token_type_ids", "attention_mask"]
         self.pad_token_id = 0
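
This hunk and the matching one in the postprocess module below apply the same deferral pattern: the `tokenizers` import moves from module scope into the constructor, so the package is only required when the LaTeX-OCR encoder is actually instantiated. A minimal standalone sketch of the pattern (the class name and error message are illustrative, not from the commit):

```python
class LatexEncoder:
    """Encodes LaTeX strings; needs `tokenizers` only when instantiated."""

    def __init__(self, tokenizer_file):
        # Deferred import: importing this module no longer pulls in
        # `tokenizers`; only constructing the encoder does.
        try:
            from tokenizers import Tokenizer as TokenizerFast
        except ImportError as e:
            raise ImportError(
                "This feature needs the optional dependency `tokenizers`; "
                'install it with: pip install "tokenizers==0.19.1"'
            ) from e
        self.tokenizer = TokenizerFast.from_file(tokenizer_file)
```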

View File

@@ -15,7 +15,6 @@
 import numpy as np
 import paddle
 from paddle.nn import functional as F
-from tokenizers import Tokenizer as TokenizerFast
 import re
@@ -1217,6 +1216,8 @@ class LaTeXOCRDecode(object):
     """Convert between latex-symbol and symbol-index"""

     def __init__(self, rec_char_dict_path, **kwargs):
+        from tokenizers import Tokenizer as TokenizerFast
+
         super(LaTeXOCRDecode, self).__init__()
         self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
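
For context, `Tokenizer.from_file` loads a tokenizer serialized as JSON (the `rec_char_dict_path` file above). A hedged round-trip example, assuming `tokenizers` is installed and `tokenizer.json` is a valid serialized tokenizer:

```python
from tokenizers import Tokenizer as TokenizerFast

tokenizer = TokenizerFast.from_file("tokenizer.json")  # illustrative path

# Encode a LaTeX string to token ids, then decode back to text.
encoding = tokenizer.encode(r"\frac{a}{b}")
print(encoding.ids)
print(tokenizer.decode(encoding.ids))
```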

View File

@@ -15,8 +15,7 @@
 import pickle
 from tqdm import tqdm
 import os
-import cv2
-import imagesize
+from paddle.utils import try_import
 from collections import defaultdict
 import glob
 from os.path import join
@@ -24,6 +23,7 @@ import argparse
 def txt2pickle(images, equations, save_dir):
+    imagesize = try_import("imagesize")
     save_p = os.path.join(save_dir, "latexocr_{}.pkl".format(images.split("/")[-1]))
     min_dimensions = (32, 32)
     max_dimensions = (672, 192)
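
`paddle.utils.try_import` resolves a module by name at call time and raises an ImportError with an install hint if it is absent, which is what lets `imagesize` drop out of the hard requirements. A small sketch of the same pattern in isolation (the function and path are illustrative):

```python
from paddle.utils import try_import

def image_dims(path):
    # `imagesize` is imported only when this function runs, so users who
    # never build the LaTeX-OCR dataset never need it installed.
    imagesize = try_import("imagesize")
    # imagesize.get reads just the image header, avoiding a full decode.
    return imagesize.get(path)  # (width, height)

print(image_dims("train/0001.png"))  # illustrative path
```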

View File

@@ -13,5 +13,3 @@ Pillow
 pyyaml
 requests
 albumentations==1.4.10
-tokenizers==0.19.1
-imagesize