remove some of the less common dependencies (#13461)

* remove some of the less common dependencies

* remove dependencies

parent 9231cbe039
commit 428832f6ee

````diff
@@ -33,6 +33,11 @@
 ## 2. Environment Configuration
 Please refer to ["Environment Preparation"](./environment.md) to configure the PaddleOCR running environment, and refer to ["Project Clone"](./clone.md) to clone the project code.
 
+In addition, extra dependencies need to be installed:
+```shell
+pip install "tokenizers==0.19.1" "imagesize"
+```
+
 <a name="3"></a>
 ## 3. Model Training / Evaluation / Prediction
 
````

````diff
@@ -31,6 +31,10 @@ Using LaTeX-OCR printed mathematical expression recognition datasets for training
 ## 2. Environment
 Please refer to ["Environment Preparation"](./environment_en.md) to configure the PaddleOCR environment, and refer to ["Project Clone"](./clone_en.md) to clone the project code.
 
+Furthermore, additional dependencies need to be installed:
+```shell
+pip install "tokenizers==0.19.1" "imagesize"
+```
 
 <a name="3"></a>
 ## 3. Model Training / Evaluation / Prediction
````

```diff
@@ -26,7 +26,6 @@ import copy
 import random
 from random import sample
 from collections import defaultdict
-from tokenizers import Tokenizer as TokenizerFast
 
 from ppocr.utils.logging import get_logger
 from ppocr.data.imaug.vqa.augment import order_by_tbyx
```

```diff
@@ -1780,6 +1779,8 @@ class LatexOCRLabelEncode(object):
         rec_char_dict_path,
         **kwargs,
     ):
+        from tokenizers import Tokenizer as TokenizerFast
+
         self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
         self.model_input_names = ["input_ids", "token_type_ids", "attention_mask"]
         self.pad_token_id = 0
```
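
This hunk and the matching one for `LaTeXOCRDecode` further down move the `tokenizers` import from module scope into the constructor, so merely importing PaddleOCR no longer requires the package. A minimal sketch of the deferred-import pattern, with an illustrative class name that is not from the patch:

```python
class TokenizerBackedEncoder(object):
    """Sketch of the deferred-import pattern used in this change."""

    def __init__(self, rec_char_dict_path, **kwargs):
        # Importing inside __init__ means the optional dependency is only
        # needed when the class is instantiated, not at module import time.
        from tokenizers import Tokenizer as TokenizerFast

        self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
```

The trade-off is that a missing dependency now surfaces as an `ImportError` at construction time rather than when the module is first imported.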

```diff
@@ -15,7 +15,6 @@
 import numpy as np
 import paddle
 from paddle.nn import functional as F
-from tokenizers import Tokenizer as TokenizerFast
 import re
 
 
```

```diff
@@ -1217,6 +1216,8 @@ class LaTeXOCRDecode(object):
     """Convert between latex-symbol and symbol-index"""
 
     def __init__(self, rec_char_dict_path, **kwargs):
+        from tokenizers import Tokenizer as TokenizerFast
+
         super(LaTeXOCRDecode, self).__init__()
         self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
 
```

```diff
@@ -15,8 +15,7 @@
 import pickle
 from tqdm import tqdm
 import os
 import cv2
-import imagesize
+from paddle.utils import try_import
 from collections import defaultdict
 import glob
 from os.path import join
```

```diff
@@ -24,6 +23,7 @@ import argparse
 
 
 def txt2pickle(images, equations, save_dir):
+    imagesize = try_import("imagesize")
     save_p = os.path.join(save_dir, "latexocr_{}.pkl".format(images.split("/")[-1]))
     min_dimensions = (32, 32)
     max_dimensions = (672, 192)
```
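
`paddle.utils.try_import` resolves a module by name at call time and raises an `ImportError` if it is not installed, so the dataset tool only needs `imagesize` when it actually runs. A minimal sketch of how the resolved module can be used, assuming the same size bounds as in the hunk above; the helper name and the filtering logic are illustrative, not from the patch:

```python
from paddle.utils import try_import


def image_within_bounds(path, min_dims=(32, 32), max_dims=(672, 192)):
    # Resolve the optional dependency only when this helper is called.
    imagesize = try_import("imagesize")
    # imagesize.get reads (width, height) from the file header
    # without decoding the pixel data.
    width, height = imagesize.get(path)
    return (
        min_dims[0] <= width <= max_dims[0]
        and min_dims[1] <= height <= max_dims[1]
    )
```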

```diff
@@ -13,5 +13,3 @@ Pillow
 pyyaml
 requests
 albumentations==1.4.10
-tokenizers==0.19.1
-imagesize
```
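
With `tokenizers` and `imagesize` removed from requirements.txt, LaTeX-OCR users must install them separately as documented above. A hedged snippet, not part of the patch, for checking whether the extras are present:

```python
import importlib.util

# Both packages are now optional; report any that still need installing.
missing = [
    name
    for name in ("tokenizers", "imagesize")
    if importlib.util.find_spec(name) is None
]
if missing:
    print("Missing optional dependencies:", ", ".join(missing))
    print('Install with: pip install "tokenizers==0.19.1" "imagesize"')
```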