diff --git a/configs/rec/rec_latex_ocr.yml b/configs/rec/rec_latex_ocr.yml index cde344907..2c604bf81 100644 --- a/configs/rec/rec_latex_ocr.yml +++ b/configs/rec/rec_latex_ocr.yml @@ -69,6 +69,7 @@ Metric: Train: dataset: name: LaTeXOCRDataSet + data_dir: ./train_data/LaTeXOCR/train data: ./train_data/LaTeXOCR/latexocr_train.pkl min_dimensions: [32, 32] max_dimensions: [672, 192] @@ -99,6 +100,7 @@ Train: Eval: dataset: name: LaTeXOCRDataSet + data_dir: ./train_data/LaTeXOCR/val data: ./train_data/LaTeXOCR/latexocr_val.pkl min_dimensions: [32, 32] max_dimensions: [672, 192] diff --git a/docs/algorithm/formula_recognition/algorithm_rec_latex_ocr.en.md b/docs/algorithm/formula_recognition/algorithm_rec_latex_ocr.en.md index 7f844294d..5febbfdc2 100644 --- a/docs/algorithm/formula_recognition/algorithm_rec_latex_ocr.en.md +++ b/docs/algorithm/formula_recognition/algorithm_rec_latex_ocr.en.md @@ -63,7 +63,7 @@ Evaluation: # Validation set evaluation python3 tools/eval.py -c configs/rec/rec_latex_ocr.yml -o Global.pretrained_model=./rec_latex_ocr_train/best_accuracy.pdparams # Test set evaluation -python3 tools/eval.py -c configs/rec/rec_latex_ocr.yml -o Global.pretrained_model=./rec_latex_ocr_train/best_accuracy.pdparams Eval.dataset.data=./train_data/LaTeXOCR/latexocr_test.pkl +python3 tools/eval.py -c configs/rec/rec_latex_ocr.yml -o Global.pretrained_model=./rec_latex_ocr_train/best_accuracy.pdparams Eval.dataset.data_dir=./train_data/LaTeXOCR/test Eval.dataset.data=./train_data/LaTeXOCR/latexocr_test.pkl ``` Prediction: diff --git a/docs/algorithm/formula_recognition/algorithm_rec_latex_ocr.md b/docs/algorithm/formula_recognition/algorithm_rec_latex_ocr.md index 414e869e1..882db013c 100644 --- a/docs/algorithm/formula_recognition/algorithm_rec_latex_ocr.md +++ b/docs/algorithm/formula_recognition/algorithm_rec_latex_ocr.md @@ -71,7 +71,7 @@ python3 tools/train.py -c configs/rec/rec_latex_ocr.yml -o Global.eval_batch_ste # 验证集评估 python3 tools/eval.py -c configs/rec/rec_latex_ocr.yml -o Global.pretrained_model=./rec_latex_ocr_train/best_accuracy.pdparams # 测试集评估 -python3 tools/eval.py -c configs/rec/rec_latex_ocr.yml -o Global.pretrained_model=./rec_latex_ocr_train/best_accuracy.pdparams Eval.dataset.data=./train_data/LaTeXOCR/latexocr_test.pkl +python3 tools/eval.py -c configs/rec/rec_latex_ocr.yml -o Global.pretrained_model=./rec_latex_ocr_train/best_accuracy.pdparams Eval.dataset.data_dir=./train_data/LaTeXOCR/test Eval.dataset.data=./train_data/LaTeXOCR/latexocr_test.pkl ``` ### 3.4 预测 diff --git a/ppocr/data/latexocr_dataset.py b/ppocr/data/latexocr_dataset.py index a1a747f04..b15d6c1f3 100644 --- a/ppocr/data/latexocr_dataset.py +++ b/ppocr/data/latexocr_dataset.py @@ -42,6 +42,7 @@ class LaTeXOCRDataSet(Dataset): loader_config = config[mode]["loader"] pkl_path = dataset_config.pop("data") + self.data_dir = dataset_config["data_dir"] self.min_dimensions = dataset_config.pop("min_dimensions") self.max_dimensions = dataset_config.pop("max_dimensions") self.batchsize = dataset_config.pop("batch_size_per_pair") @@ -128,7 +129,8 @@ class LaTeXOCRDataSet(Dataset): images_transform = [] - for img_path in ims: + for file_name in ims: + img_path = os.path.join(self.data_dir, file_name) data = { "img_path": img_path, } diff --git a/ppocr/utils/formula_utils/math_txt2pkl.py b/ppocr/utils/formula_utils/math_txt2pkl.py index 9fbf9b787..beafc13f4 100644 --- a/ppocr/utils/formula_utils/math_txt2pkl.py +++ b/ppocr/utils/formula_utils/math_txt2pkl.py @@ -44,6 +44,7 @@ def txt2pickle(images, equations, save_dir): ): divide_h = math.ceil(height / 16) * 16 divide_w = math.ceil(width / 16) * 16 + im = os.path.basename(im) data[(divide_w, divide_h)].append((eqs[indices[i]], im)) data = dict(data) with open(save_p, "wb") as file: