parent
01e60ff9e1
commit
1752c56cb7
|
@ -69,6 +69,7 @@ Metric:
|
|||
Train:
|
||||
dataset:
|
||||
name: LaTeXOCRDataSet
|
||||
data_dir: ./train_data/LaTeXOCR/train
|
||||
data: ./train_data/LaTeXOCR/latexocr_train.pkl
|
||||
min_dimensions: [32, 32]
|
||||
max_dimensions: [672, 192]
|
||||
|
@ -99,6 +100,7 @@ Train:
|
|||
Eval:
|
||||
dataset:
|
||||
name: LaTeXOCRDataSet
|
||||
data_dir: ./train_data/LaTeXOCR/val
|
||||
data: ./train_data/LaTeXOCR/latexocr_val.pkl
|
||||
min_dimensions: [32, 32]
|
||||
max_dimensions: [672, 192]
|
||||
|
|
|
@ -63,7 +63,7 @@ Evaluation:
|
|||
# Validation set evaluation
|
||||
python3 tools/eval.py -c configs/rec/rec_latex_ocr.yml -o Global.pretrained_model=./rec_latex_ocr_train/best_accuracy.pdparams
|
||||
# Test set evaluation
|
||||
python3 tools/eval.py -c configs/rec/rec_latex_ocr.yml -o Global.pretrained_model=./rec_latex_ocr_train/best_accuracy.pdparams Eval.dataset.data=./train_data/LaTeXOCR/latexocr_test.pkl
|
||||
python3 tools/eval.py -c configs/rec/rec_latex_ocr.yml -o Global.pretrained_model=./rec_latex_ocr_train/best_accuracy.pdparams Eval.dataset.data_dir=./train_data/LaTeXOCR/test Eval.dataset.data=./train_data/LaTeXOCR/latexocr_test.pkl
|
||||
```
|
||||
|
||||
Prediction:
|
||||
|
|
|
@ -71,7 +71,7 @@ python3 tools/train.py -c configs/rec/rec_latex_ocr.yml -o Global.eval_batch_ste
|
|||
# 验证集评估
|
||||
python3 tools/eval.py -c configs/rec/rec_latex_ocr.yml -o Global.pretrained_model=./rec_latex_ocr_train/best_accuracy.pdparams
|
||||
# 测试集评估
|
||||
python3 tools/eval.py -c configs/rec/rec_latex_ocr.yml -o Global.pretrained_model=./rec_latex_ocr_train/best_accuracy.pdparams Eval.dataset.data=./train_data/LaTeXOCR/latexocr_test.pkl
|
||||
python3 tools/eval.py -c configs/rec/rec_latex_ocr.yml -o Global.pretrained_model=./rec_latex_ocr_train/best_accuracy.pdparams Eval.dataset.data_dir=./train_data/LaTeXOCR/test Eval.dataset.data=./train_data/LaTeXOCR/latexocr_test.pkl
|
||||
```
|
||||
|
||||
### 3.4 预测
|
||||
|
|
|
@ -42,6 +42,7 @@ class LaTeXOCRDataSet(Dataset):
|
|||
loader_config = config[mode]["loader"]
|
||||
|
||||
pkl_path = dataset_config.pop("data")
|
||||
self.data_dir = dataset_config["data_dir"]
|
||||
self.min_dimensions = dataset_config.pop("min_dimensions")
|
||||
self.max_dimensions = dataset_config.pop("max_dimensions")
|
||||
self.batchsize = dataset_config.pop("batch_size_per_pair")
|
||||
|
@ -128,7 +129,8 @@ class LaTeXOCRDataSet(Dataset):
|
|||
|
||||
images_transform = []
|
||||
|
||||
for img_path in ims:
|
||||
for file_name in ims:
|
||||
img_path = os.path.join(self.data_dir, file_name)
|
||||
data = {
|
||||
"img_path": img_path,
|
||||
}
|
||||
|
|
|
@ -44,6 +44,7 @@ def txt2pickle(images, equations, save_dir):
|
|||
):
|
||||
divide_h = math.ceil(height / 16) * 16
|
||||
divide_w = math.ceil(width / 16) * 16
|
||||
im = os.path.basename(im)
|
||||
data[(divide_w, divide_h)].append((eqs[indices[i]], im))
|
||||
data = dict(data)
|
||||
with open(save_p, "wb") as file:
|
||||
|
|
Loading…
Reference in New Issue