Add Syriac script support (#13800)
* Add Syriac Language support dictionary The Syriac Script is a Unicode block containing characters for all forms of the Syriac alphabet, including the Estrangela, Serto, Eastern Syriac, and the Christian Palestinian Aramaic variants. It is used in Literary Syriac, Neo-Aramaic, and Arabic among Syriac-speaking Christians. It was used historically to write Armenian, Persian, Ottoman Turkish, and Malayalam. The script, like Arabic and Hebrew is RTL. https://en.wikipedia.org/wiki/Syriac_(Unicode_block) https://en.wikipedia.org/wiki/Syriac_language * Add Syriac script support for training The Syriac Script is a Unicode block containing characters for all forms of the Syriac alphabet, including the Estrangela, Serto, Eastern Syriac, and the Christian Palestinian Aramaic variants. It is used in Literary Syriac, Neo-Aramaic, and Arabic among Syriac-speaking Christians. It was used historically to write Armenian, Persian, Ottoman Turkish, and Malayalam. The script, like Arabic and Hebrew is RTL. https://en.wikipedia.org/wiki/Syriac_(Unicode_block) https://en.wikipedia.org/wiki/Syriac_languagepull/13820/head
parent
6225a90ef0
commit
ada310811a
|
@ -0,0 +1,110 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
epoch_num: 500
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec_syriac_lite
|
||||
save_epoch_step: 3
|
||||
eval_batch_step:
|
||||
- 0
|
||||
- 2000
|
||||
cal_metric_during_train: true
|
||||
pretrained_model: null
|
||||
checkpoints: null
|
||||
save_inference_dir: null
|
||||
use_visualdl: false
|
||||
infer_img: null
|
||||
character_dict_path: ppocr/utils/dict/syriac_dict.txt
|
||||
max_text_length: 25
|
||||
infer_mode: false
|
||||
use_space_char: true
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.001
|
||||
regularizer:
|
||||
name: L2
|
||||
factor: 1.0e-05
|
||||
Architecture:
|
||||
model_type: rec
|
||||
algorithm: CRNN
|
||||
Transform: null
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: small
|
||||
small_stride:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 2
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: rnn
|
||||
hidden_size: 48
|
||||
Head:
|
||||
name: CTCHead
|
||||
fc_decay: 1.0e-05
|
||||
Loss:
|
||||
name: CTCLoss
|
||||
PostProcess:
|
||||
name: CTCLabelDecode
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/syriac_train.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- RecAug: null
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: true
|
||||
batch_size_per_card: 256
|
||||
drop_last: true
|
||||
num_workers: 8
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/syriac_val.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
batch_size_per_card: 256
|
||||
num_workers: 8
|
|
@ -0,0 +1,157 @@
|
|||
!
|
||||
#
|
||||
$
|
||||
%
|
||||
&
|
||||
'
|
||||
(
|
||||
+
|
||||
,
|
||||
-
|
||||
.
|
||||
/
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
:
|
||||
?
|
||||
@
|
||||
A
|
||||
B
|
||||
C
|
||||
D
|
||||
E
|
||||
F
|
||||
G
|
||||
H
|
||||
I
|
||||
J
|
||||
K
|
||||
L
|
||||
M
|
||||
N
|
||||
O
|
||||
P
|
||||
Q
|
||||
R
|
||||
S
|
||||
T
|
||||
U
|
||||
V
|
||||
W
|
||||
X
|
||||
Y
|
||||
Z
|
||||
_
|
||||
a
|
||||
b
|
||||
c
|
||||
d
|
||||
e
|
||||
f
|
||||
g
|
||||
h
|
||||
i
|
||||
j
|
||||
k
|
||||
l
|
||||
m
|
||||
n
|
||||
o
|
||||
p
|
||||
q
|
||||
r
|
||||
s
|
||||
t
|
||||
u
|
||||
v
|
||||
w
|
||||
x
|
||||
y
|
||||
z
|
||||
É
|
||||
é
|
||||
܀
|
||||
܁
|
||||
܂
|
||||
܃
|
||||
܄
|
||||
܅
|
||||
܆
|
||||
܇
|
||||
܈
|
||||
܉
|
||||
܊
|
||||
܋
|
||||
܌
|
||||
܍
|
||||
|
||||
ܐ
|
||||
ܑ
|
||||
ܒ
|
||||
ܓ
|
||||
ܔ
|
||||
ܕ
|
||||
ܖ
|
||||
ܗ
|
||||
ܘ
|
||||
ܙ
|
||||
ܚ
|
||||
ܛ
|
||||
ܜ
|
||||
ܝ
|
||||
ܞ
|
||||
ܟ
|
||||
ܠ
|
||||
ܡ
|
||||
ܢ
|
||||
ܣ
|
||||
ܤ
|
||||
ܥ
|
||||
ܦ
|
||||
ܧ
|
||||
ܨ
|
||||
ܩ
|
||||
ܪ
|
||||
ܫ
|
||||
ܬ
|
||||
ܭ
|
||||
ܮ
|
||||
ܯ
|
||||
ܰ
|
||||
ܱ
|
||||
ܲ
|
||||
ܳ
|
||||
ܴ
|
||||
ܵ
|
||||
ܶ
|
||||
ܷ
|
||||
ܸ
|
||||
ܹ
|
||||
ܺ
|
||||
ܻ
|
||||
ܼ
|
||||
ܽ
|
||||
ܾ
|
||||
ܿ
|
||||
݀
|
||||
݁
|
||||
݂
|
||||
݃
|
||||
݄
|
||||
݅
|
||||
݆
|
||||
݇
|
||||
݈
|
||||
݉
|
||||
݊
|
||||
ݍ
|
||||
ݎ
|
||||
ݏ
|
Loading…
Reference in New Issue