diff --git a/doc/imgs_hme/hme_02.jpg b/doc/imgs_hme/hme_02.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ecc760f5382bfe3d94de6141379f6a5a196e8430
GIT binary patch (binary image data omitted)
diff --git a/ppocr/data/__init__.py b/ppocr/data/__init__.py
index b602a346d..1f3de63de 100644
--- a/ppocr/data/__init__.py
+++ b/ppocr/data/__init__.py
@@ -37,6 +37,7 @@ from ppocr.data.simple_dataset import SimpleDataSet
from ppocr.data.lmdb_dataset import LMDBDataSet, LMDBDataSetSR
from ppocr.data.pgnet_dataset import PGDataSet
from ppocr.data.pubtab_dataset import PubTabDataSet
+from ppocr.data.hmer_dataset import HMERDataSet
__all__ = ['build_dataloader', 'transform', 'create_operators']
@@ -55,7 +56,7 @@ def build_dataloader(config, mode, device, logger, seed=None):
support_dict = [
'SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet',
- 'LMDBDataSetSR'
+ 'LMDBDataSetSR', 'HMERDataSet'
]
module_name = config[mode]['dataset']['name']
assert module_name in support_dict, Exception(
diff --git a/ppocr/data/collate_fn.py b/ppocr/data/collate_fn.py
index 0da6060f0..fec1e895f 100644
--- a/ppocr/data/collate_fn.py
+++ b/ppocr/data/collate_fn.py
@@ -70,3 +70,49 @@ class SSLRotateCollate(object):
def __call__(self, batch):
output = [np.concatenate(d, axis=0) for d in zip(*batch)]
return output
+
+
+class DyMaskCollator(object):
+ """
+ batch: [
+ image [batch_size, channel, maxHinbatch, maxWinbatch]
+ image_mask [batch_size, channel, maxHinbatch, maxWinbatch]
+ label [batch_size, maxLabelLen]
+ label_mask [batch_size, maxLabelLen]
+ ...
+ ]
+ """
+
+ def __call__(self, batch):
+ max_width, max_height, max_length = 0, 0, 0
+ bs, channel = len(batch), batch[0][0].shape[0]
+ proper_items = []
+        for item in batch:
+            # drop samples that would push the padded batch past ~1600*320 px
+            if item[0].shape[1] * max_width > 1600 * 320 or item[0].shape[
+                    2] * max_height > 1600 * 320:
+                continue
+            max_height = max(item[0].shape[1], max_height)
+            max_width = max(item[0].shape[2], max_width)
+            max_length = max(item[1].shape[0], max_length)
+            proper_items.append(item)
+
+ images, image_masks = np.zeros(
+ (len(proper_items), channel, max_height, max_width),
+ dtype='float32'), np.zeros(
+ (len(proper_items), 1, max_height, max_width), dtype='float32')
+ labels, label_masks = np.zeros(
+ (len(proper_items), max_length), dtype='int64'), np.zeros(
+ (len(proper_items), max_length), dtype='int64')
+
+        for i in range(len(proper_items)):
+            _, h, w = proper_items[i][0].shape
+            images[i][:, :h, :w] = proper_items[i][0]
+            image_masks[i][:, :h, :w] = 1
+            label_len = proper_items[i][1].shape[0]
+            labels[i][:label_len] = proper_items[i][1]
+            label_masks[i][:label_len] = 1
+
+ return images, image_masks, labels, label_masks
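
Note: DyMaskCollator pads every sample in a batch to the largest image and
label in that batch and emits matching masks. A minimal sketch on a toy batch
(shapes are hypothetical, not from this PR):

    import numpy as np

    collator = DyMaskCollator()
    batch = [
        (np.ones((1, 60, 100), 'float32'), np.array([3, 4, 5])),  # (image, label)
        (np.ones((1, 80, 120), 'float32'), np.array([7, 8])),
    ]
    images, image_masks, labels, label_masks = collator(batch)
    # images      (2, 1, 80, 120): zero-padded to the batch max H/W
    # image_masks (2, 1, 80, 120): 1 over valid pixels, 0 over padding
    # labels      (2, 3):          zero-padded to the max label length
    # label_masks (2, 3):          1 over valid tokens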
diff --git a/ppocr/data/hmer_dataset.py b/ppocr/data/hmer_dataset.py
new file mode 100644
index 000000000..d5d92f264
--- /dev/null
+++ b/ppocr/data/hmer_dataset.py
@@ -0,0 +1,99 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os, json, random, traceback
+import numpy as np
+
+from PIL import Image
+from paddle.io import Dataset
+
+from .imaug import transform, create_operators
+
+
+class HMERDataSet(Dataset):
+ def __init__(self, config, mode, logger, seed=None):
+ super(HMERDataSet, self).__init__()
+
+ self.logger = logger
+ self.seed = seed
+ self.mode = mode
+
+        global_config = config['Global']
+        dataset_config = config[mode]['dataset']
+        loader_config = config[mode]['loader']
+        self.data_dir = dataset_config['data_dir']
+        self.do_shuffle = loader_config['shuffle']
+
+        label_file_list = dataset_config['label_file_list']
+        data_source_num = len(label_file_list)
+        ratio_list = dataset_config.get("ratio_list", [1.0])
+        if isinstance(ratio_list, (float, int)):
+            ratio_list = [float(ratio_list)] * int(data_source_num)
+
+        assert len(
+            ratio_list
+        ) == data_source_num, "The length of ratio_list should be the same as the file_list."
+
+        self.data_lines, self.labels = self.get_image_info_list(label_file_list,
+                                                                ratio_list)
+        self.data_idx_order_list = list(range(len(self.data_lines)))
+        if self.mode == "train" and self.do_shuffle:
+            self.shuffle_data_random()
+
+        self.ops = create_operators(dataset_config['transforms'], global_config)
+        self.need_reset = any(x < 1 for x in ratio_list)
+
+ def get_image_info_list(self, file_list, ratio_list):
+ if isinstance(file_list, str):
+ file_list = [file_list]
+ labels = {}
+ for idx, file in enumerate(file_list):
+ with open(file, "r") as f:
+ lines = json.load(f)
+ labels.update(lines)
+ data_lines = [name for name in labels.keys()]
+ return data_lines, labels
+
+ def shuffle_data_random(self):
+ random.seed(self.seed)
+ random.shuffle(self.data_lines)
+ return
+
+ def __len__(self):
+ return len(self.data_idx_order_list)
+
+ def __getitem__(self, idx):
+ file_idx = self.data_idx_order_list[idx]
+ data_name = self.data_lines[file_idx]
+ try:
+ file_name = data_name + '.jpg'
+ img_path = os.path.join(self.data_dir, file_name)
+ if not os.path.exists(img_path):
+ raise Exception("{} does not exist!".format(img_path))
+ with open(img_path, 'rb') as f:
+ img = f.read()
+
+ label = self.labels.get(data_name).split()
+ label = np.array([int(item) for item in label])
+
+ data = {'image': img, 'label': label}
+ outs = transform(data, self.ops)
+        except Exception:
+            self.logger.error(
+                "When parsing sample {}, error happened with msg: {}".format(
+                    data_name, traceback.format_exc()))
+            outs = None
+        if outs is None:
+            # during evaluation, fix the idx to get the same results across runs
+            rnd_idx = np.random.randint(self.__len__(
+            )) if self.mode == "train" else (idx + 1) % self.__len__()
+            return self.__getitem__(rnd_idx)
+        return outs
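
Note: HMERDataSet expects each entry of label_file_list to be a JSON object
mapping an image basename (".jpg" is appended at read time) to a
space-separated string of symbol indices. A hypothetical labels.json and how
it is consumed:

    import json
    import numpy as np

    labels = json.loads('{"hme_01": "23 1 45", "hme_02": "12 7"}')  # toy content
    label = np.array([int(t) for t in labels["hme_01"].split()])    # [23  1 45]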
diff --git a/ppocr/data/imaug/__init__.py b/ppocr/data/imaug/__init__.py
index 93d97446d..a64092286 100644
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
@@ -27,7 +27,7 @@ from .make_pse_gt import MakePseGt
from .rec_img_aug import BaseDataAugmentation, RecAug, RecConAug, RecResizeImg, ClsResizeImg, \
SRNRecResizeImg, GrayRecResizeImg, SARRecResizeImg, PRENResizeImg, \
ABINetRecResizeImg, SVTRRecResizeImg, ABINetRecAug, VLRecResizeImg, SPINRecResizeImg, RobustScannerRecResizeImg, \
- RFLRecResizeImg
+ RFLRecResizeImg, GrayImageChannelFormat
from .ssl_img_aug import SSLRotateResize
from .randaugment import RandAugment
from .copy_paste import CopyPaste
diff --git a/ppocr/data/imaug/rec_img_aug.py b/ppocr/data/imaug/rec_img_aug.py
index e22153bde..bc7fbc604 100644
--- a/ppocr/data/imaug/rec_img_aug.py
+++ b/ppocr/data/imaug/rec_img_aug.py
@@ -465,6 +465,36 @@ class RobustScannerRecResizeImg(object):
return data
+class GrayImageChannelFormat(object):
+ """
+ format gray scale image's channel: (3,h,w) -> (1,h,w)
+ Args:
+ normalize: True/False
+ when True convert image dynamic range [0,255]->[0,1]
+ inverse: inverse gray image
+ """
+
+ def __init__(self, normalize=True, inverse=False, **kwargs):
+ self.normalize = normalize
+ self.inverse = inverse
+
+ def __call__(self, data):
+ img = data['image']
+ img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+ img_single_channel = np.expand_dims(img_single_channel, 0)
+
+ if self.normalize:
+ img_single_channel = img_single_channel / 255.0
+
+ if self.inverse:
+ data['image'] = np.abs(img_single_channel - 1).astype('float32')
+ else:
+ data['image'] = img_single_channel.astype('float32')
+
+ data['src_image'] = img
+ return data
+
+
def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25):
imgC, imgH, imgW_min, imgW_max = image_shape
h = img.shape[0]
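
Note: a quick sketch of GrayImageChannelFormat with the settings used by the
CAN config (normalize=True, inverse=True), on a toy all-white image:

    import numpy as np

    op = GrayImageChannelFormat(normalize=True, inverse=True)
    data = op({'image': np.full((32, 32, 3), 255, dtype=np.uint8)})
    print(data['image'].shape)  # (1, 32, 32): gray, channel-first
    print(data['image'].max())  # 0.0: white background inverts to 0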
diff --git a/ppocr/losses/__init__.py b/ppocr/losses/__init__.py
index 6abaa408b..6a34dd1c8 100755
--- a/ppocr/losses/__init__.py
+++ b/ppocr/losses/__init__.py
@@ -40,6 +40,7 @@ from .rec_multi_loss import MultiLoss
from .rec_vl_loss import VLLoss
from .rec_spin_att_loss import SPINAttentionLoss
from .rec_rfl_loss import RFLLoss
+from .rec_can_loss import CANLoss
# cls loss
from .cls_loss import ClsLoss
@@ -71,7 +72,7 @@ def build_loss(config):
'CELoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss',
'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss', 'MultiLoss',
'TableMasterLoss', 'SPINAttentionLoss', 'VLLoss', 'StrokeFocusLoss',
- 'SLALoss', 'CTLoss', 'RFLLoss', 'DRRGLoss'
+ 'SLALoss', 'CTLoss', 'RFLLoss', 'DRRGLoss', 'CANLoss'
]
config = copy.deepcopy(config)
module_name = config.pop('name')
diff --git a/ppocr/losses/rec_can_loss.py b/ppocr/losses/rec_can_loss.py
new file mode 100644
index 000000000..a6c655e0e
--- /dev/null
+++ b/ppocr/losses/rec_can_loss.py
@@ -0,0 +1,61 @@
+import paddle
+import paddle.nn as nn
+import numpy as np
+
+
+class CANLoss(nn.Layer):
+    '''
+    CANLoss consists of two parts:
+        word_average_loss: cross-entropy loss over the predicted symbols
+        counting_loss: smooth L1 loss over the per-symbol counts
+    '''
+
+ def __init__(self):
+ super(CANLoss, self).__init__()
+
+ self.use_label_mask = False
+ self.out_channel = 111
+ self.cross = nn.CrossEntropyLoss(
+ reduction='none') if self.use_label_mask else nn.CrossEntropyLoss()
+ self.counting_loss = nn.SmoothL1Loss(reduction='mean')
+ self.ratio = 16
+
+ def forward(self, preds, batch):
+ word_probs = preds[0]
+ counting_preds = preds[1]
+ counting_preds1 = preds[2]
+ counting_preds2 = preds[3]
+ labels = batch[2]
+ labels_mask = batch[3]
+ counting_labels = gen_counting_label(labels, self.out_channel, True)
+ counting_loss = self.counting_loss(counting_preds1, counting_labels) + self.counting_loss(counting_preds2, counting_labels) \
+ + self.counting_loss(counting_preds, counting_labels)
+
+ word_loss = self.cross(
+ paddle.reshape(word_probs, [-1, word_probs.shape[-1]]),
+ paddle.reshape(labels, [-1]))
+ word_average_loss = paddle.sum(
+ paddle.reshape(word_loss * labels_mask, [-1])) / (
+ paddle.sum(labels_mask) + 1e-10
+ ) if self.use_label_mask else word_loss
+ loss = word_average_loss + counting_loss
+ return {'loss': loss}
+
+
+def gen_counting_label(labels, channel, tag):
+    """Count symbol occurrences per sample; when tag is True, skip the
+    structural ids 0, 1, 107-110 (eos, sos, {, }, ^, _)."""
+    b, t = labels.shape
+ counting_labels = np.zeros([b, channel])
+
+ if tag:
+ ignore = [0, 1, 107, 108, 109, 110]
+ else:
+ ignore = []
+ for i in range(b):
+ for j in range(t):
+ k = labels[i][j]
+ if k in ignore:
+ continue
+ else:
+ counting_labels[i][k] += 1
+ counting_labels = paddle.to_tensor(counting_labels, dtype='float32')
+ return counting_labels
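
Note: a worked example of gen_counting_label on a small numpy batch (the
training pipeline passes paddle tensors); ids 0 and 1 (eos/sos) fall in the
ignore list when tag is True:

    import numpy as np

    labels = np.array([[1, 5, 5, 9, 0],
                       [1, 9, 0, 0, 0]])
    counting = gen_counting_label(labels, channel=111, tag=True)
    print(float(counting[0][5]))  # 2.0 -- symbol 5 occurs twice in sample 0
    print(float(counting[1][9]))  # 1.0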
diff --git a/ppocr/metrics/__init__.py b/ppocr/metrics/__init__.py
index 20aea8b59..5e840a194 100644
--- a/ppocr/metrics/__init__.py
+++ b/ppocr/metrics/__init__.py
@@ -22,7 +22,7 @@ import copy
__all__ = ["build_metric"]
from .det_metric import DetMetric, DetFCEMetric
-from .rec_metric import RecMetric, CNTMetric
+from .rec_metric import RecMetric, CNTMetric, CANMetric
from .cls_metric import ClsMetric
from .e2e_metric import E2EMetric
from .distillation_metric import DistillationMetric
@@ -38,7 +38,7 @@ def build_metric(config):
support_dict = [
"DetMetric", "DetFCEMetric", "RecMetric", "ClsMetric", "E2EMetric",
"DistillationMetric", "TableMetric", 'KIEMetric', 'VQASerTokenMetric',
- 'VQAReTokenMetric', 'SRMetric', 'CTMetric', 'CNTMetric'
+ 'VQAReTokenMetric', 'SRMetric', 'CTMetric', 'CNTMetric', 'CANMetric'
]
config = copy.deepcopy(config)
diff --git a/ppocr/metrics/rec_metric.py b/ppocr/metrics/rec_metric.py
index 4758e71d0..305b913c7 100644
--- a/ppocr/metrics/rec_metric.py
+++ b/ppocr/metrics/rec_metric.py
@@ -13,6 +13,9 @@
# limitations under the License.
from rapidfuzz.distance import Levenshtein
+from difflib import SequenceMatcher
+
+import numpy as np
import string
@@ -106,3 +109,71 @@ class CNTMetric(object):
def reset(self):
self.correct_num = 0
self.all_num = 0
+
+
+class CANMetric(object):
+ def __init__(self, main_indicator='exp_rate', **kwargs):
+ self.main_indicator = main_indicator
+ self.word_right = []
+ self.exp_right = []
+ self.word_total_length = 0
+ self.exp_total_num = 0
+ self.word_rate = 0
+ self.exp_rate = 0
+ self.reset()
+ self.epoch_reset()
+
+    def __call__(self, preds, batch, **kwargs):
+        epoch_reset = kwargs.get('epoch_reset', False)
+        if epoch_reset:
+            self.epoch_reset()
+ word_probs = preds
+ word_label, word_label_mask = batch
+ line_right = 0
+ if word_probs is not None:
+ word_pred = word_probs.argmax(2)
+ word_pred = word_pred.cpu().detach().numpy()
+ word_scores = [
+ SequenceMatcher(
+ None,
+ s1[:int(np.sum(s3))],
+ s2[:int(np.sum(s3))],
+ autojunk=False).ratio() * (
+ len(s1[:int(np.sum(s3))]) + len(s2[:int(np.sum(s3))])) /
+ len(s1[:int(np.sum(s3))]) / 2
+ for s1, s2, s3 in zip(word_label, word_pred, word_label_mask)
+ ]
+ batch_size = len(word_scores)
+ for i in range(batch_size):
+ if word_scores[i] == 1:
+ line_right += 1
+ self.word_rate = np.mean(word_scores) #float
+ self.exp_rate = line_right / batch_size #float
+ exp_length, word_length = word_label.shape[:2]
+ self.word_right.append(self.word_rate * word_length)
+ self.exp_right.append(self.exp_rate * exp_length)
+ self.word_total_length = self.word_total_length + word_length
+ self.exp_total_num = self.exp_total_num + exp_length
+
+ def get_metric(self):
+ """
+ return {
+ 'word_rate': 0,
+ "exp_rate": 0,
+ }
+ """
+ cur_word_rate = sum(self.word_right) / self.word_total_length
+ cur_exp_rate = sum(self.exp_right) / self.exp_total_num
+ self.reset()
+ return {'word_rate': cur_word_rate, "exp_rate": cur_exp_rate}
+
+ def reset(self):
+ self.word_rate = 0
+ self.exp_rate = 0
+
+ def epoch_reset(self):
+ self.word_right = []
+ self.exp_right = []
+ self.word_total_length = 0
+ self.exp_total_num = 0
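
Note: CANMetric's exp_rate is the fraction of sequences whose masked
prediction matches the label exactly, and word_rate is a per-symbol
similarity. A toy check where the probabilities put all mass on the labels:

    import numpy as np
    import paddle

    metric = CANMetric()
    label = np.array([[1, 2, 0], [3, 1, 0]])                # two toy sequences
    mask = np.ones_like(label)                              # all positions valid
    probs = paddle.to_tensor(np.eye(4, dtype='float32')[label])  # one-hot [2, 3, 4]
    metric(probs, (label, mask))
    print(metric.get_metric())  # {'word_rate': 1.0, 'exp_rate': 1.0}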
diff --git a/ppocr/modeling/backbones/__init__.py b/ppocr/modeling/backbones/__init__.py
index 84892fa9c..e2c2e9c4a 100755
--- a/ppocr/modeling/backbones/__init__.py
+++ b/ppocr/modeling/backbones/__init__.py
@@ -43,10 +43,12 @@ def build_backbone(config, model_type):
from .rec_svtrnet import SVTRNet
from .rec_vitstr import ViTSTR
from .rec_resnet_rfl import ResNetRFL
+ from .rec_densenet import DenseNet
support_dict = [
'MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB',
'ResNet31', 'ResNet45', 'ResNet_ASTER', 'MicroNet',
- 'EfficientNetb3_PREN', 'SVTRNet', 'ViTSTR', 'ResNet32', 'ResNetRFL'
+ 'EfficientNetb3_PREN', 'SVTRNet', 'ViTSTR', 'ResNet32', 'ResNetRFL',
+ 'DenseNet'
]
elif model_type == 'e2e':
from .e2e_resnet_vd_pg import ResNet
diff --git a/ppocr/modeling/backbones/rec_densenet.py b/ppocr/modeling/backbones/rec_densenet.py
new file mode 100644
index 000000000..d3391d408
--- /dev/null
+++ b/ppocr/modeling/backbones/rec_densenet.py
@@ -0,0 +1,135 @@
+import math
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+
+class Bottleneck(nn.Layer):
+    '''
+    DenseNet bottleneck block: a 1x1 conv to 4*growthRate channels followed
+    by a 3x3 conv to growthRate channels, concatenated with the input.
+    Typical CAN settings: growthRate=24, use_dropout=True.
+    '''
+
+ def __init__(self, nChannels, growthRate, use_dropout):
+ super(Bottleneck, self).__init__()
+ interChannels = 4 * growthRate
+ self.bn1 = nn.BatchNorm2D(interChannels)
+ self.conv1 = nn.Conv2D(
+ nChannels, interChannels, kernel_size=1,
+ bias_attr=None) # Xavier initialization
+ self.bn2 = nn.BatchNorm2D(growthRate)
+ self.conv2 = nn.Conv2D(
+ interChannels, growthRate, kernel_size=3, padding=1,
+ bias_attr=None) # Xavier initialization
+ self.use_dropout = use_dropout
+ self.dropout = nn.Dropout(p=0.2)
+
+ def forward(self, x):
+ out = F.relu(self.bn1(self.conv1(x)))
+ if self.use_dropout:
+ out = self.dropout(out)
+ out = F.relu(self.bn2(self.conv2(out)))
+ if self.use_dropout:
+ out = self.dropout(out)
+ out = paddle.concat([x, out], 1)
+ return out
+
+
+class SingleLayer(nn.Layer):
+ def __init__(self, nChannels, growthRate, use_dropout):
+ super(SingleLayer, self).__init__()
+ self.bn1 = nn.BatchNorm2D(nChannels)
+ self.conv1 = nn.Conv2D(
+ nChannels, growthRate, kernel_size=3, padding=1, bias_attr=False)
+
+ self.use_dropout = use_dropout
+ self.dropout = nn.Dropout(p=0.2)
+
+ def forward(self, x):
+ out = self.conv1(F.relu(x))
+ if self.use_dropout:
+ out = self.dropout(out)
+
+ out = paddle.concat([x, out], 1)
+ return out
+
+
+class Transition(nn.Layer):
+ def __init__(self, nChannels, out_channels, use_dropout):
+ super(Transition, self).__init__()
+ self.bn1 = nn.BatchNorm2D(out_channels)
+ self.conv1 = nn.Conv2D(
+ nChannels, out_channels, kernel_size=1, bias_attr=False)
+ self.use_dropout = use_dropout
+ self.dropout = nn.Dropout(p=0.2)
+
+ def forward(self, x):
+ out = F.relu(self.bn1(self.conv1(x)))
+ if self.use_dropout:
+ out = self.dropout(out)
+ out = F.avg_pool2d(out, 2, ceil_mode=True, exclusive=False)
+ return out
+
+
+class DenseNet(nn.Layer):
+ def __init__(self, growthRate, reduction, bottleneck, use_dropout,
+ input_channel, **kwargs):
+ super(DenseNet, self).__init__()
+        # typical CAN settings (see rec_d28_can.yml): growthRate=24,
+        # reduction=0.5, bottleneck=True, use_dropout=True
+ nDenseBlocks = 16
+ nChannels = 2 * growthRate
+
+ self.conv1 = nn.Conv2D(
+ input_channel,
+ nChannels,
+ kernel_size=7,
+ padding=3,
+ stride=2,
+ bias_attr=False)
+ self.dense1 = self._make_dense(nChannels, growthRate, nDenseBlocks,
+ bottleneck, use_dropout)
+ nChannels += nDenseBlocks * growthRate
+ out_channels = int(math.floor(nChannels * reduction))
+ self.trans1 = Transition(nChannels, out_channels, use_dropout)
+
+ nChannels = out_channels
+ self.dense2 = self._make_dense(nChannels, growthRate, nDenseBlocks,
+ bottleneck, use_dropout)
+ nChannels += nDenseBlocks * growthRate
+ out_channels = int(math.floor(nChannels * reduction))
+ self.trans2 = Transition(nChannels, out_channels, use_dropout)
+
+        nChannels = out_channels
+        self.dense3 = self._make_dense(nChannels, growthRate, nDenseBlocks,
+                                       bottleneck, use_dropout)
+        nChannels += nDenseBlocks * growthRate
+        self.out_channels = nChannels  # 684 with the default settings
+
+ def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck,
+ use_dropout):
+ layers = []
+ for i in range(int(nDenseBlocks)):
+ if bottleneck:
+ layers.append(Bottleneck(nChannels, growthRate, use_dropout))
+ else:
+ layers.append(SingleLayer(nChannels, growthRate, use_dropout))
+ nChannels += growthRate
+ return nn.Sequential(*layers)
+
+ def forward(self, inputs):
+ x, x_m, y = inputs
+ out = self.conv1(x)
+ out = F.relu(out)
+ out = F.max_pool2d(out, 2, ceil_mode=True)
+ out = self.dense1(out)
+ out = self.trans1(out)
+ out = self.dense2(out)
+ out = self.trans2(out)
+ out = self.dense3(out)
+ return out, x_m, y
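
Note: the channel bookkeeping that leads to CANHead's in_channel of 684 in
the config (growthRate=24, reduction=0.5, 16 blocks per stage):

    import math

    growthRate, reduction, nDenseBlocks = 24, 0.5, 16
    c = 2 * growthRate                  # 48  after the stem conv
    c += nDenseBlocks * growthRate      # 432 after dense1
    c = int(math.floor(c * reduction))  # 216 after trans1
    c += nDenseBlocks * growthRate      # 600 after dense2
    c = int(math.floor(c * reduction))  # 300 after trans2
    c += nDenseBlocks * growthRate      # 684 after dense3
    print(c)                            # 684 == CANHead in_channel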
diff --git a/ppocr/modeling/heads/__init__.py b/ppocr/modeling/heads/__init__.py
index 63002140c..fdf5a8a96 100755
--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
@@ -40,6 +40,7 @@ def build_head(config):
from .rec_robustscanner_head import RobustScannerHead
from .rec_visionlan_head import VLHead
from .rec_rfl_head import RFLHead
+ from .rec_can_head import CANHead
# cls head
from .cls_head import ClsHead
@@ -56,7 +57,7 @@ def build_head(config):
'TableAttentionHead', 'SARHead', 'AsterHead', 'SDMGRHead', 'PRENHead',
'MultiHead', 'ABINetHead', 'TableMasterHead', 'SPINAttentionHead',
'VLHead', 'SLAHead', 'RobustScannerHead', 'CT_Head', 'RFLHead',
- 'DRRGHead'
+ 'DRRGHead', 'CANHead'
]
#table head
diff --git a/ppocr/modeling/heads/rec_can_head.py b/ppocr/modeling/heads/rec_can_head.py
new file mode 100644
index 000000000..afd78ee9d
--- /dev/null
+++ b/ppocr/modeling/heads/rec_can_head.py
@@ -0,0 +1,294 @@
+import math
+
+import paddle
+import paddle.nn as nn
+'''
+Counting Module
+'''
+
+
+class ChannelAtt(nn.Layer):
+ def __init__(self, channel, reduction):
+ super(ChannelAtt, self).__init__()
+ self.avg_pool = nn.AdaptiveAvgPool2D(1)
+
+ self.fc = nn.Sequential(
+ nn.Linear(channel, channel // reduction),
+ nn.ReLU(), nn.Linear(channel // reduction, channel), nn.Sigmoid())
+
+ def forward(self, x):
+ b, c, _, _ = x.shape
+ y = paddle.reshape(self.avg_pool(x), [b, c])
+ y = paddle.reshape(self.fc(y), [b, c, 1, 1])
+ return x * y
+
+
+class CountingDecoder(nn.Layer):
+ def __init__(self, in_channel, out_channel, kernel_size):
+ super(CountingDecoder, self).__init__()
+ self.in_channel = in_channel
+ self.out_channel = out_channel
+
+ self.trans_layer = nn.Sequential(
+ nn.Conv2D(
+ self.in_channel,
+ 512,
+ kernel_size=kernel_size,
+ padding=kernel_size // 2,
+ bias_attr=False),
+ nn.BatchNorm2D(512))
+
+ self.channel_att = ChannelAtt(512, 16)
+
+ self.pred_layer = nn.Sequential(
+ nn.Conv2D(
+ 512, self.out_channel, kernel_size=1, bias_attr=False),
+ nn.Sigmoid())
+
+ def forward(self, x, mask):
+ b, _, h, w = x.shape
+ x = self.trans_layer(x)
+ x = self.channel_att(x)
+ x = self.pred_layer(x)
+
+ if mask is not None:
+ x = x * mask
+ x = paddle.reshape(x, [b, self.out_channel, -1])
+ x1 = paddle.sum(x, axis=-1)
+
+ return x1, paddle.reshape(x, [b, self.out_channel, h, w])
+
+
+'''
+Attention Decoder
+'''
+
+
+class PositionEmbeddingSine(nn.Layer):
+ def __init__(self,
+ num_pos_feats=64,
+ temperature=10000,
+ normalize=False,
+ scale=None):
+ super().__init__()
+ self.num_pos_feats = num_pos_feats
+ self.temperature = temperature
+ self.normalize = normalize
+ if scale is not None and normalize is False:
+ raise ValueError("normalize should be True if scale is passed")
+ if scale is None:
+ scale = 2 * math.pi
+ self.scale = scale
+
+ def forward(self, x, mask):
+ y_embed = paddle.cumsum(mask, 1, dtype='float32')
+ x_embed = paddle.cumsum(mask, 2, dtype='float32')
+
+ if self.normalize:
+ eps = 1e-6
+ y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
+ x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
+ dim_t = paddle.arange(self.num_pos_feats, dtype='float32')
+ dim_d = paddle.expand(paddle.to_tensor(2), dim_t.shape)
+ dim_t = self.temperature**(2 * (dim_t / dim_d).astype('int64') /
+ self.num_pos_feats)
+
+ pos_x = paddle.unsqueeze(x_embed, [3]) / dim_t
+ pos_y = paddle.unsqueeze(y_embed, [3]) / dim_t
+
+ pos_x = paddle.flatten(
+ paddle.stack(
+ [
+ paddle.sin(pos_x[:, :, :, 0::2]),
+ paddle.cos(pos_x[:, :, :, 1::2])
+ ],
+ axis=4),
+ 3)
+ pos_y = paddle.flatten(
+ paddle.stack(
+ [
+ paddle.sin(pos_y[:, :, :, 0::2]),
+ paddle.cos(pos_y[:, :, :, 1::2])
+ ],
+ axis=4),
+ 3)
+
+ pos = paddle.transpose(
+ paddle.concat(
+ [pos_y, pos_x], axis=3), [0, 3, 1, 2])
+
+ return pos
+
+
+class AttDecoder(nn.Layer):
+ def __init__(self, ratio, is_train, input_size, hidden_size,
+ encoder_out_channel, dropout, dropout_ratio, word_num,
+ counting_decoder_out_channel, attention):
+ super(AttDecoder, self).__init__()
+ self.input_size = input_size
+ self.hidden_size = hidden_size
+ self.out_channel = encoder_out_channel
+ self.attention_dim = attention['attention_dim']
+ self.dropout_prob = dropout
+ self.ratio = ratio
+ self.word_num = word_num
+
+ self.counting_num = counting_decoder_out_channel
+ self.is_train = is_train
+
+ self.init_weight = nn.Linear(self.out_channel, self.hidden_size)
+ self.embedding = nn.Embedding(self.word_num, self.input_size)
+ self.word_input_gru = nn.GRUCell(self.input_size, self.hidden_size)
+ self.word_attention = Attention(hidden_size, attention['attention_dim'])
+
+ self.encoder_feature_conv = nn.Conv2D(
+ self.out_channel,
+ self.attention_dim,
+ kernel_size=attention['word_conv_kernel'],
+ padding=attention['word_conv_kernel'] // 2)
+
+ self.word_state_weight = nn.Linear(self.hidden_size, self.hidden_size)
+ self.word_embedding_weight = nn.Linear(self.input_size,
+ self.hidden_size)
+ self.word_context_weight = nn.Linear(self.out_channel, self.hidden_size)
+ self.counting_context_weight = nn.Linear(self.counting_num,
+ self.hidden_size)
+ self.word_convert = nn.Linear(self.hidden_size, self.word_num)
+
+ if dropout:
+ self.dropout = nn.Dropout(dropout_ratio)
+
+ def forward(self, cnn_features, labels, counting_preds, images_mask):
+ if self.is_train:
+ _, num_steps = labels.shape
+ else:
+ num_steps = 36
+
+ batch_size, _, height, width = cnn_features.shape
+ images_mask = images_mask[:, :, ::self.ratio, ::self.ratio]
+
+ word_probs = paddle.zeros((batch_size, num_steps, self.word_num))
+ word_alpha_sum = paddle.zeros((batch_size, 1, height, width))
+
+ hidden = self.init_hidden(cnn_features, images_mask)
+ counting_context_weighted = self.counting_context_weight(counting_preds)
+ cnn_features_trans = self.encoder_feature_conv(cnn_features)
+
+ position_embedding = PositionEmbeddingSine(256, normalize=True)
+ pos = position_embedding(cnn_features_trans, images_mask[:, 0, :, :])
+
+ cnn_features_trans = cnn_features_trans + pos
+
+ word = paddle.ones([batch_size, 1], dtype='int64') # init word as sos
+ word = word.squeeze(axis=1)
+ for i in range(num_steps):
+ word_embedding = self.embedding(word)
+ _, hidden = self.word_input_gru(word_embedding, hidden)
+ word_context_vec, _, word_alpha_sum = self.word_attention(
+ cnn_features, cnn_features_trans, hidden, word_alpha_sum,
+ images_mask)
+
+ current_state = self.word_state_weight(hidden)
+ word_weighted_embedding = self.word_embedding_weight(word_embedding)
+ word_context_weighted = self.word_context_weight(word_context_vec)
+
+ if self.dropout_prob:
+ word_out_state = self.dropout(
+ current_state + word_weighted_embedding +
+ word_context_weighted + counting_context_weighted)
+ else:
+ word_out_state = current_state + word_weighted_embedding + word_context_weighted + counting_context_weighted
+
+ word_prob = self.word_convert(word_out_state)
+ word_probs[:, i] = word_prob
+
+ if self.is_train:
+ word = labels[:, i]
+ else:
+ word = word_prob.argmax(1)
+ word = paddle.multiply(
+ word, labels[:, i]
+ ) # labels are oneslike tensor in infer/predict mode
+
+ return word_probs
+
+ def init_hidden(self, features, feature_mask):
+ average = paddle.sum(paddle.sum(features * feature_mask, axis=-1),
+ axis=-1) / paddle.sum(
+ (paddle.sum(feature_mask, axis=-1)), axis=-1)
+ average = self.init_weight(average)
+ return paddle.tanh(average)
+
+
+'''
+Attention Module
+'''
+
+
+class Attention(nn.Layer):
+ def __init__(self, hidden_size, attention_dim):
+ super(Attention, self).__init__()
+ self.hidden = hidden_size
+ self.attention_dim = attention_dim
+ self.hidden_weight = nn.Linear(self.hidden, self.attention_dim)
+ self.attention_conv = nn.Conv2D(
+ 1, 512, kernel_size=11, padding=5, bias_attr=False)
+ self.attention_weight = nn.Linear(
+ 512, self.attention_dim, bias_attr=False)
+ self.alpha_convert = nn.Linear(self.attention_dim, 1)
+
+ def forward(self,
+ cnn_features,
+ cnn_features_trans,
+ hidden,
+ alpha_sum,
+ image_mask=None):
+ query = self.hidden_weight(hidden)
+ alpha_sum_trans = self.attention_conv(alpha_sum)
+ coverage_alpha = self.attention_weight(
+ paddle.transpose(alpha_sum_trans, [0, 2, 3, 1]))
+ alpha_score = paddle.tanh(
+ paddle.unsqueeze(query, [1, 2]) + coverage_alpha + paddle.transpose(
+ cnn_features_trans, [0, 2, 3, 1]))
+ energy = self.alpha_convert(alpha_score)
+ energy = energy - energy.max()
+ energy_exp = paddle.exp(paddle.squeeze(energy, -1))
+
+ if image_mask is not None:
+ energy_exp = energy_exp * paddle.squeeze(image_mask, 1)
+ alpha = energy_exp / (paddle.unsqueeze(
+ paddle.sum(paddle.sum(energy_exp, -1), -1), [1, 2]) + 1e-10)
+ alpha_sum = paddle.unsqueeze(alpha, 1) + alpha_sum
+ context_vector = paddle.sum(
+ paddle.sum((paddle.unsqueeze(alpha, 1) * cnn_features), -1), -1)
+
+ return context_vector, alpha, alpha_sum
+
+
+class CANHead(nn.Layer):
+ def __init__(self, in_channel, out_channel, ratio, attdecoder, **kwargs):
+ super(CANHead, self).__init__()
+
+ self.in_channel = in_channel
+ self.out_channel = out_channel
+
+ self.counting_decoder1 = CountingDecoder(self.in_channel,
+ self.out_channel, 3) # mscm
+ self.counting_decoder2 = CountingDecoder(self.in_channel,
+ self.out_channel, 5)
+
+ self.decoder = AttDecoder(ratio, **attdecoder)
+
+ self.ratio = ratio
+
+ def forward(self, inputs, targets=None):
+ cnn_features, images_mask, labels = inputs
+
+ counting_mask = images_mask[:, :, ::self.ratio, ::self.ratio]
+ counting_preds1, _ = self.counting_decoder1(cnn_features, counting_mask)
+ counting_preds2, _ = self.counting_decoder2(cnn_features, counting_mask)
+ counting_preds = (counting_preds1 + counting_preds2) / 2
+
+ word_probs = self.decoder(cnn_features, labels, counting_preds,
+ images_mask)
+ return word_probs, counting_preds, counting_preds1, counting_preds2
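
Note: a hypothetical smoke test of CANHead with the hyperparameters from
rec_d28_can.yml; the feature and mask shapes below are illustrative only
(ratio=16 means the image mask is 16x the feature-map resolution):

    import paddle

    head = CANHead(
        in_channel=684, out_channel=111, ratio=16,
        attdecoder=dict(
            is_train=False, input_size=256, hidden_size=256,
            encoder_out_channel=684, dropout=True, dropout_ratio=0.5,
            word_num=111, counting_decoder_out_channel=111,
            attention={'attention_dim': 512, 'word_conv_kernel': 1}))
    feats = paddle.rand([1, 684, 8, 8])
    mask = paddle.ones([1, 1, 128, 128])
    labels = paddle.ones([1, 36], dtype='int64')  # ones-like in predict mode
    word_probs, cp, cp1, cp2 = head((feats, mask, labels))
    print(word_probs.shape, cp.shape)  # [1, 36, 111] [1, 111]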
diff --git a/ppocr/optimizer/learning_rate.py b/ppocr/optimizer/learning_rate.py
index 7d45109b4..be52a9184 100644
--- a/ppocr/optimizer/learning_rate.py
+++ b/ppocr/optimizer/learning_rate.py
@@ -18,7 +18,7 @@ from __future__ import print_function
from __future__ import unicode_literals
from paddle.optimizer import lr
-from .lr_scheduler import CyclicalCosineDecay, OneCycleDecay
+from .lr_scheduler import CyclicalCosineDecay, OneCycleDecay, TwoStepCosineDecay
class Linear(object):
@@ -386,3 +386,44 @@ class MultiStepDecay(object):
end_lr=self.learning_rate,
last_epoch=self.last_epoch)
return learning_rate
+
+
+class TwoStepCosine(object):
+ """
+    Two-step cosine learning rate decay: the rate follows a cosine anneal
+    over the first 200 epochs (T_max1), then a slower one over all epochs (T_max2)
+ Args:
+ lr(float): initial learning rate
+ step_each_epoch(int): steps each epoch
+ epochs(int): total training epochs
+ last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+ """
+
+ def __init__(self,
+ learning_rate,
+ step_each_epoch,
+ epochs,
+ warmup_epoch=0,
+ last_epoch=-1,
+ **kwargs):
+ super(TwoStepCosine, self).__init__()
+ self.learning_rate = learning_rate
+ self.T_max1 = step_each_epoch * 200
+ self.T_max2 = step_each_epoch * epochs
+ self.last_epoch = last_epoch
+ self.warmup_epoch = round(warmup_epoch * step_each_epoch)
+
+ def __call__(self):
+ learning_rate = TwoStepCosineDecay(
+ learning_rate=self.learning_rate,
+ T_max1=self.T_max1,
+ T_max2=self.T_max2,
+ last_epoch=self.last_epoch)
+ if self.warmup_epoch > 0:
+ learning_rate = lr.LinearWarmup(
+ learning_rate=learning_rate,
+ warmup_steps=self.warmup_epoch,
+ start_lr=0.0,
+ end_lr=self.learning_rate,
+ last_epoch=self.last_epoch)
+ return learning_rate
diff --git a/ppocr/optimizer/lr_scheduler.py b/ppocr/optimizer/lr_scheduler.py
index f62f1f3b0..cd09367e2 100644
--- a/ppocr/optimizer/lr_scheduler.py
+++ b/ppocr/optimizer/lr_scheduler.py
@@ -160,3 +160,63 @@ class OneCycleDecay(LRScheduler):
start_step = phase['end_step']
return computed_lr
+
+
+class TwoStepCosineDecay(LRScheduler):
+ def __init__(self,
+ learning_rate,
+ T_max1,
+ T_max2,
+ eta_min=0,
+ last_epoch=-1,
+ verbose=False):
+        if not isinstance(T_max1, int):
+            raise TypeError(
+                "The type of 'T_max1' in 'TwoStepCosineDecay' must be 'int', but received %s."
+                % type(T_max1))
+        if not isinstance(T_max2, int):
+            raise TypeError(
+                "The type of 'T_max2' in 'TwoStepCosineDecay' must be 'int', but received %s."
+                % type(T_max2))
+        if not isinstance(eta_min, (float, int)):
+            raise TypeError(
+                "The type of 'eta_min' in 'TwoStepCosineDecay' must be 'float, int', but received %s."
+                % type(eta_min))
+        assert T_max1 > 0 and isinstance(
+            T_max1, int), " 'T_max1' must be a positive integer."
+        assert T_max2 > 0 and isinstance(
+            T_max2, int), " 'T_max2' must be a positive integer."
+ self.T_max1 = T_max1
+ self.T_max2 = T_max2
+ self.eta_min = float(eta_min)
+ super(TwoStepCosineDecay, self).__init__(learning_rate, last_epoch,
+ verbose)
+
+ def get_lr(self):
+
+ if self.last_epoch <= self.T_max1:
+ if self.last_epoch == 0:
+ return self.base_lr
+ elif (self.last_epoch - 1 - self.T_max1) % (2 * self.T_max1) == 0:
+ return self.last_lr + (self.base_lr - self.eta_min) * (
+ 1 - math.cos(math.pi / self.T_max1)) / 2
+
+ return (1 + math.cos(math.pi * self.last_epoch / self.T_max1)) / (
+ 1 + math.cos(math.pi * (self.last_epoch - 1) / self.T_max1)) * (
+ self.last_lr - self.eta_min) + self.eta_min
+ else:
+ if (self.last_epoch - 1 - self.T_max2) % (2 * self.T_max2) == 0:
+ return self.last_lr + (self.base_lr - self.eta_min) * (
+ 1 - math.cos(math.pi / self.T_max2)) / 2
+
+ return (1 + math.cos(math.pi * self.last_epoch / self.T_max2)) / (
+ 1 + math.cos(math.pi * (self.last_epoch - 1) / self.T_max2)) * (
+ self.last_lr - self.eta_min) + self.eta_min
+
+ def _get_closed_form_lr(self):
+ if self.last_epoch <= self.T_max1:
+ return self.eta_min + (self.base_lr - self.eta_min) * (1 + math.cos(
+ math.pi * self.last_epoch / self.T_max1)) / 2
+ else:
+ return self.eta_min + (self.base_lr - self.eta_min) * (1 + math.cos(
+ math.pi * self.last_epoch / self.T_max2)) / 2
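
Note: in closed form the schedule is a cosine half-period whose length
depends on the phase; a one-function equivalent of _get_closed_form_lr
(eta_min defaults to 0):

    import math

    def two_step_cosine_lr(base_lr, step, T_max1, T_max2, eta_min=0.0):
        # first T_max1 steps anneal on a T_max1 cosine, the rest on T_max2
        T = T_max1 if step <= T_max1 else T_max2
        return eta_min + (base_lr - eta_min) * (
            1 + math.cos(math.pi * step / T)) / 2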
diff --git a/ppocr/postprocess/__init__.py b/ppocr/postprocess/__init__.py
index 3a09030b2..e86a7ea70 100644
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -37,6 +37,7 @@ from .table_postprocess import TableMasterLabelDecode, TableLabelDecode
from .picodet_postprocess import PicoDetPostProcess
from .ct_postprocess import CTPostProcess
from .drrg_postprocess import DRRGPostprocess
+from .rec_postprocess import SeqLabelDecode
def build_post_process(config, global_config=None):
@@ -51,7 +52,7 @@ def build_post_process(config, global_config=None):
'TableMasterLabelDecode', 'SPINLabelDecode',
'DistillationSerPostProcess', 'DistillationRePostProcess',
'VLLabelDecode', 'PicoDetPostProcess', 'CTPostProcess',
- 'RFLLabelDecode', 'DRRGPostprocess'
+ 'RFLLabelDecode', 'DRRGPostprocess', 'SeqLabelDecode'
]
if config['name'] == 'PSEPostProcess':
diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py
index 59b5254e4..4d88c278e 100644
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -897,3 +897,36 @@ class VLLabelDecode(BaseRecLabelDecode):
return text
label = self.decode(label)
return text, label
+
+
+class SeqLabelDecode(BaseRecLabelDecode):
+ """ Convert between latex-symbol and symbol-index """
+
+ def __init__(self, character_dict_path=None, use_space_char=False,
+ **kwargs):
+ super(SeqLabelDecode, self).__init__(character_dict_path,
+ use_space_char)
+
+ def decode(self, text_index, preds_prob=None):
+ result_list = []
+ batch_size = len(text_index)
+ for batch_idx in range(batch_size):
+ seq_end = text_index[batch_idx].argmin(0)
+ idx_list = text_index[batch_idx][:seq_end].tolist()
+ symbol_list = [self.character[idx] for idx in idx_list]
+ probs = []
+ if preds_prob is not None:
+ probs = preds_prob[batch_idx][:len(symbol_list)].tolist()
+
+ result_list.append([' '.join(symbol_list), probs])
+ return result_list
+
+ def __call__(self, preds, label=None, *args, **kwargs):
+ pred_prob, _, _, _ = preds
+ preds_idx = pred_prob.argmax(axis=2)
+
+ text = self.decode(preds_idx)
+ if label is None:
+ return text
+ label = self.decode(label)
+ return text, label
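`SeqLabelDecode.decode` relies on the dictionary placing `eos` at index 0, so `argmin(0)` finds the first end-of-sequence position. A toy illustration with a hand-rolled vocabulary (the real `self.character` is loaded from `latex_symbol_dict.txt`):

```python
import numpy as np

character = ['eos', 'sos', 'x', 'y', '+', '=', '\\frac']  # toy vocabulary

pred_idx = np.array([2, 4, 3, 0, 0, 0])   # "x + y" followed by eos padding
seq_end = pred_idx.argmin(0)              # index of the first 0 ('eos') -> 3
symbols = [character[i] for i in pred_idx[:seq_end]]
print(' '.join(symbols))                  # -> "x + y"
```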
diff --git a/ppocr/utils/dict/latex_symbol_dict.txt b/ppocr/utils/dict/latex_symbol_dict.txt
new file mode 100644
index 000000000..b43f1fa8b
--- /dev/null
+++ b/ppocr/utils/dict/latex_symbol_dict.txt
@@ -0,0 +1,111 @@
+eos
+sos
+!
+'
+(
+)
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+<
+=
+>
+A
+B
+C
+E
+F
+G
+H
+I
+L
+M
+N
+P
+R
+S
+T
+V
+X
+Y
+[
+\Delta
+\alpha
+\beta
+\cdot
+\cdots
+\cos
+\div
+\exists
+\forall
+\frac
+\gamma
+\geq
+\in
+\infty
+\int
+\lambda
+\ldots
+\leq
+\lim
+\log
+\mu
+\neq
+\phi
+\pi
+\pm
+\prime
+\rightarrow
+\sigma
+\sin
+\sqrt
+\sum
+\tan
+\theta
+\times
+]
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+\{
+|
+\}
+{
+}
+^
+_
\ No newline at end of file
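The decoder resolves indices against this file line by line, so its 111 entries must match `Head.out_channel: 111` in the config. A quick sanity check (assuming it is run from the repository root):

```python
# Each model output index maps to the symbol on the same line of the dict.
with open('ppocr/utils/dict/latex_symbol_dict.txt') as f:
    character = [line.rstrip('\n') for line in f]

assert character[0] == 'eos' and character[1] == 'sos'
print(len(character))  # 111, matching Head.out_channel in rec_d28_can.yml
```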
diff --git a/test_tipc/configs/rec_d28_can/rec_d28_can.yml b/test_tipc/configs/rec_d28_can/rec_d28_can.yml
new file mode 100644
index 000000000..aeaccb6b0
--- /dev/null
+++ b/test_tipc/configs/rec_d28_can/rec_d28_can.yml
@@ -0,0 +1,114 @@
+Global:
+ use_gpu: True
+ epoch_num: 240
+ log_smooth_window: 20
+ print_batch_step: 10
+ save_model_dir: ./output/rec/can/
+ save_epoch_step: 1
+ # evaluation is run every 1105 iterations
+ eval_batch_step: [0, 1105]
+ cal_metric_during_train: True
+ pretrained_model: ./output/rec/can/CAN
+ checkpoints: ./output/rec/can/CAN
+ save_inference_dir: ./inference/rec_d28_can/
+ use_visualdl: False
+ infer_img: doc/imgs_hme/hme_01.jpeg
+ # for data or label process
+ character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
+ max_text_length: 36
+ infer_mode: False
+ use_space_char: False
+ save_res_path: ./output/rec/predicts_can.txt
+
+Optimizer:
+ name: Momentum
+ momentum: 0.9
+ clip_norm_global: 100.0
+ lr:
+ name: TwoStepCosine
+ learning_rate: 0.01
+ warmup_epoch: 1
+ weight_decay: 0.0001
+
+Architecture:
+ model_type: rec
+ algorithm: CAN
+ in_channels: 1
+ Transform:
+ Backbone:
+ name: DenseNet
+ growthRate: 24
+ reduction: 0.5
+ bottleneck: True
+ use_dropout: True
+ input_channel: 1
+
+ Head:
+ name: CANHead
+ in_channel: 684
+ out_channel: 111
+ max_text_length: 36
+ ratio: 16
+ attdecoder:
+ is_train: True
+ input_size: 256
+ hidden_size: 256
+ encoder_out_channel: 684
+ dropout: True
+ dropout_ratio: 0.5
+ word_num: 111
+ counting_decoder_out_channel: 111
+ attention:
+ attention_dim: 512
+ word_conv_kernel: 1
+
+Loss:
+ name: CANLoss
+
+PostProcess:
+ name: SeqLabelDecode
+ character: 111
+
+Metric:
+ name: CANMetric
+ main_indicator: exp_rate
+
+Train:
+ dataset:
+ name: HMERDataSet
+ data_dir: ./train_data/CROHME/training/images/
+ transforms:
+ - DecodeImage:
+ channel_first: False
+ - GrayImageChannelFormat:
+ normalize: True
+ inverse: True
+ - KeepKeys:
+ keep_keys: ['image', 'label']
+ label_file_list: ["./train_data/CROHME/training/labels.json"]
+ loader:
+ shuffle: True
+ batch_size_per_card: 2
+ drop_last: True
+ num_workers: 1
+ collate_fn: DyMaskCollator
+
+Eval:
+ dataset:
+ name: HMERDataSet
+ data_dir: ./train_data/CROHME/evaluation/images/
+ transforms:
+ - DecodeImage:
+ channel_first: False
+ - GrayImageChannelFormat:
+ normalize: True
+ inverse: True
+ - KeepKeys:
+ keep_keys: ['image', 'label']
+ label_file_list: ["./train_data/CROHME/evaluation/labels.json"]
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size_per_card: 1
+ num_workers: 4
+ collate_fn: DyMaskCollator
diff --git a/test_tipc/configs/rec_d28_can/train_infer_python.txt b/test_tipc/configs/rec_d28_can/train_infer_python.txt
new file mode 100644
index 000000000..be50c5980
--- /dev/null
+++ b/test_tipc/configs/rec_d28_can/train_infer_python.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:rec_d28_can
+python:python
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:null
+Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=240
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=8
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./doc/imgs_hme
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c test_tipc/configs/rec_d28_can/rec_d28_can.yml -o
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:tools/eval.py -c test_tipc/configs/rec_d28_can/rec_d28_can.yml -o
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c test_tipc/configs/rec_d28_can/rec_d28_can.yml -o
+quant_export:null
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+##
+train_model:./inference/rec_d28_can_train/best_accuracy
+infer_export:tools/export_model.py -c test_tipc/configs/rec_d28_can/rec_d28_can.yml -o
+infer_quant:False
+inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/dict/latex_symbol_dict.txt --rec_image_shape="1,100,100" --rec_algorithm="CAN"
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1
+--use_tensorrt:False
+--precision:fp32
+--rec_model_dir:./output/
+--image_dir:./doc/imgs_hme
+--save_log_path:./test/output/
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[1,100,100]}]
diff --git a/tools/eval.py b/tools/eval.py
index 3d1d3813d..21f4d94d5 100755
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -74,7 +74,9 @@ def main():
config['Architecture']["Head"]['out_channels'] = char_num
model = build_model(config['Architecture'])
- extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR", "VisionLAN", "RobustScanner"]
+ extra_input_models = [
+ "SRN", "NRTR", "SAR", "SEED", "SVTR", "VisionLAN", "RobustScanner"
+ ]
extra_input = False
if config['Architecture']['algorithm'] == 'Distillation':
for key in config['Architecture']["Models"]:
@@ -83,7 +85,10 @@ def main():
else:
extra_input = config['Architecture']['algorithm'] in extra_input_models
if "model_type" in config['Architecture'].keys():
- model_type = config['Architecture']['model_type']
+ if config['Architecture']['algorithm'] == 'CAN':
+ model_type = 'can'
+ else:
+ model_type = config['Architecture']['model_type']
else:
model_type = None
@@ -92,7 +97,7 @@ def main():
# amp
use_amp = config["Global"].get("use_amp", False)
amp_level = config["Global"].get("amp_level", 'O2')
- amp_custom_black_list = config['Global'].get('amp_custom_black_list',[])
+ amp_custom_black_list = config['Global'].get('amp_custom_black_list', [])
if use_amp:
AMP_RELATED_FLAGS_SETTING = {
'FLAGS_cudnn_batchnorm_spatial_persistent': 1,
@@ -120,7 +125,8 @@ def main():
# start eval
metric = program.eval(model, valid_dataloader, post_process_class,
- eval_class, model_type, extra_input, scaler, amp_level, amp_custom_black_list)
+ eval_class, model_type, extra_input, scaler,
+ amp_level, amp_custom_black_list)
logger.info('metric eval ***************')
for k, v in metric.items():
logger.info('{}:{}'.format(k, v))
diff --git a/tools/export_model.py b/tools/export_model.py
index 52f05bfcb..4b90fcae4 100755
--- a/tools/export_model.py
+++ b/tools/export_model.py
@@ -123,6 +123,17 @@ def export_single_model(model,
]
]
model = to_static(model, input_spec=other_shape)
+ elif arch_config["algorithm"] == "CAN":
+ other_shape = [[
+ paddle.static.InputSpec(
+ shape=[None, 1, None, None],
+ dtype="float32"), paddle.static.InputSpec(
+ shape=[None, 1, None, None], dtype="float32"),
+ paddle.static.InputSpec(
+ shape=[None, arch_config['Head']['max_text_length']],
+ dtype="int64")
+ ]]
+ model = to_static(model, input_spec=other_shape)
elif arch_config["algorithm"] in ["LayoutLM", "LayoutLMv2", "LayoutXLM"]:
input_spec = [
paddle.static.InputSpec(
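The three `InputSpec`s added for CAN describe a one-channel image with free spatial size, an image mask of the same shape, and a token-id buffer of length `max_text_length`. Dummy inputs matching that contract might look like this (shapes inferred from the spec; a sketch, not exporter code):

```python
import numpy as np

max_text_length = 36   # Architecture.Head.max_text_length in rec_d28_can.yml
h, w = 100, 100        # H and W are None in the spec, so any size works

image = np.zeros((1, 1, h, w), dtype='float32')            # [N, 1, H, W]
image_mask = np.ones((1, 1, h, w), dtype='float32')        # same shape as image
word_label = np.ones((1, max_text_length), dtype='int64')  # [N, max_text_length]
```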
diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py
index bffeb2553..c1604798e 100755
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -108,6 +108,13 @@ class TextRecognizer(object):
}
elif self.rec_algorithm == "PREN":
postprocess_params = {'name': 'PRENLabelDecode'}
+ elif self.rec_algorithm == "CAN":
+ self.inverse = args.rec_image_inverse
+ postprocess_params = {
+ 'name': 'SeqLabelDecode',
+ "character_dict_path": args.rec_char_dict_path,
+ "use_space_char": args.use_space_char
+ }
self.postprocess_op = build_post_process(postprocess_params)
self.predictor, self.input_tensor, self.output_tensors, self.config = \
utility.create_predictor(args, 'rec', logger)
@@ -351,6 +358,30 @@ class TextRecognizer(object):
return resized_image
+ def norm_img_can(self, img, image_shape):
+
+        img = cv2.cvtColor(
+            img, cv2.COLOR_BGR2GRAY)  # CAN only predicts gray-scale images
+
+ if self.inverse:
+ img = 255 - img
+
+ if self.rec_image_shape[0] == 1:
+ h, w = img.shape
+ _, imgH, imgW = self.rec_image_shape
+ if h < imgH or w < imgW:
+ padding_h = max(imgH - h, 0)
+ padding_w = max(imgW - w, 0)
+ img_padded = np.pad(img, ((0, padding_h), (0, padding_w)),
+ 'constant',
+ constant_values=(255))
+ img = img_padded
+
+        img = np.expand_dims(img, 0) / 255.0  # (h, w) -> (1, h, w), scaled to [0, 1]
+ img = img.astype('float32')
+
+ return img
+
def __call__(self, img_list):
img_num = len(img_list)
# Calculate the aspect ratio of all text bars
@@ -430,6 +461,17 @@ class TextRecognizer(object):
word_positions = np.array(range(0, 40)).astype('int64')
word_positions = np.expand_dims(word_positions, axis=0)
word_positions_list.append(word_positions)
+ elif self.rec_algorithm == "CAN":
+ norm_img = self.norm_img_can(img_list[indices[ino]],
+ max_wh_ratio)
+ norm_img = norm_img[np.newaxis, :]
+ norm_img_batch.append(norm_img)
+ norm_image_mask = np.ones(norm_img.shape, dtype='float32')
+ word_label = np.ones([1, 36], dtype='int64')
+ norm_img_mask_batch = []
+ word_label_list = []
+ norm_img_mask_batch.append(norm_image_mask)
+ word_label_list.append(word_label)
else:
norm_img = self.resize_norm_img(img_list[indices[ino]],
max_wh_ratio)
@@ -527,6 +569,33 @@ class TextRecognizer(object):
if self.benchmark:
self.autolog.times.stamp()
preds = outputs[0]
+ elif self.rec_algorithm == "CAN":
+ norm_img_mask_batch = np.concatenate(norm_img_mask_batch)
+ word_label_list = np.concatenate(word_label_list)
+ inputs = [norm_img_batch, norm_img_mask_batch, word_label_list]
+ if self.use_onnx:
+ input_dict = {}
+ input_dict[self.input_tensor.name] = norm_img_batch
+ outputs = self.predictor.run(self.output_tensors,
+ input_dict)
+ preds = outputs
+ else:
+ input_names = self.predictor.get_input_names()
+ input_tensor = []
+ for i in range(len(input_names)):
+ input_tensor_i = self.predictor.get_input_handle(
+ input_names[i])
+ input_tensor_i.copy_from_cpu(inputs[i])
+ input_tensor.append(input_tensor_i)
+ self.input_tensor = input_tensor
+ self.predictor.run()
+ outputs = []
+ for output_tensor in self.output_tensors:
+ output = output_tensor.copy_to_cpu()
+ outputs.append(output)
+ if self.benchmark:
+ self.autolog.times.stamp()
+ preds = outputs
else:
if self.use_onnx:
input_dict = {}
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index f6a44e35a..34cad2590 100644
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -84,6 +84,7 @@ def init_args():
# params for text recognizer
parser.add_argument("--rec_algorithm", type=str, default='SVTR_LCNet')
parser.add_argument("--rec_model_dir", type=str)
+ parser.add_argument("--rec_image_inverse", type=str2bool, default=True)
parser.add_argument("--rec_image_shape", type=str, default="3, 48, 320")
parser.add_argument("--rec_batch_num", type=int, default=6)
parser.add_argument("--max_text_length", type=int, default=25)
diff --git a/tools/infer_rec.py b/tools/infer_rec.py
index cb8a6ec30..29aab9b57 100755
--- a/tools/infer_rec.py
+++ b/tools/infer_rec.py
@@ -141,6 +141,11 @@ def main():
paddle.to_tensor(valid_ratio),
paddle.to_tensor(word_positons),
]
+ if config['Architecture']['algorithm'] == "CAN":
+ image_mask = paddle.ones(
+ (np.expand_dims(
+ batch[0], axis=0).shape), dtype='float32')
+ label = paddle.ones((1, 36), dtype='int64')
images = np.expand_dims(batch[0], axis=0)
images = paddle.to_tensor(images)
if config['Architecture']['algorithm'] == "SRN":
@@ -149,6 +154,8 @@ def main():
preds = model(images, img_metas)
elif config['Architecture']['algorithm'] == "RobustScanner":
preds = model(images, img_metas)
+ elif config['Architecture']['algorithm'] == "CAN":
+ preds = model([images, image_mask, label])
else:
preds = model(images)
post_result = post_process_class(preds)
diff --git a/tools/program.py b/tools/program.py
index 5d2bd5bfb..c491247a6 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -273,6 +273,8 @@ def train(config,
preds = model(images, data=batch[1:])
elif model_type in ["kie"]:
preds = model(batch)
+ elif algorithm in ['CAN']:
+ preds = model(batch[:3])
else:
preds = model(images)
preds = to_float32(preds)
@@ -286,6 +288,8 @@ def train(config,
preds = model(images, data=batch[1:])
elif model_type in ["kie", 'sr']:
preds = model(batch)
+ elif algorithm in ['CAN']:
+ preds = model(batch[:3])
else:
preds = model(images)
loss = loss_class(preds, batch)
@@ -302,6 +306,9 @@ def train(config,
elif model_type in ['table']:
post_result = post_process_class(preds, batch)
eval_class(post_result, batch)
+ elif algorithm in ['CAN']:
+ model_type = 'can'
+ eval_class(preds[0], batch[2:], epoch_reset=(idx == 0))
else:
if config['Loss']['name'] in ['MultiLoss', 'MultiLoss_v2'
]: # for multi head loss
@@ -496,6 +503,8 @@ def eval(model,
preds = model(images, data=batch[1:])
elif model_type in ["kie"]:
preds = model(batch)
+ elif model_type in ['can']:
+ preds = model(batch[:3])
elif model_type in ['sr']:
preds = model(batch)
sr_img = preds["sr_img"]
@@ -508,6 +517,8 @@ def eval(model,
preds = model(images, data=batch[1:])
elif model_type in ["kie"]:
preds = model(batch)
+ elif model_type in ['can']:
+ preds = model(batch[:3])
elif model_type in ['sr']:
preds = model(batch)
sr_img = preds["sr_img"]
@@ -532,6 +543,8 @@ def eval(model,
eval_class(post_result, batch_numpy)
elif model_type in ['sr']:
eval_class(preds, batch_numpy)
+ elif model_type in ['can']:
+ eval_class(preds[0], batch_numpy[2:], epoch_reset=False)
else:
post_result = post_process_class(preds, batch_numpy[1])
eval_class(post_result, batch_numpy)
@@ -629,7 +642,7 @@ def preprocess(is_train=False):
'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE',
'SEED', 'SDMGR', 'LayoutXLM', 'LayoutLM', 'LayoutLMv2', 'PREN', 'FCE',
'SVTR', 'ViTSTR', 'ABINet', 'DB++', 'TableMaster', 'SPIN', 'VisionLAN',
- 'Gestalt', 'SLANet', 'RobustScanner', 'CT', 'RFL', 'DRRG'
+ 'Gestalt', 'SLANet', 'RobustScanner', 'CT', 'RFL', 'DRRG', 'CAN'
]
if use_xpu:
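For orientation, the CAN branches added above assume the batch layout produced by `DyMaskCollator`, namely `[images, image_masks, labels, label_masks]`; the model consumes the first three tensors and the metric compares predictions against the last two. Schematically (with `model`, `batch`, and `eval_class` as in `tools/program.py`):

```python
# Assumed batch layout: [images, image_masks, labels, label_masks]
images, image_masks, labels, label_masks = batch
preds = model([images, image_masks, labels])   # i.e. model(batch[:3])
eval_class(preds[0], [labels, label_masks])    # i.e. eval_class(preds[0], batch[2:])
```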
From 25e56a6f447115737a135a2813225ef50038276a Mon Sep 17 00:00:00 2001
From: dorren
Date: Sat, 15 Oct 2022 20:45:43 +0800
Subject: [PATCH 05/20] add handwritten mathematical expression recognition
 algorithm, co-writers Lllllolita and yeyanli
---
doc/doc_en/algorithm_rec_can_en.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/doc/doc_en/algorithm_rec_can_en.md b/doc/doc_en/algorithm_rec_can_en.md
index f2bc645af..4d7a64f99 100644
--- a/doc/doc_en/algorithm_rec_can_en.md
+++ b/doc/doc_en/algorithm_rec_can_en.md
@@ -1,4 +1,4 @@
-# RobustScanner
+# CAN
- [1. Introduction](#1)
- [2. Environment](#2)
@@ -77,7 +77,7 @@ python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.save_infe
For RobustScanner text recognition model inference, the following commands can be executed:
```
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_hme/hme_01.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_image_shape="1, 132, 519" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_hme/hme_01.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_image_shape="1, 100, 100" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
```
From c57effb84f53d9416482e6fa15d0e2307a62ac3d Mon Sep 17 00:00:00 2001
From: dorren
Date: Mon, 17 Oct 2022 15:04:42 +0800
Subject: [PATCH 06/20] update can data loading method and tipc configs, revert
precommit config
---
.pre-commit-config.yaml | 7 +-
configs/rec/rec_d28_can.yml | 30 +++---
.../crohme_demo}/hme_00.jpg | Bin
.../crohme_demo}/hme_01.jpg | Bin
.../crohme_demo}/hme_02.jpg | Bin
doc/doc_ch/algorithm_rec_can.md | 32 +++---
doc/doc_en/algorithm_rec_can_en.md | 16 +--
ppocr/data/__init__.py | 3 +-
ppocr/data/collate_fn.py | 6 +-
ppocr/data/hmer_dataset.py | 99 ------------------
ppocr/data/imaug/label_ops.py | 31 +++++-
test_tipc/configs/rec_d28_can/rec_d28_can.yml | 34 +++---
.../rec_d28_can/train_infer_python.txt | 10 +-
test_tipc/prepare.sh | 7 ++
test_tipc/readme.md | 1 +
tools/program.py | 2 +-
16 files changed, 117 insertions(+), 161 deletions(-)
rename doc/{imgs_hme => datasets/crohme_demo}/hme_00.jpg (100%)
rename doc/{imgs_hme => datasets/crohme_demo}/hme_01.jpg (100%)
rename doc/{imgs_hme => datasets/crohme_demo}/hme_02.jpg (100%)
delete mode 100644 ppocr/data/hmer_dataset.py
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b6a299ba4..1584bc76a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,11 +1,10 @@
-repos:
- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
- rev: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
+ sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
hooks:
- id: yapf
files: \.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: a11d9314b22d8f8c7556443875b731ef05965464
+ sha: a11d9314b22d8f8c7556443875b731ef05965464
hooks:
- id: check-merge-conflict
- id: check-symlinks
@@ -16,7 +15,7 @@ repos:
- id: trailing-whitespace
files: \.md$
- repo: https://github.com/Lucas-C/pre-commit-hooks
- rev: v1.0.1
+ sha: v1.0.1
hooks:
- id: forbid-crlf
files: \.md$
diff --git a/configs/rec/rec_d28_can.yml b/configs/rec/rec_d28_can.yml
index aeaccb6b0..9fe936ae1 100644
--- a/configs/rec/rec_d28_can.yml
+++ b/configs/rec/rec_d28_can.yml
@@ -5,14 +5,14 @@ Global:
print_batch_step: 10
save_model_dir: ./output/rec/can/
save_epoch_step: 1
- # evaluation is run every 1105 iterations
+  # evaluation is run every 1105 iterations (1 epoch, batch_size = 8)
eval_batch_step: [0, 1105]
cal_metric_during_train: True
- pretrained_model: ./output/rec/can/CAN
- checkpoints: ./output/rec/can/CAN
- save_inference_dir: ./inference/rec_d28_can/
+ pretrained_model:
+ checkpoints:
+ save_inference_dir:
use_visualdl: False
- infer_img: doc/imgs_hme/hme_01.jpeg
+ infer_img: doc/datasets/crohme_demo/hme_00.jpg
# for data or label process
character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
max_text_length: 36
@@ -75,7 +75,7 @@ Metric:
Train:
dataset:
- name: HMERDataSet
+ name: PGDataSet
data_dir: ./train_data/CROHME/training/images/
transforms:
- DecodeImage:
@@ -83,19 +83,22 @@ Train:
- GrayImageChannelFormat:
normalize: True
inverse: True
+ - SeqLabelEncode:
+ character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
+ lower: False
- KeepKeys:
keep_keys: ['image', 'label']
- label_file_list: ["./train_data/CROHME/training/labels.json"]
+ label_file_list: ["./train_data/CROHME/training/labels.txt"]
loader:
shuffle: True
- batch_size_per_card: 2
- drop_last: True
- num_workers: 1
+ batch_size_per_card: 8
+ drop_last: False
+ num_workers: 4
collate_fn: DyMaskCollator
Eval:
dataset:
- name: HMERDataSet
+ name: PGDataSet
data_dir: ./train_data/CROHME/evaluation/images/
transforms:
- DecodeImage:
@@ -103,9 +106,12 @@ Eval:
- GrayImageChannelFormat:
normalize: True
inverse: True
+ - SeqLabelEncode:
+ character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
+ lower: False
- KeepKeys:
keep_keys: ['image', 'label']
- label_file_list: ["./train_data/CROHME/evaluation/labels.json"]
+ label_file_list: ["./train_data/CROHME/evaluation/labels.txt"]
loader:
shuffle: False
drop_last: False
diff --git a/doc/imgs_hme/hme_00.jpg b/doc/datasets/crohme_demo/hme_00.jpg
similarity index 100%
rename from doc/imgs_hme/hme_00.jpg
rename to doc/datasets/crohme_demo/hme_00.jpg
diff --git a/doc/imgs_hme/hme_01.jpg b/doc/datasets/crohme_demo/hme_01.jpg
similarity index 100%
rename from doc/imgs_hme/hme_01.jpg
rename to doc/datasets/crohme_demo/hme_01.jpg
diff --git a/doc/imgs_hme/hme_02.jpg b/doc/datasets/crohme_demo/hme_02.jpg
similarity index 100%
rename from doc/imgs_hme/hme_02.jpg
rename to doc/datasets/crohme_demo/hme_02.jpg
diff --git a/doc/doc_ch/algorithm_rec_can.md b/doc/doc_ch/algorithm_rec_can.md
index 9585dae0c..8a012b490 100644
--- a/doc/doc_ch/algorithm_rec_can.md
+++ b/doc/doc_ch/algorithm_rec_can.md
@@ -1,4 +1,4 @@
-# Handwritten Mathematical Expression Recognition Algorithm - ABINet
+# Handwritten Mathematical Expression Recognition Algorithm - CAN
- [1. Algorithm Introduction](#1)
- [2. Environment Setup](#2)
@@ -27,7 +27,7 @@
|Model|Backbone|Config|ExpRate|Download link|
| ----- | ----- | ----- | ----- | ----- |
-|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72|[trained model](https://paddleocr.bj.bcebos.com/rec_r45_abinet_train.tar)|
+|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72|[trained model](https://paddleocr.bj.bcebos.com/contribution/can_train.tar)|
## 2. Environment Setup
@@ -60,16 +60,21 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs
python3 tools/train.py -c configs/rec/rec_d28_can.yml
-o Train.dataset.transforms.GrayImageChannelFormat.inverse=False
```
+- By default, evaluation is run once per epoch (every 1105 iterations). If you change the training batch_size or switch to another dataset, modify the command at training time as follows:
+```
+python3 tools/train.py -c configs/rec/rec_d28_can.yml
+-o Global.eval_batch_step=[0, {length_of_dataset//batch_size}]
+```
#
### 3.2 Evaluation
-You can download the trained [model file](#model) and evaluate it with the following command:
+You can download the trained [model file](https://paddleocr.bj.bcebos.com/contribution/can_train.tar) and evaluate it with the following command:
```shell
-# Note: set the path of pretrained_model to a local path.
-python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/best_accuracy
+# Note: set the path of pretrained_model to a local path. If you use a model saved from your own training, change the path and file name to {path/to/weights}/{model_name}.
+python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/CAN
```
@@ -78,9 +83,9 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec
Use the following command to predict a single image:
```shell
# Note: set the path of pretrained_model to a local path.
-python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/imgs_hme/hme_01.jpg' Global.pretrained_model=./rec_d28_can_train/best_accuracy
+python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/datasets/crohme_demo/hme_00.jpg' Global.pretrained_model=./rec_d28_can_train/CAN
-# To predict all images in a folder, set infer_img to the folder path, e.g. Global.infer_img='./doc/imgs_hme/'.
+# To predict all images in a folder, set infer_img to the folder path, e.g. Global.infer_img='./doc/datasets/crohme_demo/'.
```
@@ -89,17 +94,16 @@ python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.a
### 4.1 Python Inference
-First, convert the best model obtained from training into an inference model. Taking the trained model as an example ([model download link](https://paddleocr.bj.bcebos.com/rec_d28_can_train.tar)), run the following command to convert it:
+First, convert the best model obtained from training into an inference model. Taking the trained model as an example ([model download link](https://paddleocr.bj.bcebos.com/contribution/can_train.tar)), run the following command to convert it:
```shell
# Note: set the path of pretrained_model to a local path.
-python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.save_inference_dir=./inference/rec_d28_can/ Architecture.Head.attdecoder.is_train=False
+python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/CAN Global.save_inference_dir=./inference/rec_d28_can/ Architecture.Head.attdecoder.is_train=False
# The current static-graph model has a default maximum output length of 36. If you need to predict longer sequences, specify an appropriate output length when exporting the model, e.g. Architecture.Head.max_text_length=72
```
**Note:**
- If you trained the model on your own dataset and adjusted the dictionary file, check that `character_dict_path` in the config file points to the dictionary you need.
-- If you changed the input size during training, modify the `infer_shape` corresponding to ABINet in `tools/export_model.py`.
After successful conversion, there are three files in the directory:
```
@@ -112,18 +116,18 @@ python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.save_infe
Run the following command for model inference:
```shell
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_hme/hme_01.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
+python3 tools/infer/predict_rec.py --image_dir="./doc/datasets/crohme_demo/hme_00.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
-# To predict all images in a folder, set image_dir to the folder path, e.g. --image_dir='./doc/imgs_hme/'.
+# To predict all images in a folder, set image_dir to the folder path, e.g. --image_dir='./doc/datasets/crohme_demo/'.
# If you need to predict images with black characters on a white background, set --rec_image_inverse=False
```
-
+
After running the command, the prediction result (recognized text) of the image above is printed to the screen, for example:
```shell
-Predicts of ./doc/imgs_hme/hme_03.jpg:['x _ { k } x x _ { k } + y _ { k } y x _ { k }', []]
+Predicts of ./doc/imgs_hme/hme_00.jpg:['x _ { k } x x _ { k } + y _ { k } y x _ { k }', []]
```
diff --git a/doc/doc_en/algorithm_rec_can_en.md b/doc/doc_en/algorithm_rec_can_en.md
index 4d7a64f99..da6c9c609 100644
--- a/doc/doc_en/algorithm_rec_can_en.md
+++ b/doc/doc_en/algorithm_rec_can_en.md
@@ -25,7 +25,7 @@ Using CROHME handwritten mathematical expression recognition datasets for training
|Model|Backbone|config|exprate|Download link|
| --- | --- | --- | --- | --- |
-|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72|coming soon|
+|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72|[trained model](https://paddleocr.bj.bcebos.com/contribution/can_train.tar)|
## 2. Environment
@@ -53,14 +53,14 @@ Evaluation:
```
# GPU evaluation
-python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/best_accuracy
+python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/CAN
```
Prediction:
```
# The configuration file used for prediction must match the training
-python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/imgs_hme/hme_01.jpg' Global.pretrained_model=./rec_d28_can_train/best_accuracy
+python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/crohme_demo/hme_00.jpg' Global.pretrained_model=./rec_d28_can_train/CAN
```
@@ -68,16 +68,20 @@ python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.a
### 4.1 Python Inference
-First, the model saved during the RobustScanner text recognition training process is converted into an inference model. you can use the following command to convert:
+First, the model saved during the CAN handwritten mathematical expression recognition training process is converted into an inference model. You can use the following command to convert:
```
python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.save_inference_dir=./inference/rec_d28_can/ Architecture.Head.attdecoder.is_train=False
+
+# The default maximum output length of the model is 36. If you need to predict a longer sequence, specify an appropriate output length when exporting the model, e.g. Architecture.Head.max_text_length=72
```
-For RobustScanner text recognition model inference, the following commands can be executed:
+For CAN handwritten mathematical expression recognition model inference, the following commands can be executed:
```
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_hme/hme_01.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_image_shape="1, 100, 100" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
+python3 tools/infer/predict_rec.py --image_dir="./doc/crohme_demo/hme_00.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
+
+# If you need to predict on a picture with black characters on a white background, please set --rec_image_inverse=False
```
diff --git a/ppocr/data/__init__.py b/ppocr/data/__init__.py
index 1f3de63de..b602a346d 100644
--- a/ppocr/data/__init__.py
+++ b/ppocr/data/__init__.py
@@ -37,7 +37,6 @@ from ppocr.data.simple_dataset import SimpleDataSet
from ppocr.data.lmdb_dataset import LMDBDataSet, LMDBDataSetSR
from ppocr.data.pgnet_dataset import PGDataSet
from ppocr.data.pubtab_dataset import PubTabDataSet
-from ppocr.data.hmer_dataset import HMERDataSet
__all__ = ['build_dataloader', 'transform', 'create_operators']
@@ -56,7 +55,7 @@ def build_dataloader(config, mode, device, logger, seed=None):
support_dict = [
'SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet',
- 'LMDBDataSetSR', 'HMERDataSet'
+ 'LMDBDataSetSR'
]
module_name = config[mode]['dataset']['name']
assert module_name in support_dict, Exception(
diff --git a/ppocr/data/collate_fn.py b/ppocr/data/collate_fn.py
index fec1e895f..067b2158a 100644
--- a/ppocr/data/collate_fn.py
+++ b/ppocr/data/collate_fn.py
@@ -95,8 +95,8 @@ class DyMaskCollator(object):
1] > max_height else max_height
max_width = item[0].shape[2] if item[0].shape[
2] > max_width else max_width
- max_length = item[1].shape[0] if item[1].shape[
- 0] > max_length else max_length
+ max_length = len(item[1]) if len(item[
+ 1]) > max_length else max_length
proper_items.append(item)
images, image_masks = np.zeros(
@@ -111,7 +111,7 @@ class DyMaskCollator(object):
_, h, w = proper_items[i][0].shape
images[i][:, :h, :w] = proper_items[i][0]
image_masks[i][:, :h, :w] = 1
- l = proper_items[i][1].shape[0]
+ l = len(proper_items[i][1])
labels[i][:l] = proper_items[i][1]
label_masks[i][:l] = 1
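The lines above switch `DyMaskCollator` from array labels to plain lists, so lengths are read with `len(item[1])`. For context, a minimal re-implementation of the collation idea (illustrative only, not the class itself): pad every image to the batch maximum and mark the valid region in a mask, and pad labels to the longest sequence likewise.

```python
import numpy as np

def collate(batch):  # batch: list of (image[c, h, w], label: list[int])
    c = batch[0][0].shape[0]
    max_h = max(img.shape[1] for img, _ in batch)
    max_w = max(img.shape[2] for img, _ in batch)
    max_len = max(len(lab) for _, lab in batch)

    images = np.zeros((len(batch), c, max_h, max_w), dtype='float32')
    image_masks = np.zeros_like(images)
    labels = np.zeros((len(batch), max_len), dtype='int64')
    label_masks = np.zeros_like(labels)

    for i, (img, lab) in enumerate(batch):
        _, h, w = img.shape
        images[i, :, :h, :w] = img
        image_masks[i, :, :h, :w] = 1   # 1 marks real pixels, 0 marks padding
        labels[i, :len(lab)] = lab
        label_masks[i, :len(lab)] = 1
    return images, image_masks, labels, label_masks
```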
diff --git a/ppocr/data/hmer_dataset.py b/ppocr/data/hmer_dataset.py
deleted file mode 100644
index d5d92f264..000000000
--- a/ppocr/data/hmer_dataset.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os, json, random, traceback
-import numpy as np
-
-from PIL import Image
-from paddle.io import Dataset
-
-from .imaug import transform, create_operators
-
-
-class HMERDataSet(Dataset):
- def __init__(self, config, mode, logger, seed=None):
- super(HMERDataSet, self).__init__()
-
- self.logger = logger
- self.seed = seed
- self.mode = mode
-
- global_config = config['Global']
- dataset_config = config[mode]['dataset']
- self.data_dir = config[mode]['dataset']['data_dir']
-
- label_file_list = dataset_config['label_file_list']
- data_source_num = len(label_file_list)
- ratio_list = dataset_config.get("ratio_list", [1.0])
-
- self.data_lines, self.labels = self.get_image_info_list(label_file_list,
- ratio_list)
- self.data_idx_order_list = list(range(len(self.data_lines)))
- if self.mode == "train" and self.do_shuffle:
- self.shuffle_data_random()
-
- if isinstance(ratio_list, (float, int)):
- ratio_list = [float(ratio_list)] * int(data_source_num)
-
- assert len(
- ratio_list
- ) == data_source_num, "The length of ratio_list should be the same as the file_list."
-
- self.ops = create_operators(dataset_config['transforms'], global_config)
- self.need_reset = True in [x < 1 for x in ratio_list]
-
- def get_image_info_list(self, file_list, ratio_list):
- if isinstance(file_list, str):
- file_list = [file_list]
- labels = {}
- for idx, file in enumerate(file_list):
- with open(file, "r") as f:
- lines = json.load(f)
- labels.update(lines)
- data_lines = [name for name in labels.keys()]
- return data_lines, labels
-
- def shuffle_data_random(self):
- random.seed(self.seed)
- random.shuffle(self.data_lines)
- return
-
- def __len__(self):
- return len(self.data_idx_order_list)
-
- def __getitem__(self, idx):
- file_idx = self.data_idx_order_list[idx]
- data_name = self.data_lines[file_idx]
- try:
- file_name = data_name + '.jpg'
- img_path = os.path.join(self.data_dir, file_name)
- if not os.path.exists(img_path):
- raise Exception("{} does not exist!".format(img_path))
- with open(img_path, 'rb') as f:
- img = f.read()
-
- label = self.labels.get(data_name).split()
- label = np.array([int(item) for item in label])
-
- data = {'image': img, 'label': label}
- outs = transform(data, self.ops)
- except:
- self.logger.error(
- "When parsing line {}, error happened with msg: {}".format(
- file_name, traceback.format_exc()))
- outs = None
- if outs is None:
- # during evaluation, we should fix the idx to get same results for many times of evaluation.
- rnd_idx = np.random.randint(self.__len__())
- return self.__getitem__(rnd_idx)
- return outs
diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
index 2a2ac2dec..ae916b2ee 100644
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -1476,4 +1476,33 @@ class CTLabelEncode(object):
data['polys'] = boxes
data['texts'] = txts
- return data
\ No newline at end of file
+ return data
+
+
+class SeqLabelEncode(BaseRecLabelEncode):
+ def __init__(self,
+ character_dict_path,
+ max_text_length=100,
+ use_space_char=False,
+ lower=True,
+ **kwargs):
+ super(SeqLabelEncode, self).__init__(
+ max_text_length, character_dict_path, use_space_char, lower)
+
+ def encode(self, text_seq):
+ text_seq_encoded = []
+ for text in text_seq:
+ if text not in self.character:
+ continue
+ text_seq_encoded.append(self.dict.get(text))
+ if len(text_seq_encoded) == 0:
+ return None
+ return text_seq_encoded
+
+ def __call__(self, data):
+ label = data['label']
+ if isinstance(label, str):
+ label = label.strip().split()
+ label.append(self.end_str)
+ data['label'] = self.encode(label)
+ return data
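A toy walk-through of `SeqLabelEncode.__call__` (vocabulary hand-rolled for illustration; the real `self.dict` is built by `BaseRecLabelEncode` from the latex dictionary): the label is split into space-separated tokens, `eos` is appended, and unknown tokens are silently dropped.

```python
char_dict = {'eos': 0, 'sos': 1, 'x': 2, '+': 3, 'y': 4}  # toy vocabulary

label = 'x + y'.strip().split()   # ['x', '+', 'y']
label.append('eos')
encoded = [char_dict[t] for t in label if t in char_dict]
print(encoded)                    # -> [2, 3, 4, 0]
```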
diff --git a/test_tipc/configs/rec_d28_can/rec_d28_can.yml b/test_tipc/configs/rec_d28_can/rec_d28_can.yml
index aeaccb6b0..ac7b07712 100644
--- a/test_tipc/configs/rec_d28_can/rec_d28_can.yml
+++ b/test_tipc/configs/rec_d28_can/rec_d28_can.yml
@@ -5,14 +5,14 @@ Global:
print_batch_step: 10
save_model_dir: ./output/rec/can/
save_epoch_step: 1
- # evaluation is run every 1105 iterations
+  # evaluation is run every 1105 iterations (1 epoch, batch_size = 8)
eval_batch_step: [0, 1105]
cal_metric_during_train: True
- pretrained_model: ./output/rec/can/CAN
- checkpoints: ./output/rec/can/CAN
- save_inference_dir: ./inference/rec_d28_can/
+ pretrained_model:
+ checkpoints:
+ save_inference_dir:
use_visualdl: False
- infer_img: doc/imgs_hme/hme_01.jpeg
+ infer_img: doc/datasets/crohme_demo/hme_00.jpg
# for data or label process
character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
max_text_length: 36
@@ -75,37 +75,43 @@ Metric:
Train:
dataset:
- name: HMERDataSet
- data_dir: ./train_data/CROHME/training/images/
+ name: PGDataSet
+ data_dir: ./train_data/CROHME_lite/training/images/
transforms:
- DecodeImage:
channel_first: False
- GrayImageChannelFormat:
normalize: True
inverse: True
+ - SeqLabelEncode:
+ character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
+ lower: False
- KeepKeys:
keep_keys: ['image', 'label']
- label_file_list: ["./train_data/CROHME/training/labels.json"]
+ label_file_list: ["./train_data/CROHME_lite/training/labels.txt"]
loader:
shuffle: True
- batch_size_per_card: 2
- drop_last: True
- num_workers: 1
+ batch_size_per_card: 8
+ drop_last: False
+ num_workers: 4
collate_fn: DyMaskCollator
Eval:
dataset:
- name: HMERDataSet
- data_dir: ./train_data/CROHME/evaluation/images/
+ name: PGDataSet
+ data_dir: ./train_data/CROHME_lite/evaluation/images/
transforms:
- DecodeImage:
channel_first: False
- GrayImageChannelFormat:
normalize: True
inverse: True
+ - SeqLabelEncode:
+ character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
+ lower: False
- KeepKeys:
keep_keys: ['image', 'label']
- label_file_list: ["./train_data/CROHME/evaluation/labels.json"]
+ label_file_list: ["./train_data/CROHME_lite/evaluation/labels.txt"]
loader:
shuffle: False
drop_last: False
diff --git a/test_tipc/configs/rec_d28_can/train_infer_python.txt b/test_tipc/configs/rec_d28_can/train_infer_python.txt
index be50c5980..731d327cd 100644
--- a/test_tipc/configs/rec_d28_can/train_infer_python.txt
+++ b/test_tipc/configs/rec_d28_can/train_infer_python.txt
@@ -1,6 +1,6 @@
===========================train_params===========================
model_name:rec_d28_can
-python:python
+python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:null
@@ -9,7 +9,7 @@ Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=8
Global.pretrained_model:null
train_model_name:latest
-train_infer_img_dir:./doc/imgs_hme
+train_infer_img_dir:./doc/datasets/crohme_demo
null:null
##
trainer:norm_train
@@ -37,15 +37,15 @@ export2:null
train_model:./inference/rec_d28_can_train/best_accuracy
infer_export:tools/export_model.py -c test_tipc/configs/rec_d28_can/rec_d28_can.yml -o
infer_quant:False
-inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/dict/latex_symbol_dict.txt --rec_image_shape="1,100,100" --rec_algorithm="CAN"
+inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/dict/latex_symbol_dict.txt --rec_algorithm="CAN"
--use_gpu:True|False
--enable_mkldnn:False
--cpu_threads:6
--rec_batch_num:1
--use_tensorrt:False
--precision:fp32
---rec_model_dir:./output/
---image_dir:./doc/imgs_hme
+--rec_model_dir:
+--image_dir:./doc/datasets/crohme_demo
--save_log_path:./test/output/
--benchmark:True
null:null
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index 5ca426e28..4aab17019 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -257,6 +257,13 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate
cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../
fi
+ if [ ${model_name} == "rec_d28_can" ]; then
+ wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/contribution/can_train.tar --no-check-certificate
+ cd ./pretrain_models/ && tar xf can_train.tar && cd ../
+ wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/CROHME_lite.tar --no-check-certificate
+ cd ./train_data/ && tar xf CROHME_lite.tar && cd ../
+
+ fi
if [ ${model_name} == "layoutxlm_ser" ]; then
${python_name} -m pip install -r ppstructure/kie/requirements.txt
${python_name} -m pip install opencv-python -U
diff --git a/test_tipc/readme.md b/test_tipc/readme.md
index 1442ee1c8..9f02c2e30 100644
--- a/test_tipc/readme.md
+++ b/test_tipc/readme.md
@@ -44,6 +44,7 @@
| SAST |det_r50_vd_sast_totaltext_v2.0 | detection | supported | multi-machine multi-GPU <br> mixed precision | - | - |
| Rosetta|rec_mv3_none_none_ctc_v2.0 | recognition | supported | multi-machine multi-GPU <br> mixed precision | - | - |
| Rosetta|rec_r34_vd_none_none_ctc_v2.0 | recognition | supported | multi-machine multi-GPU <br> mixed precision | - | - |
+| CAN |rec_d28_can | recognition | supported | multi-machine multi-GPU <br> mixed precision | - | - |
| CRNN |rec_mv3_none_bilstm_ctc_v2.0 | recognition | supported | multi-machine multi-GPU <br> mixed precision | - | - |
| CRNN |rec_r34_vd_none_bilstm_ctc_v2.0| recognition | supported | multi-machine multi-GPU <br> mixed precision | - | - |
| StarNet|rec_mv3_tps_bilstm_ctc_v2.0 | recognition | supported | multi-machine multi-GPU <br> mixed precision | - | - |
diff --git a/tools/program.py b/tools/program.py
index c491247a6..a0594e950 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -544,7 +544,7 @@ def eval(model,
elif model_type in ['sr']:
eval_class(preds, batch_numpy)
elif model_type in ['can']:
- eval_class(preds[0], batch_numpy[2:], epoch_reset=False)
+ eval_class(preds[0], batch_numpy[2:], epoch_reset=(idx == 0))
else:
post_result = post_process_class(preds, batch_numpy[1])
eval_class(post_result, batch_numpy)
From 9606bec16a7d7b8a1abcfa113a69d40f837b4cc5 Mon Sep 17 00:00:00 2001
From: andyjpaddle
Date: Mon, 17 Oct 2022 07:41:36 +0000
Subject: [PATCH 07/20] fix visionlan default dict
---
doc/doc_ch/algorithm_rec_visionlan.md | 2 +-
doc/doc_en/algorithm_rec_visionlan_en.md | 2 +-
ppocr/data/imaug/label_ops.py | 9 +++++++--
ppocr/postprocess/rec_postprocess.py | 10 ++++++++--
4 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/doc/doc_ch/algorithm_rec_visionlan.md b/doc/doc_ch/algorithm_rec_visionlan.md
index df039491d..84b5ef682 100644
--- a/doc/doc_ch/algorithm_rec_visionlan.md
+++ b/doc/doc_ch/algorithm_rec_visionlan.md
@@ -139,7 +139,7 @@ Predicts of ./doc/imgs_words/en/word_2.png:('yourself', 0.9999493)
## 5. FAQ
1. The MJSynth and SynthText datasets come from the [VisionLAN repo](https://github.com/wangyuxin87/VisionLAN).
-2. We use the pre-trained model provided by the VisionLAN authors for finetuning.
+2. We use the pre-trained model provided by the VisionLAN authors for finetuning; the dictionary matching the pre-trained model is 'ppocr/utils/ic15_dict.txt'.
## Citation
diff --git a/doc/doc_en/algorithm_rec_visionlan_en.md b/doc/doc_en/algorithm_rec_visionlan_en.md
index 70c2ccc47..cf2293b3d 100644
--- a/doc/doc_en/algorithm_rec_visionlan_en.md
+++ b/doc/doc_en/algorithm_rec_visionlan_en.md
@@ -120,7 +120,7 @@ Not supported
## 5. FAQ
1. Note that the MJSynth and SynthText datasets come from [VisionLAN repo](https://github.com/wangyuxin87/VisionLAN).
-2. We use the pre-trained model provided by the VisionLAN authors for finetune training.
+2. We use the pre-trained model provided by the VisionLAN authors for finetune training. The dictionary for the pre-trained model is 'ppocr/utils/ic15_dict.txt'.
## Citation
diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
index 2a2ac2dec..511471c76 100644
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -107,6 +107,7 @@ class BaseRecLabelEncode(object):
self.beg_str = "sos"
self.end_str = "eos"
self.lower = lower
+ self.use_default_dict = False
if character_dict_path is None:
logger = get_logger()
@@ -116,8 +117,11 @@ class BaseRecLabelEncode(object):
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str)
self.lower = True
+ self.use_default_dict = True
else:
self.character_str = []
+ if 'ppocr/utils/ic15_dict.txt' in character_dict_path:
+ self.use_default_dict = True
with open(character_dict_path, "rb") as fin:
lines = fin.readlines()
for line in lines:
@@ -1400,8 +1404,9 @@ class VLLabelEncode(BaseRecLabelEncode):
**kwargs):
super(VLLabelEncode, self).__init__(
max_text_length, character_dict_path, use_space_char, lower)
- self.character = self.character[10:] + self.character[
- 1:10] + [self.character[0]]
+ if self.use_default_dict:
+ self.character = self.character[10:] + self.character[
+ 1:10] + [self.character[0]]
self.dict = {}
for i, char in enumerate(self.character):
self.dict[char] = i
diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py
index 59b5254e4..98753ef7a 100644
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -26,10 +26,15 @@ class BaseRecLabelDecode(object):
self.end_str = "eos"
self.reverse = False
self.character_str = []
+ self.use_default_dict = False
+
if character_dict_path is None:
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str)
+ self.use_default_dict = True
else:
+ if 'ppocr/utils/ic15_dict.txt' in character_dict_path:
+ self.use_default_dict = True
with open(character_dict_path, "rb") as fin:
lines = fin.readlines()
for line in lines:
@@ -805,8 +810,9 @@ class VLLabelDecode(BaseRecLabelDecode):
super(VLLabelDecode, self).__init__(character_dict_path, use_space_char)
self.max_text_length = kwargs.get('max_text_length', 25)
self.nclass = len(self.character) + 1
- self.character = self.character[10:] + self.character[
- 1:10] + [self.character[0]]
+ if self.use_default_dict:
+ self.character = self.character[10:] + self.character[
+ 1:10] + [self.character[0]]
def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
""" convert text-index into text-label. """
From f875556541cf4374287257a3864fcf4fb9d6bcac Mon Sep 17 00:00:00 2001
From: dorren
Date: Mon, 17 Oct 2022 17:18:14 +0800
Subject: [PATCH 08/20] update can transform method and add copyright info for
new file
---
configs/rec/rec_d28_can.yml | 23 +++++++------
ppocr/data/imaug/__init__.py | 2 +-
ppocr/data/imaug/label_ops.py | 4 +--
ppocr/data/imaug/operators.py | 24 ++++++++++++++
ppocr/data/imaug/rec_img_aug.py | 30 -----------------
ppocr/losses/rec_can_loss.py | 18 +++++++++++
ppocr/modeling/backbones/rec_densenet.py | 32 +++++++++++--------
ppocr/modeling/heads/rec_can_head.py | 27 +++++++++++++++-
ppocr/postprocess/__init__.py | 4 +--
ppocr/postprocess/rec_postprocess.py | 4 +--
test_tipc/configs/rec_d28_can/rec_d28_can.yml | 31 ++++++++++--------
.../rec_d28_can/train_infer_python.txt | 4 +--
test_tipc/prepare.sh | 1 -
tools/infer/predict_rec.py | 2 +-
14 files changed, 129 insertions(+), 77 deletions(-)
diff --git a/configs/rec/rec_d28_can.yml b/configs/rec/rec_d28_can.yml
index 9fe936ae1..2149100da 100644
--- a/configs/rec/rec_d28_can.yml
+++ b/configs/rec/rec_d28_can.yml
@@ -42,7 +42,6 @@ Architecture:
bottleneck: True
use_dropout: True
input_channel: 1
-
Head:
name: CANHead
in_channel: 684
@@ -66,8 +65,8 @@ Loss:
name: CANLoss
PostProcess:
- name: SeqLabelDecode
- character: 111
+ name: CANLabelDecode
+ character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
Metric:
name: CANMetric
@@ -75,15 +74,18 @@ Metric:
Train:
dataset:
- name: PGDataSet
+ name: SimpleDataSet
data_dir: ./train_data/CROHME/training/images/
transforms:
- DecodeImage:
channel_first: False
+ - NormalizeImage:
+ mean: [0,0,0]
+ std: [1,1,1]
+ order: 'hwc'
- GrayImageChannelFormat:
- normalize: True
inverse: True
- - SeqLabelEncode:
+ - CANLabelEncode:
character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
lower: False
- KeepKeys:
@@ -98,15 +100,18 @@ Train:
Eval:
dataset:
- name: PGDataSet
+ name: SimpleDataSet
data_dir: ./train_data/CROHME/evaluation/images/
transforms:
- DecodeImage:
channel_first: False
+ - NormalizeImage:
+ mean: [0,0,0]
+ std: [1,1,1]
+ order: 'hwc'
- GrayImageChannelFormat:
- normalize: True
inverse: True
- - SeqLabelEncode:
+ - CANLabelEncode:
character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
lower: False
- KeepKeys:
diff --git a/ppocr/data/imaug/__init__.py b/ppocr/data/imaug/__init__.py
index a64092286..93d97446d 100644
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
@@ -27,7 +27,7 @@ from .make_pse_gt import MakePseGt
from .rec_img_aug import BaseDataAugmentation, RecAug, RecConAug, RecResizeImg, ClsResizeImg, \
SRNRecResizeImg, GrayRecResizeImg, SARRecResizeImg, PRENResizeImg, \
ABINetRecResizeImg, SVTRRecResizeImg, ABINetRecAug, VLRecResizeImg, SPINRecResizeImg, RobustScannerRecResizeImg, \
- RFLRecResizeImg, GrayImageChannelFormat
+ RFLRecResizeImg
from .ssl_img_aug import SSLRotateResize
from .randaugment import RandAugment
from .copy_paste import CopyPaste
diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
index ae916b2ee..e1389639b 100644
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -1479,14 +1479,14 @@ class CTLabelEncode(object):
return data
-class SeqLabelEncode(BaseRecLabelEncode):
+class CANLabelEncode(BaseRecLabelEncode):
def __init__(self,
character_dict_path,
max_text_length=100,
use_space_char=False,
lower=True,
**kwargs):
- super(SeqLabelEncode, self).__init__(
+ super(CANLabelEncode, self).__init__(
max_text_length, character_dict_path, use_space_char, lower)
def encode(self, text_seq):
diff --git a/ppocr/data/imaug/operators.py b/ppocr/data/imaug/operators.py
index 5e84b1aac..4ff2d29ed 100644
--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@@ -498,3 +498,27 @@ class ResizeNormalize(object):
img_numpy = np.array(img).astype("float32")
img_numpy = img_numpy.transpose((2, 0, 1)) / 255
return img_numpy
+
+
+class GrayImageChannelFormat(object):
+ """
+    format a BGR image as a single-channel gray image: (h, w, 3) -> (1, h, w)
+    Args:
+        inverse: invert the gray image
+ """
+
+ def __init__(self, inverse=False, **kwargs):
+ self.inverse = inverse
+
+ def __call__(self, data):
+ img = data['image']
+ img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+ img_expanded = np.expand_dims(img_single_channel, 0)
+
+ if self.inverse:
+ data['image'] = np.abs(img_expanded - 1)
+ else:
+ data['image'] = img_expanded
+
+ data['src_image'] = img
+ return data
\ No newline at end of file
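Since the rewritten `GrayImageChannelFormat` assumes the preceding `NormalizeImage` step (mean 0, std 1, 1/255 scale) has already mapped pixels to [0, 1], inversion reduces to `|x - 1|`. A hedged usage sketch with a stand-in image:

```python
import cv2
import numpy as np

img = np.random.rand(32, 32, 3).astype('float32')  # stand-in for a normalized BGR image

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # (32, 32)
chw = np.expand_dims(gray, 0)                 # -> (1, 32, 32)
inverted = np.abs(chw - 1)                    # white background becomes black

print(chw.shape, float(inverted.min()) >= 0)  # (1, 32, 32) True
```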
diff --git a/ppocr/data/imaug/rec_img_aug.py b/ppocr/data/imaug/rec_img_aug.py
index bc7fbc604..e22153bde 100644
--- a/ppocr/data/imaug/rec_img_aug.py
+++ b/ppocr/data/imaug/rec_img_aug.py
@@ -465,36 +465,6 @@ class RobustScannerRecResizeImg(object):
return data
-class GrayImageChannelFormat(object):
- """
- format gray scale image's channel: (3,h,w) -> (1,h,w)
- Args:
- normalize: True/False
- when True convert image dynamic range [0,255]->[0,1]
- inverse: inverse gray image
- """
-
- def __init__(self, normalize=True, inverse=False, **kwargs):
- self.normalize = normalize
- self.inverse = inverse
-
- def __call__(self, data):
- img = data['image']
- img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
- img_single_channel = np.expand_dims(img_single_channel, 0)
-
- if self.normalize:
- img_single_channel = img_single_channel / 255.0
-
- if self.inverse:
- data['image'] = np.abs(img_single_channel - 1).astype('float32')
- else:
- data['image'] = img_single_channel.astype('float32')
-
- data['src_image'] = img
- return data
-
-
def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25):
imgC, imgH, imgW_min, imgW_max = image_shape
h = img.shape[0]
diff --git a/ppocr/losses/rec_can_loss.py b/ppocr/losses/rec_can_loss.py
index a6c655e0e..227e17f5e 100644
--- a/ppocr/losses/rec_can_loss.py
+++ b/ppocr/losses/rec_can_loss.py
@@ -1,3 +1,21 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is adapted from:
+https://github.com/LBH1024/CAN/models/can.py
+"""
+
import paddle
import paddle.nn as nn
import numpy as np
diff --git a/ppocr/modeling/backbones/rec_densenet.py b/ppocr/modeling/backbones/rec_densenet.py
index d3391d408..b9fab765b 100644
--- a/ppocr/modeling/backbones/rec_densenet.py
+++ b/ppocr/modeling/backbones/rec_densenet.py
@@ -1,3 +1,21 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
import math
import paddle
import paddle.nn as nn
@@ -5,14 +23,6 @@ import paddle.nn.functional as F
class Bottleneck(nn.Layer):
- '''
- ratio: 16
- growthRate: 24
- reduction: 0.5
- bottleneck: True
- use_dropout: True
- '''
-
def __init__(self, nChannels, growthRate, use_dropout):
super(Bottleneck, self).__init__()
interChannels = 4 * growthRate
@@ -78,11 +88,7 @@ class DenseNet(nn.Layer):
def __init__(self, growthRate, reduction, bottleneck, use_dropout,
input_channel, **kwargs):
super(DenseNet, self).__init__()
- '''
- ratio: 16
- growthRate: 24
- reduction: 0.5
- '''
+
nDenseBlocks = 16
nChannels = 2 * growthRate
diff --git a/ppocr/modeling/heads/rec_can_head.py b/ppocr/modeling/heads/rec_can_head.py
index afd78ee9d..732dbfe2d 100644
--- a/ppocr/modeling/heads/rec_can_head.py
+++ b/ppocr/modeling/heads/rec_can_head.py
@@ -1,4 +1,29 @@
-from turtle import forward
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is adapted from:
+https://github.com/LBH1024/CAN/models/can.py
+https://github.com/LBH1024/CAN/models/counting.py
+https://github.com/LBH1024/CAN/models/decoder.py
+https://github.com/LBH1024/CAN/models/attention.py
+
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
import paddle.nn as nn
import paddle
import math
diff --git a/ppocr/postprocess/__init__.py b/ppocr/postprocess/__init__.py
index e86a7ea70..36a3152f2 100644
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -37,7 +37,7 @@ from .table_postprocess import TableMasterLabelDecode, TableLabelDecode
from .picodet_postprocess import PicoDetPostProcess
from .ct_postprocess import CTPostProcess
from .drrg_postprocess import DRRGPostprocess
-from .rec_postprocess import SeqLabelDecode
+from .rec_postprocess import CANLabelDecode
def build_post_process(config, global_config=None):
@@ -52,7 +52,7 @@ def build_post_process(config, global_config=None):
'TableMasterLabelDecode', 'SPINLabelDecode',
'DistillationSerPostProcess', 'DistillationRePostProcess',
'VLLabelDecode', 'PicoDetPostProcess', 'CTPostProcess',
- 'RFLLabelDecode', 'DRRGPostprocess', 'SeqLabelDecode'
+ 'RFLLabelDecode', 'DRRGPostprocess', 'CANLabelDecode'
]
if config['name'] == 'PSEPostProcess':
diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py
index 4d88c278e..0664ac6d9 100644
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -899,12 +899,12 @@ class VLLabelDecode(BaseRecLabelDecode):
return text, label
-class SeqLabelDecode(BaseRecLabelDecode):
+class CANLabelDecode(BaseRecLabelDecode):
""" Convert between latex-symbol and symbol-index """
def __init__(self, character_dict_path=None, use_space_char=False,
**kwargs):
- super(SeqLabelDecode, self).__init__(character_dict_path,
+ super(CANLabelDecode, self).__init__(character_dict_path,
use_space_char)
def decode(self, text_index, preds_prob=None):
diff --git a/test_tipc/configs/rec_d28_can/rec_d28_can.yml b/test_tipc/configs/rec_d28_can/rec_d28_can.yml
index ac7b07712..2149100da 100644
--- a/test_tipc/configs/rec_d28_can/rec_d28_can.yml
+++ b/test_tipc/configs/rec_d28_can/rec_d28_can.yml
@@ -42,7 +42,6 @@ Architecture:
bottleneck: True
use_dropout: True
input_channel: 1
-
Head:
name: CANHead
in_channel: 684
@@ -66,8 +65,8 @@ Loss:
name: CANLoss
PostProcess:
- name: SeqLabelDecode
- character: 111
+ name: CANLabelDecode
+ character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
Metric:
name: CANMetric
@@ -75,20 +74,23 @@ Metric:
Train:
dataset:
- name: PGDataSet
- data_dir: ./train_data/CROHME_lite/training/images/
+ name: SimpleDataSet
+ data_dir: ./train_data/CROHME/training/images/
transforms:
- DecodeImage:
channel_first: False
+ - NormalizeImage:
+ mean: [0,0,0]
+ std: [1,1,1]
+ order: 'hwc'
- GrayImageChannelFormat:
- normalize: True
inverse: True
- - SeqLabelEncode:
+ - CANLabelEncode:
character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
lower: False
- KeepKeys:
keep_keys: ['image', 'label']
- label_file_list: ["./train_data/CROHME_lite/training/labels.txt"]
+ label_file_list: ["./train_data/CROHME/training/labels.txt"]
loader:
shuffle: True
batch_size_per_card: 8
@@ -98,20 +100,23 @@ Train:
Eval:
dataset:
- name: PGDataSet
- data_dir: ./train_data/CROHME_lite/evaluation/images/
+ name: SimpleDataSet
+ data_dir: ./train_data/CROHME/evaluation/images/
transforms:
- DecodeImage:
channel_first: False
+ - NormalizeImage:
+ mean: [0,0,0]
+ std: [1,1,1]
+ order: 'hwc'
- GrayImageChannelFormat:
- normalize: True
inverse: True
- - SeqLabelEncode:
+ - CANLabelEncode:
character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
lower: False
- KeepKeys:
keep_keys: ['image', 'label']
- label_file_list: ["./train_data/CROHME_lite/evaluation/labels.txt"]
+ label_file_list: ["./train_data/CROHME/evaluation/labels.txt"]
loader:
shuffle: False
drop_last: False
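The NormalizeImage step added above uses mean [0,0,0] and std [1,1,1] in 'hwc' order, so with PaddleOCR's usual 1/255 scaling it reduces to a cast into [0, 1], replacing the normalize flag removed from GrayImageChannelFormat. A plain numpy re-statement, with the 1/255 scale as our assumption about the operator's default:

```python
# Hedged re-statement of the added NormalizeImage config; the 1/255 scale
# is an assumption about the operator's default, the rest mirrors the yml.
import numpy as np

def normalize_image(img, mean=(0, 0, 0), std=(1, 1, 1), scale=1 / 255.0):
    x = img.astype('float32') * scale                           # bytes -> [0, 1]
    mean = np.asarray(mean, dtype='float32').reshape(1, 1, -1)  # 'hwc' order
    std = np.asarray(std, dtype='float32').reshape(1, 1, -1)
    return (x - mean) / std                                     # identity with 0/1

img = np.random.randint(0, 256, (32, 32, 3), dtype='uint8')
out = normalize_image(img)
assert 0.0 <= out.min() and out.max() <= 1.0
```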
diff --git a/test_tipc/configs/rec_d28_can/train_infer_python.txt b/test_tipc/configs/rec_d28_can/train_infer_python.txt
index 731d327cd..1794e78cf 100644
--- a/test_tipc/configs/rec_d28_can/train_infer_python.txt
+++ b/test_tipc/configs/rec_d28_can/train_infer_python.txt
@@ -1,7 +1,7 @@
===========================train_params===========================
model_name:rec_d28_can
-python:python3.7
-gpu_list:0|0,1
+python:python
+gpu_list:0|0
Global.use_gpu:True|True
Global.auto_cast:null
Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=240
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index 4aab17019..dc0d2fdb0 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -262,7 +262,6 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
cd ./pretrain_models/ && tar xf can_train.tar && cd ../
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/CROHME_lite.tar --no-check-certificate
cd ./train_data/ && tar xf CROHME_lite.tar && cd ../
-
fi
if [ ${model_name} == "layoutxlm_ser" ]; then
${python_name} -m pip install -r ppstructure/kie/requirements.txt
diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py
index c1604798e..b3ef557c0 100755
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -111,7 +111,7 @@ class TextRecognizer(object):
elif self.rec_algorithm == "CAN":
self.inverse = args.rec_image_inverse
postprocess_params = {
- 'name': 'SeqLabelDecode',
+ 'name': 'CANLabelDecode',
"character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char
}
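These params are handed to build_post_process, whose dispatch the earlier __init__.py hunk extends with 'CANLabelDecode'. A condensed, self-contained sketch of that name-based dispatch follows; the stub class and the globals() lookup are our stand-ins for the real module's classes and resolution mechanism:

```python
# Condensed sketch of name-based dispatch in build_post_process; the stub
# class and globals() lookup stand in for the real module's machinery.
class CANLabelDecode:
    def __init__(self, **kwargs):
        self.kwargs = kwargs

def build_post_process(config, global_config=None):
    support_dict = ['CANLabelDecode']          # abbreviated support list
    config = dict(config)
    if global_config is not None:
        config.update(global_config)           # Global settings fill defaults
    name = config.pop('name')
    assert name in support_dict, f'{name} is not supported'
    return globals()[name](**config)

pp = build_post_process({'name': 'CANLabelDecode',
                         'character_dict_path': 'latex_symbol_dict.txt'})
```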
From c44f3bc78ce777c8efc8ef10a344d434bbd73338 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Mon, 17 Oct 2022 09:51:32 +0000
Subject: [PATCH 09/20] add pse tipc
---
test_tipc/prepare.sh | 3 +++
1 file changed, 3 insertions(+)
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index 5ca426e28..62cda1d8a 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -241,6 +241,9 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
if [ ${model_name} == "ch_ppocr_mobile_v2_0_det_FPGM" ]; then
${python_name} -m pip install paddleslim
fi
+ if [ ${model_name} == "det_r50_vd_pse_v2_0" ]; then
+ wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams --no-check-certificate
+ fi
if [ ${model_name} == "det_mv3_east_v2_0" ]; then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar --no-check-certificate
cd ./pretrain_models/ && tar xf det_mv3_east_v2.0_train.tar && cd ../
From e410d2e1c902c747200e0231377c7daed54a8db7 Mon Sep 17 00:00:00 2001
From: dorren
Date: Mon, 17 Oct 2022 19:16:02 +0800
Subject: [PATCH 10/20] update tipc config
---
test_tipc/configs/rec_d28_can/rec_d28_can.yml | 8 ++++----
test_tipc/configs/rec_d28_can/train_infer_python.txt | 4 ++--
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/test_tipc/configs/rec_d28_can/rec_d28_can.yml b/test_tipc/configs/rec_d28_can/rec_d28_can.yml
index 2149100da..92917c20d 100644
--- a/test_tipc/configs/rec_d28_can/rec_d28_can.yml
+++ b/test_tipc/configs/rec_d28_can/rec_d28_can.yml
@@ -75,7 +75,7 @@ Metric:
Train:
dataset:
name: SimpleDataSet
- data_dir: ./train_data/CROHME/training/images/
+ data_dir: ./train_data/CROHME_lite/training/images/
transforms:
- DecodeImage:
channel_first: False
@@ -90,7 +90,7 @@ Train:
lower: False
- KeepKeys:
keep_keys: ['image', 'label']
- label_file_list: ["./train_data/CROHME/training/labels.txt"]
+ label_file_list: ["./train_data/CROHME_lite/training/labels.txt"]
loader:
shuffle: True
batch_size_per_card: 8
@@ -101,7 +101,7 @@ Train:
Eval:
dataset:
name: SimpleDataSet
- data_dir: ./train_data/CROHME/evaluation/images/
+ data_dir: ./train_data/CROHME_lite/evaluation/images/
transforms:
- DecodeImage:
channel_first: False
@@ -116,7 +116,7 @@ Eval:
lower: False
- KeepKeys:
keep_keys: ['image', 'label']
- label_file_list: ["./train_data/CROHME/evaluation/labels.txt"]
+ label_file_list: ["./train_data/CROHME_lite/evaluation/labels.txt"]
loader:
shuffle: False
drop_last: False
diff --git a/test_tipc/configs/rec_d28_can/train_infer_python.txt b/test_tipc/configs/rec_d28_can/train_infer_python.txt
index 1794e78cf..731d327cd 100644
--- a/test_tipc/configs/rec_d28_can/train_infer_python.txt
+++ b/test_tipc/configs/rec_d28_can/train_infer_python.txt
@@ -1,7 +1,7 @@
===========================train_params===========================
model_name:rec_d28_can
-python:python
-gpu_list:0|0
+python:python3.7
+gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:null
Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=240
From ea557c50eef30dc2f8f5a76d9920877ed4e159c8 Mon Sep 17 00:00:00 2001
From: dorren
Date: Mon, 17 Oct 2022 19:58:35 +0800
Subject: [PATCH 11/20] update config
---
configs/rec/rec_d28_can.yml | 7 ++-----
ppocr/modeling/backbones/rec_densenet.py | 5 +++++
test_tipc/configs/rec_d28_can/rec_d28_can.yml | 7 ++-----
3 files changed, 9 insertions(+), 10 deletions(-)
diff --git a/configs/rec/rec_d28_can.yml b/configs/rec/rec_d28_can.yml
index 2149100da..7c3b0fd3d 100644
--- a/configs/rec/rec_d28_can.yml
+++ b/configs/rec/rec_d28_can.yml
@@ -66,7 +66,6 @@ Loss:
PostProcess:
name: CANLabelDecode
- character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
Metric:
name: CANMetric
@@ -76,6 +75,7 @@ Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/CROHME/training/images/
+ label_file_list: ["./train_data/CROHME/training/labels.txt"]
transforms:
- DecodeImage:
channel_first: False
@@ -86,11 +86,9 @@ Train:
- GrayImageChannelFormat:
inverse: True
- CANLabelEncode:
- character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
lower: False
- KeepKeys:
keep_keys: ['image', 'label']
- label_file_list: ["./train_data/CROHME/training/labels.txt"]
loader:
shuffle: True
batch_size_per_card: 8
@@ -102,6 +100,7 @@ Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/CROHME/evaluation/images/
+ label_file_list: ["./train_data/CROHME/evaluation/labels.txt"]
transforms:
- DecodeImage:
channel_first: False
@@ -112,11 +111,9 @@ Eval:
- GrayImageChannelFormat:
inverse: True
- CANLabelEncode:
- character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
lower: False
- KeepKeys:
keep_keys: ['image', 'label']
- label_file_list: ["./train_data/CROHME/evaluation/labels.txt"]
loader:
shuffle: False
drop_last: False
diff --git a/ppocr/modeling/backbones/rec_densenet.py b/ppocr/modeling/backbones/rec_densenet.py
index b9fab765b..65c5fa4f2 100644
--- a/ppocr/modeling/backbones/rec_densenet.py
+++ b/ppocr/modeling/backbones/rec_densenet.py
@@ -11,6 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+"""
+This code is referred from:
+https://github.com/LBH1024/CAN/models/densenet.py
+
+"""
from __future__ import absolute_import
from __future__ import division
diff --git a/test_tipc/configs/rec_d28_can/rec_d28_can.yml b/test_tipc/configs/rec_d28_can/rec_d28_can.yml
index 92917c20d..550186586 100644
--- a/test_tipc/configs/rec_d28_can/rec_d28_can.yml
+++ b/test_tipc/configs/rec_d28_can/rec_d28_can.yml
@@ -66,7 +66,6 @@ Loss:
PostProcess:
name: CANLabelDecode
- character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
Metric:
name: CANMetric
@@ -76,6 +75,7 @@ Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/CROHME_lite/training/images/
+ label_file_list: ["./train_data/CROHME_lite/training/labels.txt"]
transforms:
- DecodeImage:
channel_first: False
@@ -86,11 +86,9 @@ Train:
- GrayImageChannelFormat:
inverse: True
- CANLabelEncode:
- character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
lower: False
- KeepKeys:
keep_keys: ['image', 'label']
- label_file_list: ["./train_data/CROHME_lite/training/labels.txt"]
loader:
shuffle: True
batch_size_per_card: 8
@@ -102,6 +100,7 @@ Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/CROHME_lite/evaluation/images/
+ label_file_list: ["./train_data/CROHME_lite/evaluation/labels.txt"]
transforms:
- DecodeImage:
channel_first: False
@@ -112,11 +111,9 @@ Eval:
- GrayImageChannelFormat:
inverse: True
- CANLabelEncode:
- character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
lower: False
- KeepKeys:
keep_keys: ['image', 'label']
- label_file_list: ["./train_data/CROHME_lite/evaluation/labels.txt"]
loader:
shuffle: False
drop_last: False
From 273068527f9c7217ff9af34df4a20dbd378b4841 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Tue, 18 Oct 2022 07:31:07 +0000
Subject: [PATCH 12/20] import DRRGHead only in DRRG
---
ppocr/modeling/heads/__init__.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/ppocr/modeling/heads/__init__.py b/ppocr/modeling/heads/__init__.py
index 63002140c..c203af016 100755
--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
@@ -24,7 +24,6 @@ def build_head(config):
from .det_fce_head import FCEHead
from .e2e_pg_head import PGHead
from .det_ct_head import CT_Head
- from .det_drrg_head import DRRGHead
# rec head
from .rec_ctc_head import CTCHead
@@ -59,6 +58,10 @@ def build_head(config):
'DRRGHead'
]
+ if config['name'] == 'DRRGHead':
+ from .det_drrg_head import DRRGHead
+ support_dict.append('DRRGHead')
+
#table head
module_name = config.pop('name')
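The point of this hunk is deferred importing: DRRGHead drags in extra dependencies, so it is only imported when a DRRG model is actually built. A self-contained sketch of the pattern, with stand-in classes instead of the real head modules:

```python
# Lazy registration sketch: the expensive head is only registered when
# requested. Class bodies are stand-ins, not PaddleOCR's actual heads.
class CTCHead: ...
class DRRGHead: ...   # imagine this import pulling heavy extra dependencies

def build_head(config):
    support_dict = ['CTCHead']                # heads imported unconditionally
    if config['name'] == 'DRRGHead':
        # In the real code, `from .det_drrg_head import DRRGHead` happens
        # only inside this branch, so other models never pay for it.
        support_dict.append('DRRGHead')
    name = config.pop('name')
    assert name in support_dict, f'head {name} is not supported'
    return globals()[name]()

head = build_head({'name': 'DRRGHead'})
```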
From 4078b0fee8c3df6ef223789731e348dea7462a49 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Tue, 18 Oct 2022 10:03:11 +0000
Subject: [PATCH 13/20] fix pact bug in slanet
---
ppstructure/kie/requirements.txt | 2 +-
ppstructure/table/predict_table.py | 18 +++++++++---------
.../layoutxlm_ser/train_pact_infer_python.txt | 4 ++--
.../configs/slanet/train_pact_infer_python.txt | 2 +-
test_tipc/prepare.sh | 3 ++-
5 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/ppstructure/kie/requirements.txt b/ppstructure/kie/requirements.txt
index 11fa98da1..6cfcba764 100644
--- a/ppstructure/kie/requirements.txt
+++ b/ppstructure/kie/requirements.txt
@@ -4,4 +4,4 @@ seqeval
pypandoc
attrdict
python_docx
-https://paddleocr.bj.bcebos.com/ppstructure/whl/paddlenlp-2.3.0.dev0-py3-none-any.whl
+paddlenlp>=2.4.1
diff --git a/ppstructure/table/predict_table.py b/ppstructure/table/predict_table.py
index aeec66dec..fdf611b7f 100644
--- a/ppstructure/table/predict_table.py
+++ b/ppstructure/table/predict_table.py
@@ -58,6 +58,7 @@ def expand(pix, det_box, shape):
class TableSystem(object):
def __init__(self, args, text_detector=None, text_recognizer=None):
+ self.args = args
if not args.show_log:
logger.setLevel(logging.INFO)
@@ -99,13 +100,18 @@ class TableSystem(object):
result = dict()
time_dict = {'det': 0, 'rec': 0, 'table': 0, 'all': 0, 'match': 0}
start = time.time()
-
+ if self.args.benchmark:
+ self.autolog.times.start()
structure_res, elapse = self._structure(copy.deepcopy(img))
+ if self.benchmark:
+ self.autolog.times.stamp()
result['cell_bbox'] = structure_res[1].tolist()
time_dict['table'] = elapse
dt_boxes, rec_res, det_elapse, rec_elapse = self._ocr(
copy.deepcopy(img))
+ if self.benchmark:
+ self.autolog.times.stamp()
time_dict['det'] = det_elapse
time_dict['rec'] = rec_elapse
@@ -118,24 +124,18 @@ class TableSystem(object):
toc = time.time()
time_dict['match'] = toc - tic
result['html'] = pred_html
- if self.benchmark:
- self.autolog.times.end(stamp=True)
end = time.time()
time_dict['all'] = end - start
if self.benchmark:
- self.autolog.times.stamp()
+ self.autolog.times.end(stamp=True)
return result, time_dict
def _structure(self, img):
- if self.benchmark:
- self.autolog.times.start()
structure_res, elapse = self.table_structurer(copy.deepcopy(img))
return structure_res, elapse
def _ocr(self, img):
h, w = img.shape[:2]
- if self.benchmark:
- self.autolog.times.stamp()
dt_boxes, det_elapse = self.text_detector(copy.deepcopy(img))
dt_boxes = sorted_boxes(dt_boxes)
@@ -233,7 +233,7 @@ def main(args):
f_html.close()
if args.benchmark:
- text_sys.autolog.report()
+ table_sys.autolog.report()
if __name__ == "__main__":
diff --git a/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt b/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
index fbf2a8802..c19b4b73a 100644
--- a/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
+++ b/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
@@ -7,14 +7,14 @@ Global.auto_cast:fp32
Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=4|whole_train_whole_infer=8
-Architecture.Backbone.checkpoints:pretrain_models/ser_LayoutXLM_xfun_zh
+Architecture.Backbone.pretrained:pretrain_models/ser_LayoutXLM_xfun_zh
train_model_name:latest
train_infer_img_dir:ppstructure/docs/kie/input/zh_val_42.jpg
null:null
##
trainer:pact_train
norm_train:null
-pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o
+pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o Global.eval_batch_step=[2000,10]
fpgm_train:null
distill_train:null
null:null
diff --git a/test_tipc/configs/slanet/train_pact_infer_python.txt b/test_tipc/configs/slanet/train_pact_infer_python.txt
index 42ed0cf59..98546afa6 100644
--- a/test_tipc/configs/slanet/train_pact_infer_python.txt
+++ b/test_tipc/configs/slanet/train_pact_infer_python.txt
@@ -34,7 +34,7 @@ distill_export:null
export1:null
export2:null
##
-infer_model:./inference/en_ppocr_mobile_v2.0_table_structure_infer
+infer_model:./inference/en_ppstructure_mobile_v2.0_SLANet_infer
infer_export:null
infer_quant:True
inference:ppstructure/table/predict_table.py --det_model_dir=./inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=./inference/en_ppocr_mobile_v2.0_table_rec_infer --rec_char_dict_path=./ppocr/utils/dict/table_dict.txt --table_char_dict_path=./ppocr/utils/dict/table_structure_dict.txt --image_dir=./ppstructure/docs/table/table.jpg --det_limit_side_len=736 --det_limit_type=min --output ./output/table
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index 62cda1d8a..da6dfecad 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -146,6 +146,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
python_name=${array[0]}
${python_name} -m pip install -r requirements.txt
${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
+ ${python_name} -m pip install paddleslim==2.3.4
# pretrain lite train data
wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
@@ -260,7 +261,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate
cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../
fi
- if [ ${model_name} == "layoutxlm_ser" ]; then
+ if [[ ${model_name} =~ "layoutxlm_ser" ]]; then
${python_name} -m pip install -r ppstructure/kie/requirements.txt
${python_name} -m pip install opencv-python -U
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
From 4cf04cbee88acb173706c6ea8ad59a07e1bf2ecb Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Wed, 19 Oct 2022 04:02:01 +0000
Subject: [PATCH 14/20] fix table recognition benchmark error
---
ppstructure/table/predict_structure.py | 32 +++++++++++++++++++
ppstructure/table/predict_table.py | 44 +++++---------------------
2 files changed, 40 insertions(+), 36 deletions(-)
diff --git a/ppstructure/table/predict_structure.py b/ppstructure/table/predict_structure.py
index 0bf100852..08e381a84 100755
--- a/ppstructure/table/predict_structure.py
+++ b/ppstructure/table/predict_structure.py
@@ -68,6 +68,7 @@ def build_pre_process_list(args):
class TableStructurer(object):
def __init__(self, args):
+ self.args = args
self.use_onnx = args.use_onnx
pre_process_list = build_pre_process_list(args)
if args.table_algorithm not in ['TableMaster']:
@@ -89,8 +90,31 @@ class TableStructurer(object):
self.predictor, self.input_tensor, self.output_tensors, self.config = \
utility.create_predictor(args, 'table', logger)
+ if args.benchmark:
+ import auto_log
+ pid = os.getpid()
+ gpu_id = utility.get_infer_gpuid()
+ self.autolog = auto_log.AutoLogger(
+ model_name="table",
+ model_precision=args.precision,
+ batch_size=1,
+ data_shape="dynamic",
+ save_path=None, #args.save_log_path,
+ inference_config=self.config,
+ pids=pid,
+ process_name=None,
+ gpu_ids=gpu_id if args.use_gpu else None,
+ time_keys=[
+ 'preprocess_time', 'inference_time', 'postprocess_time'
+ ],
+ warmup=0,
+ logger=logger)
+
def __call__(self, img):
starttime = time.time()
+ if self.args.benchmark:
+ self.autolog.times.start()
+
ori_im = img.copy()
data = {'image': img}
data = transform(data, self.preprocess_op)
@@ -99,6 +123,8 @@ class TableStructurer(object):
return None, 0
img = np.expand_dims(img, axis=0)
img = img.copy()
+ if self.args.benchmark:
+ self.autolog.times.stamp()
if self.use_onnx:
input_dict = {}
input_dict[self.input_tensor.name] = img
@@ -110,6 +136,8 @@ class TableStructurer(object):
for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu()
outputs.append(output)
+ if self.args.benchmark:
+ self.autolog.times.stamp()
preds = {}
preds['structure_probs'] = outputs[1]
@@ -125,6 +153,8 @@ class TableStructurer(object):
structure_str_list = [
'<html>', '<body>', '<table>'
] + structure_str_list + ['</table>', '</body>', '</html>']
elapse = time.time() - starttime
+ if self.args.benchmark:
+ self.autolog.times.end(stamp=True)
return (structure_str_list, bbox_list), elapse
@@ -164,6 +194,8 @@ def main(args):
total_time += elapse
count += 1
logger.info("Predict time of {}: {}".format(image_file, elapse))
+ if args.benchmark:
+ table_structurer.autolog.report()
if __name__ == "__main__":
diff --git a/ppstructure/table/predict_table.py b/ppstructure/table/predict_table.py
index fdf611b7f..8f9c71749 100644
--- a/ppstructure/table/predict_table.py
+++ b/ppstructure/table/predict_table.py
@@ -14,7 +14,6 @@
import os
import sys
-import subprocess
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
@@ -61,57 +60,31 @@ class TableSystem(object):
self.args = args
if not args.show_log:
logger.setLevel(logging.INFO)
-
- self.text_detector = predict_det.TextDetector(
- args) if text_detector is None else text_detector
- self.text_recognizer = predict_rec.TextRecognizer(
- args) if text_recognizer is None else text_recognizer
-
+ args.benchmark = False
+ self.text_detector = predict_det.TextDetector(copy.deepcopy(
+ args)) if text_detector is None else text_detector
+ self.text_recognizer = predict_rec.TextRecognizer(copy.deepcopy(
+ args)) if text_recognizer is None else text_recognizer
+ args.benchmark = True
self.table_structurer = predict_strture.TableStructurer(args)
if args.table_algorithm in ['TableMaster']:
self.match = TableMasterMatcher()
else:
self.match = TableMatch(filter_ocr_result=True)
- self.benchmark = args.benchmark
self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor(
args, 'table', logger)
- if args.benchmark:
- import auto_log
- pid = os.getpid()
- gpu_id = utility.get_infer_gpuid()
- self.autolog = auto_log.AutoLogger(
- model_name="table",
- model_precision=args.precision,
- batch_size=1,
- data_shape="dynamic",
- save_path=None, #args.save_log_path,
- inference_config=self.config,
- pids=pid,
- process_name=None,
- gpu_ids=gpu_id if args.use_gpu else None,
- time_keys=[
- 'preprocess_time', 'inference_time', 'postprocess_time'
- ],
- warmup=0,
- logger=logger)
def __call__(self, img, return_ocr_result_in_table=False):
result = dict()
time_dict = {'det': 0, 'rec': 0, 'table': 0, 'all': 0, 'match': 0}
start = time.time()
- if self.args.benchmark:
- self.autolog.times.start()
structure_res, elapse = self._structure(copy.deepcopy(img))
- if self.benchmark:
- self.autolog.times.stamp()
result['cell_bbox'] = structure_res[1].tolist()
time_dict['table'] = elapse
dt_boxes, rec_res, det_elapse, rec_elapse = self._ocr(
copy.deepcopy(img))
- if self.benchmark:
- self.autolog.times.stamp()
time_dict['det'] = det_elapse
time_dict['rec'] = rec_elapse
@@ -126,8 +99,6 @@ class TableSystem(object):
result['html'] = pred_html
end = time.time()
time_dict['all'] = end - start
- if self.benchmark:
- self.autolog.times.end(stamp=True)
return result, time_dict
def _structure(self, img):
@@ -233,12 +204,13 @@ def main(args):
f_html.close()
if args.benchmark:
- table_sys.autolog.report()
+ table_sys.table_structurer.autolog.report()
if __name__ == "__main__":
args = parse_args()
if args.use_mp:
+ import subprocess
p_list = []
total_process_num = args.total_process_num
for process_id in range(total_process_num):
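The net effect of this patch is a single, correctly ordered auto_log timing cycle owned by TableStructurer: start() before preprocessing, stamp() after preprocessing and after inference, end(stamp=True) after postprocessing, and one report() at exit, while the detector and recognizer are built from deep copies with benchmark forced off so only one logger exists. A sketch of that protocol, assuming the auto_log wheel installed by prepare.sh; the stage functions and the None config values are our stand-ins, not working values:

```python
# Timing-protocol sketch; AutoLogger arguments mirror the diff, but the
# stage functions and None config values are stand-ins.
import logging
import auto_log

preprocess = infer = postprocess = lambda x: x   # stand-in pipeline stages

autolog = auto_log.AutoLogger(
    model_name="table", model_precision="fp32", batch_size=1,
    data_shape="dynamic", save_path=None, inference_config=None,
    pids=None, process_name=None, gpu_ids=None,
    time_keys=['preprocess_time', 'inference_time', 'postprocess_time'],
    warmup=0, logger=logging.getLogger(__name__))

for img in range(3):                    # one full cycle per image
    autolog.times.start()               # preprocessing begins
    data = preprocess(img)
    autolog.times.stamp()               # preprocessing done, inference begins
    outputs = infer(data)
    autolog.times.stamp()               # inference done, postprocessing begins
    result = postprocess(outputs)
    autolog.times.end(stamp=True)       # cycle closed
autolog.report()                        # aggregate once, as in main()
```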
From ae9388ef574400e416bdbf18a1111d932a29a257 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Wed, 19 Oct 2022 07:03:42 +0000
Subject: [PATCH 15/20] update paddleslim version
---
test_tipc/prepare.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index da6dfecad..177857794 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -146,7 +146,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
python_name=${array[0]}
${python_name} -m pip install -r requirements.txt
${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
- ${python_name} -m pip install paddleslim==2.3.4
+ ${python_name} -m pip install paddleslim
# pretrain lite train data
wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
From 0f58f37c160eb2487d7ebdb4595133c72b068978 Mon Sep 17 00:00:00 2001
From: dorren
Date: Wed, 19 Oct 2022 16:41:00 +0800
Subject: [PATCH 16/20] update prepare.sh
---
doc/doc_ch/algorithm_rec_can.md | 2 +-
test_tipc/prepare.sh | 6 +++++-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/doc/doc_ch/algorithm_rec_can.md b/doc/doc_ch/algorithm_rec_can.md
index 8a012b490..53e29535b 100644
--- a/doc/doc_ch/algorithm_rec_can.md
+++ b/doc/doc_ch/algorithm_rec_can.md
@@ -141,7 +141,7 @@ Predicts of ./doc/imgs_hme/hme_00.jpg:['x _ { k } x x _ { k } + y _ { k } y x _
### 4.2 C++推理部署
-Not supported yet, since the C++ pre- and post-processing does not support ABINet so far
+Not supported yet, since the C++ pre- and post-processing does not support CAN so far
### 4.3 Serving服务化部署
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index dc0d2fdb0..d8e6a2fc4 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -146,6 +146,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
python_name=${array[0]}
${python_name} -m pip install -r requirements.txt
${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
+ ${python_name} -m pip install paddleslim==2.3.4
# pretrain lite train data
wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
@@ -241,6 +242,9 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
if [ ${model_name} == "ch_ppocr_mobile_v2_0_det_FPGM" ]; then
${python_name} -m pip install paddleslim
fi
+ if [ ${model_name} == "det_r50_vd_pse_v2_0" ]; then
+ wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams --no-check-certificate
+ fi
if [ ${model_name} == "det_mv3_east_v2_0" ]; then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar --no-check-certificate
cd ./pretrain_models/ && tar xf det_mv3_east_v2.0_train.tar && cd ../
@@ -263,7 +267,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/CROHME_lite.tar --no-check-certificate
cd ./train_data/ && tar xf CROHME_lite.tar && cd ../
fi
- if [ ${model_name} == "layoutxlm_ser" ]; then
+ if [[ ${model_name} =~ "layoutxlm_ser" ]]; then
${python_name} -m pip install -r ppstructure/kie/requirements.txt
${python_name} -m pip install opencv-python -U
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
From b22102d4c54ccaac7a79169ed431c5026b00d12e Mon Sep 17 00:00:00 2001
From: dorren
Date: Wed, 19 Oct 2022 16:46:27 +0800
Subject: [PATCH 17/20] update prepare.sh again
---
test_tipc/prepare.sh | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index d8e6a2fc4..cdc69f25a 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -261,12 +261,6 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate
cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../
fi
- if [ ${model_name} == "rec_d28_can" ]; then
- wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/contribution/can_train.tar --no-check-certificate
- cd ./pretrain_models/ && tar xf can_train.tar && cd ../
- wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/CROHME_lite.tar --no-check-certificate
- cd ./train_data/ && tar xf CROHME_lite.tar && cd ../
- fi
if [[ ${model_name} =~ "layoutxlm_ser" ]]; then
${python_name} -m pip install -r ppstructure/kie/requirements.txt
${python_name} -m pip install opencv-python -U
@@ -293,6 +287,12 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/ct_tipc/total_text_lite2.tar --no-check-certificate
cd ./train_data && tar xf total_text_lite2.tar && ln -s total_text_lite2 total_text && cd ../
fi
+ if [ ${model_name} == "rec_d28_can" ]; then
+ wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/contribution/can_train.tar --no-check-certificate
+ cd ./pretrain_models/ && tar xf can_train.tar && cd ../
+ wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/CROHME_lite.tar --no-check-certificate
+ cd ./train_data/ && tar xf CROHME_lite.tar && cd ../
+ fi
elif [ ${MODE} = "whole_train_whole_infer" ];then
wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
From db8e6f94af899d0ef472e6277ecbb0cb2c37d68b Mon Sep 17 00:00:00 2001
From: dorren
Date: Wed, 19 Oct 2022 17:04:19 +0800
Subject: [PATCH 18/20] update prepare.sh and readme
---
doc/doc_ch/algorithm_rec_can.md | 2 +-
test_tipc/prepare.sh | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/doc/doc_ch/algorithm_rec_can.md b/doc/doc_ch/algorithm_rec_can.md
index 53e29535b..4f266cb33 100644
--- a/doc/doc_ch/algorithm_rec_can.md
+++ b/doc/doc_ch/algorithm_rec_can.md
@@ -55,7 +55,7 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs
```
**Note:**
-- The dataset we provide, the `CROHME dataset`, stores handwritten formulas as white text on a black background; if your own dataset is the opposite, i.e. black text on a white background, make the following modification when training
+- The dataset we provide, the [`CROHME dataset`](https://paddleocr.bj.bcebos.com/dataset/CROHME.tar), stores handwritten formulas as white text on a black background; if your own dataset is the opposite, i.e. black text on a white background, make the following modification when training
```
python3 tools/train.py -c configs/rec/rec_d28_can.yml
-o Train.dataset.transforms.GrayImageChannelFormat.inverse=False
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index cdc69f25a..9291ce8b9 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -146,7 +146,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
python_name=${array[0]}
${python_name} -m pip install -r requirements.txt
${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
- ${python_name} -m pip install paddleslim==2.3.4
+ ${python_name} -m pip install paddleslim
# pretrain lite train data
wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
From 38290a2853dcc1b08513c8f5ed7055b8cc708705 Mon Sep 17 00:00:00 2001
From: andyj <87074272+andyjpaddle@users.noreply.github.com>
Date: Wed, 19 Oct 2022 17:12:12 +0800
Subject: [PATCH 19/20] update visionlan (#7989)
---
doc/doc_ch/algorithm_overview.md | 2 +-
doc/doc_ch/algorithm_rec_visionlan.md | 4 ++--
doc/doc_en/algorithm_overview_en.md | 2 +-
doc/doc_en/algorithm_rec_visionlan_en.md | 4 ++--
ppocr/data/imaug/label_ops.py | 7 -------
ppocr/postprocess/rec_postprocess.py | 7 -------
6 files changed, 6 insertions(+), 20 deletions(-)
diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md
index 235763d8a..44c1e117e 100755
--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
@@ -102,7 +102,7 @@ PaddleOCR将**持续新增**支持OCR领域前沿算法与模型,**欢迎广
|SVTR|SVTR-Tiny| 89.25% | rec_svtr_tiny_none_ctc_en | [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/rec_svtr_tiny_none_ctc_en_train.tar) |
|ViTSTR|ViTSTR| 79.82% | rec_vitstr_none_ce | [训练模型](https://paddleocr.bj.bcebos.com/rec_vitstr_none_ce_train.tar) |
|ABINet|Resnet45| 90.75% | rec_r45_abinet | [训练模型](https://paddleocr.bj.bcebos.com/rec_r45_abinet_train.tar) |
-|VisionLAN|Resnet45| 90.30% | rec_r45_visionlan | [训练模型](https://paddleocr.bj.bcebos.com/rec_r45_visionlan_train.tar) |
+|VisionLAN|Resnet45| 90.30% | rec_r45_visionlan | [训练模型](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar) |
|SPIN|ResNet32| 90.00% | rec_r32_gaspin_bilstm_att | [训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_r32_gaspin_bilstm_att.tar) |
|RobustScanner|ResNet31| 87.77% | rec_r31_robustscanner | [训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_r31_robustscanner.tar)|
|RFL|ResNetRFL| 88.63% | rec_resnet_rfl_att | [训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl_att_train.tar) |
diff --git a/doc/doc_ch/algorithm_rec_visionlan.md b/doc/doc_ch/algorithm_rec_visionlan.md
index 84b5ef682..b4474c29f 100644
--- a/doc/doc_ch/algorithm_rec_visionlan.md
+++ b/doc/doc_ch/algorithm_rec_visionlan.md
@@ -27,7 +27,7 @@
|Model|Backbone|Config|Acc|Download link|
| --- | --- | --- | --- | --- |
-|VisionLAN|ResNet45|[rec_r45_visionlan.yml](../../configs/rec/rec_r45_visionlan.yml)|90.3%|[pretrained & trained model](https://paddleocr.bj.bcebos.com/rec_r45_visionlan_train.tar)|
+|VisionLAN|ResNet45|[rec_r45_visionlan.yml](../../configs/rec/rec_r45_visionlan.yml)|90.3%|[pretrained & trained model](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar)|
## 2. 环境配置
@@ -80,7 +80,7 @@ python3 tools/infer_rec.py -c configs/rec/rec_r45_visionlan.yml -o Global.infer_
### 4.1 Python Inference
-First, convert the best model saved during training into an inference model. Taking the fully trained model as an example ([model download link](https://paddleocr.bj.bcebos.com/rec_r45_visionlan_train.tar)), the following command performs the conversion:
+First, convert the best model saved during training into an inference model. Taking the fully trained model as an example ([model download link](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar)), the following command performs the conversion:
```shell
# Note: set the pretrained_model path to a local path.
diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md
index ff84b9a68..2614226e0 100755
--- a/doc/doc_en/algorithm_overview_en.md
+++ b/doc/doc_en/algorithm_overview_en.md
@@ -99,7 +99,7 @@ Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation r
|SVTR|SVTR-Tiny| 89.25% | rec_svtr_tiny_none_ctc_en | [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/rec_svtr_tiny_none_ctc_en_train.tar) |
|ViTSTR|ViTSTR| 79.82% | rec_vitstr_none_ce | [trained model](https://paddleocr.bj.bcebos.com/rec_vitstr_none_none_train.tar) |
|ABINet|Resnet45| 90.75% | rec_r45_abinet | [trained model](https://paddleocr.bj.bcebos.com/rec_r45_abinet_train.tar) |
-|VisionLAN|Resnet45| 90.30% | rec_r45_visionlan | [trained model](https://paddleocr.bj.bcebos.com/rec_r45_visionlan_train.tar) |
+|VisionLAN|Resnet45| 90.30% | rec_r45_visionlan | [trained model](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar) |
|SPIN|ResNet32| 90.00% | rec_r32_gaspin_bilstm_att | [trained model](https://paddleocr.bj.bcebos.com/contribution/rec_r32_gaspin_bilstm_att.tar) |
|RobustScanner|ResNet31| 87.77% | rec_r31_robustscanner | [trained model](https://paddleocr.bj.bcebos.com/contribution/rec_r31_robustscanner.tar)|
|RFL|ResNetRFL| 88.63% | rec_resnet_rfl_att | [trained model](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl_att_train.tar) |
diff --git a/doc/doc_en/algorithm_rec_visionlan_en.md b/doc/doc_en/algorithm_rec_visionlan_en.md
index cf2293b3d..f67aa3c62 100644
--- a/doc/doc_en/algorithm_rec_visionlan_en.md
+++ b/doc/doc_en/algorithm_rec_visionlan_en.md
@@ -25,7 +25,7 @@ Using MJSynth and SynthText two text recognition datasets for training, and eval
|Model|Backbone|config|Acc|Download link|
| --- | --- | --- | --- | --- |
-|VisionLAN|ResNet45|[rec_r45_visionlan.yml](../../configs/rec/rec_r45_visionlan.yml)|90.3%|[pretrained & trained model](https://paddleocr.bj.bcebos.com/rec_r45_visionlan_train.tar)|
+|VisionLAN|ResNet45|[rec_r45_visionlan.yml](../../configs/rec/rec_r45_visionlan.yml)|90.3%|[pretrained & trained model](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar)|
## 2. Environment
@@ -68,7 +68,7 @@ python3 tools/infer_rec.py -c configs/rec/rec_r45_visionlan.yml -o Global.infer_
### 4.1 Python Inference
-First, the model saved during the VisionLAN text recognition training process is converted into an inference model ([model download link](https://paddleocr.bj.bcebos.com/rec_r45_visionlan_train.tar)). You can use the following command to convert:
+First, the model saved during the VisionLAN text recognition training process is converted into an inference model ([model download link](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar)). You can use the following command to convert:
```
python3 tools/export_model.py -c configs/rec/rec_r45_visionlan.yml -o Global.pretrained_model=./rec_r45_visionlan_train/best_accuracy Global.save_inference_dir=./inference/rec_r45_visionlan/
diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
index 511471c76..e8a3fedaf 100644
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -107,7 +107,6 @@ class BaseRecLabelEncode(object):
self.beg_str = "sos"
self.end_str = "eos"
self.lower = lower
- self.use_default_dict = False
if character_dict_path is None:
logger = get_logger()
@@ -117,11 +116,8 @@ class BaseRecLabelEncode(object):
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str)
self.lower = True
- self.use_default_dict = True
else:
self.character_str = []
- if 'ppocr/utils/ic15_dict.txt' in character_dict_path:
- self.use_default_dict = True
with open(character_dict_path, "rb") as fin:
lines = fin.readlines()
for line in lines:
@@ -1404,9 +1400,6 @@ class VLLabelEncode(BaseRecLabelEncode):
**kwargs):
super(VLLabelEncode, self).__init__(
max_text_length, character_dict_path, use_space_char, lower)
- if self.use_default_dict:
- self.character = self.character[10:] + self.character[
- 1:10] + [self.character[0]]
self.dict = {}
for i, char in enumerate(self.character):
self.dict[char] = i
diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py
index 98753ef7a..2ec572e8d 100644
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -26,15 +26,11 @@ class BaseRecLabelDecode(object):
self.end_str = "eos"
self.reverse = False
self.character_str = []
- self.use_default_dict = False
if character_dict_path is None:
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str)
- self.use_default_dict = True
else:
- if 'ppocr/utils/ic15_dict.txt' in character_dict_path:
- self.use_default_dict = True
with open(character_dict_path, "rb") as fin:
lines = fin.readlines()
for line in lines:
@@ -810,9 +806,6 @@ class VLLabelDecode(BaseRecLabelDecode):
super(VLLabelDecode, self).__init__(character_dict_path, use_space_char)
self.max_text_length = kwargs.get('max_text_length', 25)
self.nclass = len(self.character) + 1
- if self.use_default_dict:
- self.character = self.character[10:] + self.character[
- 1:10] + [self.character[0]]
def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
""" convert text-index into text-label. """
From 969f4c52fc6a8182713bd25e46e298a6c151d357 Mon Sep 17 00:00:00 2001
From: dorren
Date: Wed, 19 Oct 2022 18:04:02 +0800
Subject: [PATCH 20/20] update prepare.sh
---
test_tipc/prepare.sh | 2 --
1 file changed, 2 deletions(-)
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index 9291ce8b9..23d312c86 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -288,8 +288,6 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
cd ./train_data && tar xf total_text_lite2.tar && ln -s total_text_lite2 total_text && cd ../
fi
if [ ${model_name} == "rec_d28_can" ]; then
- wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/contribution/can_train.tar --no-check-certificate
- cd ./pretrain_models/ && tar xf can_train.tar && cd ../
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/CROHME_lite.tar --no-check-certificate
cd ./train_data/ && tar xf CROHME_lite.tar && cd ../
fi