From 43bd2ad6423fd153e5bd33af0e82abfb0bb7a3a5 Mon Sep 17 00:00:00 2001 From: Wang Xin Date: Sat, 6 Jul 2024 17:29:59 +0800 Subject: [PATCH] fix: table recognition content is not escaped properly (#13277) --- ppstructure/table/matcher.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ppstructure/table/matcher.py b/ppstructure/table/matcher.py index 51e6250f4..ae32b4b15 100755 --- a/ppstructure/table/matcher.py +++ b/ppstructure/table/matcher.py @@ -14,6 +14,7 @@ import numpy as np from ppstructure.table.table_master_match import deal_eb_token, deal_bb +import html def distance(box_1, box_2): @@ -133,6 +134,8 @@ class TableMatch: and " " != content[-1] ): content += " " + # escape content + content = html.escape(content) end_html.extend(content) if b_with: end_html.extend("</b>")