From f1d0c8a2ff1a5790f78749d7cd489d8d8337b463 Mon Sep 17 00:00:00 2001
From: Leif <4603009@qq.com>
Date: Thu, 5 May 2022 22:37:11 +0800
Subject: [PATCH] Auto stash before merge of "table" and "origin/new"

---
 PPOCRLabel/PPOCRLabel.py | 19 +++++++++++++----
 PPOCRLabel/libs/utils.py | 44 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/PPOCRLabel/PPOCRLabel.py b/PPOCRLabel/PPOCRLabel.py
index 534e0cd88..f80cf8fd0 100644
--- a/PPOCRLabel/PPOCRLabel.py
+++ b/PPOCRLabel/PPOCRLabel.py
@@ -21,6 +21,7 @@ import os.path
 import platform
 import subprocess
 import sys
+import xlrd
 from functools import partial
 
 from PyQt5.QtCore import QSize, Qt, QPoint, QByteArray, QTimer, QFileInfo, QPointF, QProcess
@@ -2349,9 +2350,19 @@ class MainWindow(QMainWindow):
                 return
 
             # read xlsx file, convert to HTML
-            xd = pd.ExcelFile(csv_path)
-            df = xd.parse()
-            structure = df.to_html(index = False)
+            # xd = pd.ExcelFile(csv_path)
+            # df = xd.parse()
+            # structure = df.to_html(index = False)
+            excel = xlrd.open_workbook(csv_path)
+            sheet0 = excel.sheet_by_index(0)  # only sheet 0
+            merged_cells = sheet0.merged_cells # (0,1,1,3) start row, end row, start col, end col
+
+            html_list = [['td'] * sheet0.ncols for i in range(sheet0.nrows)]
+
+            for merged in merged_cells:
+                html_list = expand_list(merged, html_list)
+
+            token_list = convert_token(html_list)
 
             # load box annotations
             cells = []
@@ -2370,7 +2381,7 @@ class MainWindow(QMainWindow):
                 split = 'test'
 
             #  save dict
-            html = {'structure': {'tokens': structure}, 'cell': cells}
+            html = {'structure': {'tokens': token_list}, 'cell': cells}
             json_results.append({'filename': filename, 'split': split, 'imgid': imgid, 'html': html})
             imgid += 1
 
diff --git a/PPOCRLabel/libs/utils.py b/PPOCRLabel/libs/utils.py
index c49b50688..86a0336d4 100644
--- a/PPOCRLabel/libs/utils.py
+++ b/PPOCRLabel/libs/utils.py
@@ -188,6 +188,50 @@ def OBB2HBB(obb) -> np.array:
     return hbb
 
 
+def expand_list(merged, html_list):
+    """Blank a merged region and tag its top-left cell with the span sizes.
+
+    ``merged`` is (start row, end row, start col, end col); ends are exclusive.
+    """
+    top, bottom, left, right = merged
+    for row in range(top, bottom):
+        for col in range(left, right):
+            html_list[row][col] = None
+    height, width = bottom - top, right - left
+    col_attr = " colspan={}".format(width) if width > 1 else ''
+    row_attr = " rowspan={}".format(height) if height > 1 else ''
+    html_list[top][left] = col_attr + row_attr
+    return html_list
+
+
+def convert_token(html_list):
+    """Flatten the cell grid into a list of HTML structure tokens.
+
+    Entries are 'td' (plain cell), None (hidden by a merge, skipped) or a span
+    string such as ' colspan=2 rowspan=3'. Every emitted cell is closed.
+    """
+    token_list = ["<tbody>"]
+    for row in html_list:
+        token_list.append("<tr>")
+        for col in row:
+            if col is None:
+                continue
+            if col == 'td':
+                token_list.extend(["<td>", "</td>"])
+                continue
+            # Parse each "name=value" pair on its own so a cell carrying both
+            # spans does not leak " rowspan=N" into the colspan value.
+            spans = dict(part.partition('=')[::2] for part in col.split())
+            token_list.append("<td")
+            for name in ('colspan', 'rowspan'):
+                if name in spans:
+                    token_list.append(" {}=\"{}\"".format(name, spans[name]))
+            token_list.extend([">", "</td>"])
+        token_list.append("</tr>")
+    token_list.append("</tbody>")
+    return token_list
+
+
 def stepsInfo(lang='en'):
     if lang == 'ch':
         msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \