new table gt format
parent
70ad319acc
commit
839357facf
|
@ -2449,13 +2449,6 @@ class MainWindow(QMainWindow):
|
|||
export PPLabel and CSV to JSON (PubTabNet)
|
||||
'''
|
||||
import pandas as pd
|
||||
from libs.dataPartitionDialog import DataPartitionDialog
|
||||
|
||||
# data partition user input
|
||||
partitionDialog = DataPartitionDialog(parent=self)
|
||||
partitionDialog.exec()
|
||||
if partitionDialog.getStatus() == False:
|
||||
return
|
||||
|
||||
# automatically save annotations
|
||||
self.saveFilestate()
|
||||
|
@ -2478,28 +2471,19 @@ class MainWindow(QMainWindow):
|
|||
labeldict[file] = eval(label)
|
||||
else:
|
||||
labeldict[file] = []
|
||||
|
||||
# read table recognition output
|
||||
TableRec_excel_dir = os.path.join(
|
||||
self.lastOpenDir, 'tableRec_excel_output')
|
||||
|
||||
train_split, val_split, test_split = partitionDialog.getDataPartition()
|
||||
# check validate
|
||||
if train_split + val_split + test_split > 100:
|
||||
msg = "The sum of training, validation and testing data should be less than 100%"
|
||||
QMessageBox.information(self, "Information", msg)
|
||||
return
|
||||
print(train_split, val_split, test_split)
|
||||
train_split, val_split, test_split = float(train_split) / 100., float(val_split) / 100., float(test_split) / 100.
|
||||
train_id = int(len(labeldict) * train_split)
|
||||
val_id = int(len(labeldict) * (train_split + val_split))
|
||||
print('Data partition: train:', train_id,
|
||||
'validation:', val_id - train_id,
|
||||
'test:', len(labeldict) - val_id)
|
||||
|
||||
TableRec_excel_dir = os.path.join(self.lastOpenDir, 'tableRec_excel_output')
|
||||
json_results = []
|
||||
imgid = 0
|
||||
# save txt
|
||||
fid = open(
|
||||
"{}/gt.txt".format(self.lastOpenDir), "w", encoding='utf-8')
|
||||
for image_path in labeldict.keys():
|
||||
# load csv annotations
|
||||
filename, _ = os.path.splitext(os.path.basename(image_path))
|
||||
csv_path = os.path.join(TableRec_excel_dir, filename + '.xlsx')
|
||||
csv_path = os.path.join(
|
||||
TableRec_excel_dir, filename + '.xlsx')
|
||||
if not os.path.exists(csv_path):
|
||||
continue
|
||||
|
||||
|
@ -2518,28 +2502,31 @@ class MainWindow(QMainWindow):
|
|||
cells = []
|
||||
for anno in labeldict[image_path]:
|
||||
tokens = list(anno['transcription'])
|
||||
obb = anno['points']
|
||||
hbb = OBB2HBB(np.array(obb)).tolist()
|
||||
cells.append({'tokens': tokens, 'bbox': hbb})
|
||||
cells.append({
|
||||
'tokens': tokens,
|
||||
'bbox': anno['points']
|
||||
})
|
||||
|
||||
# data split
|
||||
if imgid < train_id:
|
||||
split = 'train'
|
||||
elif imgid < val_id:
|
||||
split = 'val'
|
||||
else:
|
||||
split = 'test'
|
||||
|
||||
# save dict
|
||||
html = {'structure': {'tokens': token_list}, 'cells': cells}
|
||||
json_results.append({'filename': os.path.basename(image_path), 'split': split, 'imgid': imgid, 'html': html})
|
||||
imgid += 1
|
||||
|
||||
# save json
|
||||
with open("{}/annotation.json".format(self.lastOpenDir), "w", encoding='utf-8') as fid:
|
||||
fid.write(json.dumps(json_results, ensure_ascii=False))
|
||||
|
||||
msg = 'JSON sucessfully saved in {}/annotation.json'.format(self.lastOpenDir)
|
||||
# 构造标注信息
|
||||
html = {
|
||||
'structure': {
|
||||
'tokens': token_list
|
||||
},
|
||||
'cells': cells
|
||||
}
|
||||
d = {
|
||||
'filename': os.path.basename(image_path),
|
||||
'html': html
|
||||
}
|
||||
d['gt'] = rebuild_html_from_ppstructure_label(d)
|
||||
# imgid += 1
|
||||
fid.write('{}\n'.format(
|
||||
json.dumps(
|
||||
d, ensure_ascii=False)))
|
||||
|
||||
# convert to PP-Structure label format
|
||||
fid.close()
|
||||
msg = 'JSON sucessfully saved in {}/gt.txt'.format(self.lastOpenDir)
|
||||
QMessageBox.information(self, "Information", msg)
|
||||
|
||||
def autolcm(self):
|
||||
|
|
|
@ -1,113 +0,0 @@
|
|||
try:
|
||||
from PyQt5.QtGui import *
|
||||
from PyQt5.QtCore import *
|
||||
from PyQt5.QtWidgets import *
|
||||
except ImportError:
|
||||
from PyQt4.QtGui import *
|
||||
from PyQt4.QtCore import *
|
||||
|
||||
from libs.utils import newIcon
|
||||
|
||||
import time
|
||||
import datetime
|
||||
import json
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
|
||||
BB = QDialogButtonBox
|
||||
|
||||
class DataPartitionDialog(QDialog):
    """Modal dialog that asks for train / validation / test split percentages.

    The three values are typed into integer-only line edits (0-100 each) that
    are pre-filled with 70 / 15 / 15.  After the dialog has been executed,
    ``getStatus()`` tells whether it was confirmed and ``getDataPartition()``
    returns the three percentages as integers.  The dialog itself does not
    check that the three values add up to 100; that is left to the caller.
    """

    def __init__(self, parent=None):
        """Build the dialog.

        Args:
            parent: optional owner object.  Only its ``lang`` attribute is
                read (``'ch'`` selects the Chinese warning text).  It is NOT
                handed to Qt as the widget parent, exactly as before.
        """
        super().__init__()
        # Historical (misspelled) attribute name, kept so existing code that
        # reads it keeps working.
        self.parnet = parent
        self.title = 'DATA PARTITION'

        # Default percentages shown when the dialog opens.
        self.train_ratio = 70
        self.val_ratio = 15
        self.test_ratio = 15

        self.initUI()

    def initUI(self):
        """Create the widgets, lay them out and show the dialog."""
        self.setWindowTitle(self.title)
        self.setWindowModality(Qt.ApplicationModal)

        self.flag_accept = True

        # `parent` is optional and may not carry a `lang` attribute; fall back
        # to the English text instead of raising AttributeError.
        if getattr(self.parnet, 'lang', 'en') == 'ch':
            msg = "导出JSON前请保存所有图像的标注且关闭EXCEL!"
        else:
            msg = "Please save all the annotations and close the EXCEL before exporting JSON!"

        info_msg = QLabel(msg, self)
        info_msg.setWordWrap(True)
        info_msg.setStyleSheet("color: red")
        info_msg.setFont(QFont('Arial', 12))

        field_font = QFont('Arial', 15)

        train_lbl = QLabel('Train split: ', self)
        train_lbl.setFont(field_font)
        val_lbl = QLabel('Valid split: ', self)
        val_lbl.setFont(field_font)
        test_lbl = QLabel('Test split: ', self)
        test_lbl.setFont(field_font)

        self.train_input = QLineEdit(self)
        self.train_input.setFont(field_font)
        self.val_input = QLineEdit(self)
        self.val_input.setFont(field_font)
        self.test_input = QLineEdit(self)
        self.test_input.setFont(field_font)

        self.train_input.setText(str(self.train_ratio))
        self.val_input.setText(str(self.val_ratio))
        self.test_input.setText(str(self.test_ratio))

        # Restrict every field to integers in [0, 100].
        validator = QIntValidator(0, 100)
        self.train_input.setValidator(validator)
        self.val_input.setValidator(validator)
        self.test_input.setValidator(validator)

        gridlayout = QGridLayout()
        gridlayout.addWidget(info_msg, 0, 0, 1, 2)
        gridlayout.addWidget(train_lbl, 1, 0)
        gridlayout.addWidget(val_lbl, 2, 0)
        gridlayout.addWidget(test_lbl, 3, 0)
        gridlayout.addWidget(self.train_input, 1, 1)
        gridlayout.addWidget(self.val_input, 2, 1)
        gridlayout.addWidget(self.test_input, 3, 1)

        bb = BB(BB.Ok | BB.Cancel, Qt.Horizontal, self)
        bb.button(BB.Ok).setIcon(newIcon('done'))
        bb.button(BB.Cancel).setIcon(newIcon('undo'))
        bb.accepted.connect(self.validate)
        bb.rejected.connect(self.cancel)
        gridlayout.addWidget(bb, 4, 0, 1, 2)

        self.setLayout(gridlayout)

        self.show()

    def validate(self):
        """OK handler: mark the dialog as confirmed and accept it."""
        self.flag_accept = True
        self.accept()

    def cancel(self):
        """Cancel handler: mark the dialog as dismissed and reject it."""
        self.flag_accept = False
        self.reject()

    def getStatus(self):
        """Return True if the dialog was confirmed, False if cancelled/closed."""
        return self.flag_accept

    @staticmethod
    def _read_ratio(line_edit):
        """Return the integer typed into *line_edit*, or 0 if it is not a number.

        The validator still lets the user leave a field blank, and ``int('')``
        raises ValueError; an unparsable field is treated as 0 percent.
        """
        try:
            return int(line_edit.text())
        except ValueError:
            return 0

    def getDataPartition(self):
        """Read the three fields and return ``(train, val, test)`` as ints.

        The values are also stored on ``train_ratio`` / ``val_ratio`` /
        ``test_ratio``.  A blank or otherwise unparsable field counts as 0.
        """
        self.train_ratio = self._read_ratio(self.train_input)
        self.val_ratio = self._read_ratio(self.val_input)
        self.test_ratio = self._read_ratio(self.test_input)

        return self.train_ratio, self.val_ratio, self.test_ratio

    def closeEvent(self, event):
        """Treat closing the window the same way as pressing Cancel."""
        self.flag_accept = False
        self.reject()
|
||||
|
||||
|
|
@ -176,18 +176,6 @@ def boxPad(box, imgShape, pad : int) -> np.array:
|
|||
return box
|
||||
|
||||
|
||||
def OBB2HBB(obb) -> np.ndarray:
    """
    Convert Oriented Bounding Box to Horizontal Bounding Box.

    Args:
        obb: 2-D array (or nested sequence) of points; column 0 is read as
            the x coordinate and column 1 as the y coordinate.

    Returns:
        int32 array ``[x_min, y_min, x_max, y_max]``.  Non-integer
        coordinates are truncated by the cast to int32.

    Raises:
        ValueError: if ``obb`` contains no points.
    """
    # Accept plain nested lists as well as ndarrays.
    points = np.asarray(obb)
    xs, ys = points[:, 0], points[:, 1]
    # Vectorised reductions instead of Python's builtin min/max over the array.
    return np.array([xs.min(), ys.min(), xs.max(), ys.max()]).astype(np.int32)
|
||||
|
||||
|
||||
def expand_list(merged, html_list):
|
||||
'''
|
||||
Fill blanks according to merged cells
|
||||
|
@ -232,6 +220,26 @@ def convert_token(html_list):
|
|||
return token_list
|
||||
|
||||
|
||||
def rebuild_html_from_ppstructure_label(label_info):
    """Assemble a full HTML table string from a structure/cells label dict.

    ``label_info['html']['structure']['tokens']`` is the list of structure
    tokens and ``label_info['html']['cells']`` the list of cell dicts, each
    with a ``'tokens'`` list.  Cell text is placed right after every
    ``'<td>'`` or ``'>'`` structure token; cells are paired with those
    positions from the end backwards.  Single-character cell tokens are
    HTML-escaped, longer tokens are inserted unchanged.  The input dict is
    not modified.

    Returns:
        The joined tokens wrapped in ``<html><body><table>...</table></body></html>``.
    """
    from html import escape

    pieces = label_info['html']['structure']['tokens'].copy()
    slots = []
    for position, tag in enumerate(pieces):
        if tag in ('<td>', '>'):
            slots.append(position)

    # Walk backwards so that earlier insert positions stay valid.
    all_cells = label_info['html']['cells']
    for position, cell in zip(slots[::-1], all_cells[::-1]):
        cell_tokens = cell['tokens']
        if not cell_tokens:
            continue
        escaped = []
        for tok in cell_tokens:
            if len(tok) == 1:
                escaped.append(escape(tok))
            else:
                escaped.append(tok)
        pieces.insert(position + 1, ''.join(escaped))

    table_body = ''.join(pieces)
    return '<html><body><table>{}</table></body></html>'.format(table_body)
|
||||
|
||||
|
||||
def stepsInfo(lang='en'):
|
||||
if lang == 'ch':
|
||||
msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \
|
||||
|
|
Loading…
Reference in New Issue