From c27402bf1f012aeb54a98eb12ba98883eded502e Mon Sep 17 00:00:00 2001 From: santlchogva <2373413239@qq.com> Date: Tue, 2 Jan 2024 10:55:36 +0800 Subject: [PATCH] fix:layout recovery image:xxx.png,err msg: list index out of range (#11405) --- ppstructure/recovery/table_process.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ppstructure/recovery/table_process.py b/ppstructure/recovery/table_process.py index 77a6ef765..1c9e2e10c 100644 --- a/ppstructure/recovery/table_process.py +++ b/ppstructure/recovery/table_process.py @@ -253,18 +253,18 @@ class HtmlToDocx(HTMLParser): cols = get_table_columns(row) cell_col = 0 for col in cols: + if cell_col >= cols_len: + break + colspan = int(col.attrs.get('colspan', 1)) rowspan = int(col.attrs.get('rowspan', 1)) - cell_html = get_cell_html(col) if col.name == 'th': cell_html = "%s" % cell_html docx_cell = table.cell(cell_row, cell_col) - - while docx_cell.text != '': # Skip the merged cell - cell_col += 1 - docx_cell = table.cell(cell_row, cell_col) + if (cell_col + colspan -1) >= cols_len: + colspan -= 1 cell_to_merge = table.cell(cell_row + rowspan - 1, cell_col + colspan - 1)