Skip to content

Commit

Permalink
Merge pull request #855 from myhloli/add-structeqtable
Browse files Browse the repository at this point in the history
feat(model): add HTML minification to StructTableModel
  • Loading branch information
myhloli authored Nov 4, 2024
2 parents dc31c97 + b5117e7 commit 2c75a37
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions magic_pdf/model/pek_sub_modules/structeqtable/StructTableModel.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

import torch
from struct_eqtable import build_model

Expand Down Expand Up @@ -28,4 +30,16 @@ def predict(self, images, output_format=None, **kwargs):
images, output_format=output_format
)

if output_format == "html":
results = [self.minify_html(html) for html in results]

return results

def minify_html(self, html):
    """Return ``html`` with whitespace collapsed and trimmed around tags.

    The rewrite rules are applied in order:

    1. every run of whitespace (including newlines) becomes one space;
    2. whitespace on either side of a ``>`` is dropped;
    3. whitespace on either side of a ``<`` is dropped.

    Leading and trailing whitespace of the final string is stripped.

    Note that rules 2 and 3 match *any* angle bracket, so a space between
    text and an adjacent tag is removed as well (``"x <b>y</b> z"``
    becomes ``"x<b>y</b>z"``); only whitespace between words inside text
    survives, as a single space.

    Args:
        html: The HTML markup to compact, as a string.

    Returns:
        The compacted markup string.
    """
    # Order matters: collapsing runs first leaves at most one space for
    # the bracket rules to remove.
    rewrite_rules = (
        (r'\s+', ' '),
        (r'\s*>\s*', '>'),
        (r'\s*<\s*', '<'),
    )
    compact = html
    for pattern, replacement in rewrite_rules:
        compact = re.sub(pattern, replacement, compact)
    return compact.strip()

0 comments on commit 2c75a37

Please sign in to comment.