-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathreports.py
26 lines (20 loc) · 1.11 KB
/
reports.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from bs4 import BeautifulSoup
from django.utils.safestring import mark_safe
from lxml.html.clean import Cleaner
def process_html(html: str) -> str:
    """Sanitize report HTML and return the contents of its ``<body>``.

    Accepts either a complete HTML document or a fragment. The markup is
    passed through lxml's ``Cleaner`` (configured here to strip ``<style>``
    content and drop the ``<head>`` element while keeping page-structure
    tags), then every ``<table>`` and ``<pre>`` is wrapped in
    ``<div class="overflow-wrapper">`` so it can be styled to side-scroll
    on small screens.

    Args:
        html: An HTML document or fragment.

    Returns:
        The serialized children of the cleaned ``<body>``, passed through
        Django's ``mark_safe``.
    """
    # We want to handle complete HTML documents and also fragments. We're
    # going to extract the contents of the body at the end of this function,
    # but it's easiest to normalize to complete documents because that's what
    # the HTML-wrangling libraries we're using are most comfortable handling.
    #
    # Match the opening tag case-insensitively and without requiring the
    # closing ">" so documents such as `<html lang="en">` or `<HTML>` are
    # recognised as complete and not wrapped a second time.
    if "<html" not in html.lower():
        html = f"<html><body>{html}</body></html>"

    cleaned = Cleaner(
        page_structure=False,
        style=True,
        kill_tags=["head"],
    ).clean_html(html)

    soup = BeautifulSoup(cleaned, "html.parser")

    # For small screens we want to allow side-scrolling for just a small
    # number of elements. To enable this each one needs to be wrapped in a div
    # that we can target for styling.
    for tag in ("table", "pre"):
        for element in soup.find_all(tag):
            element.wrap(soup.new_tag("div", attrs={"class": "overflow-wrapper"}))

    # Fall back to the whole parsed tree if the cleaned markup has no <body>,
    # rather than failing with AttributeError on `None.contents`.
    container = soup.body if soup.body is not None else soup
    body_content = "".join(str(element) for element in container.contents)

    # NOTE(review): mark_safe is only appropriate because the markup has been
    # through Cleaner above; keep the sanitizing step ahead of this call.
    return mark_safe(body_content)