Strip whitespace from markup

This commit is contained in:
stephenmk 2023-04-10 19:09:01 -05:00
parent 127708b1a6
commit f7cfc7915b
No known key found for this signature in database
GPG key ID: B6DA730DB06235F1

View file

@@ -14,7 +14,7 @@ def __get_markup_structure(soup):
content = [] content = []
for child in soup.children: for child in soup.children:
if child.name is None: if child.name is None:
text = child.text.replace("\n", "") text = __clean(child.text)
if text != "": if text != "":
content.append(text) content.append(text)
else: else:
@@ -34,6 +34,12 @@ def __get_markup_structure(soup):
return node return node
def __clean(text):
    """Return *text* with every "/" removed and outer whitespace stripped.

    The slash removal runs first, so whitespace that ends up at either
    end of the string once the slashes are gone is stripped as well.
    Whitespace in the interior of the string is left untouched.
    """
    return text.replace("/", "").strip()
def __get_attributes(attrs): def __get_attributes(attrs):
attributes = {} attributes = {}
if "href" in attrs: if "href" in attrs: