Strip whitespace from markup
This commit is contained in:
parent
127708b1a6
commit
f7cfc7915b
|
@@ -14,7 +14,7 @@ def __get_markup_structure(soup):
|
||||||
content = []
|
content = []
|
||||||
for child in soup.children:
|
for child in soup.children:
|
||||||
if child.name is None:
|
if child.name is None:
|
||||||
text = child.text.replace("\n", "")
|
text = __clean(child.text)
|
||||||
if text != "":
|
if text != "":
|
||||||
content.append(text)
|
content.append(text)
|
||||||
else:
|
else:
|
||||||
|
@@ -34,6 +34,12 @@ def __get_markup_structure(soup):
|
||||||
return node
|
return node
|
||||||
|
|
||||||
|
|
||||||
|
def __clean(text):
|
||||||
|
text = text.replace("/", "/")
|
||||||
|
text = text.strip()
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
def __get_attributes(attrs):
|
def __get_attributes(attrs):
|
||||||
attributes = {}
|
attributes = {}
|
||||||
if "href" in attrs:
|
if "href" in attrs:
|
||||||
|
|
Loading…
Reference in a new issue