110 lines
3.3 KiB
Python
110 lines
3.3 KiB
Python
|
import json
|
||
|
import os
|
||
|
import shutil
|
||
|
import uuid
|
||
|
from pathlib import Path
|
||
|
from css_parser import parseStyle
|
||
|
|
||
|
|
||
|
def create_zip(terms, index, tags=None):
    """Write the dictionary data to ``output/<title>.zip``.

    The archive contains ``index.json``, the terms split across numbered
    ``term_bank_<n>.json`` files holding at most 500 entries each, and
    ``tag_bank_1.json`` when any tags are supplied.

    Args:
        terms: Sequence of JSON-serialisable term entries.
        index: JSON-serialisable mapping; ``index["title"]`` is used as the
            archive's file name (without the ``.zip`` extension).
        tags: Optional sequence of JSON-serialisable tag entries. ``None``
            or an empty sequence means no tag bank is written.

    Raises:
        KeyError: If ``index`` has no ``"title"`` key.
    """
    terms_per_file = 500
    output_directory = "output"
    # Look the title up first so a missing key fails before anything is
    # written to disk.
    zip_filename = index["title"]

    # Stage the JSON files in a uniquely named scratch directory.
    build_directory = str(uuid.uuid4())
    os.mkdir(build_directory)
    try:
        # One bank per started chunk of `terms_per_file` entries, so an exact
        # multiple of the chunk size no longer yields a trailing empty bank.
        # An empty `terms` still produces a single (empty) term bank.
        bank_starts = list(range(0, len(terms), terms_per_file)) or [0]
        for bank_number, start in enumerate(bank_starts, start=1):
            term_file = os.path.join(
                build_directory, f"term_bank_{bank_number}.json"
            )
            with open(term_file, "w", encoding="utf8") as f:
                json.dump(
                    terms[start:start + terms_per_file],
                    f,
                    indent=4,
                    ensure_ascii=False,
                )

        index_file = os.path.join(build_directory, "index.json")
        with open(index_file, "w", encoding="utf8") as f:
            json.dump(index, f, indent=4, ensure_ascii=False)

        if tags:
            tag_file = os.path.join(build_directory, "tag_bank_1.json")
            with open(tag_file, "w", encoding="utf8") as f:
                json.dump(tags, f, indent=4, ensure_ascii=False)

        # Build the archive directly inside the output directory.  Moving a
        # finished archive into a directory that already holds a file of the
        # same name makes shutil.move raise, which broke repeated builds;
        # writing in place simply overwrites the previous archive.
        os.makedirs(output_directory, exist_ok=True)
        shutil.make_archive(
            os.path.join(output_directory, zip_filename), "zip", build_directory
        )
    finally:
        # Always remove the scratch directory, even when a step above fails.
        shutil.rmtree(build_directory)
|
||
|
|
||
|
|
||
|
def soup_to_gloss(soup):
    """Return a structured-content gloss built from ``soup``.

    The markup is converted with ``__get_markup_structure`` and placed
    under the ``"content"`` key of a ``"structured-content"`` dict.
    """
    return {
        "type": "structured-content",
        "content": __get_markup_structure(soup),
    }
|
||
|
|
||
|
|
||
|
def __get_markup_structure(soup):
    """Recursively convert a markup element into a nested dict.

    Children whose ``name`` is ``None`` are treated as text: their text is
    stripped and kept only when non-empty.  Every other child is converted
    recursively.  The resulting dict holds the element's tag, the
    attributes selected by ``__get_attributes`` and, when there are any
    children, a ``"content"`` entry (the single child itself, or a list
    when there are several).
    """
    children = []
    for child in soup.children:
        if child.name is not None:
            children.append(__get_markup_structure(child))
            continue
        stripped = child.text.strip()
        if stripped != "":
            children.append(stripped)

    # Key order is tag, then attributes, then content; it is preserved
    # because it determines the order of keys in the serialised JSON.
    element = {"tag": soup.name}
    element.update(__get_attributes(soup.attrs))

    if element["tag"] == "th":
        # Header cells always get this fixed style, replacing any parsed one.
        element["style"] = {"verticalAlign": "middle", "textAlign": "center"}
    elif element["tag"] == "p":
        # Paragraphs are emitted as spans.
        element["tag"] = "span"

    if len(children) == 1:
        element["content"] = children[0]
    elif len(children) > 1:
        element["content"] = children

    return element
|
||
|
|
||
|
|
||
|
def __get_attributes(attrs):
    """Pick the supported attributes out of ``attrs``.

    ``href`` is copied unchanged, ``rowspan`` and ``colspan`` are converted
    to ``int`` and stored as ``rowSpan`` / ``colSpan``, and ``style`` is
    converted with ``__get_style``.  Any other attribute is dropped.
    """
    selected = {}

    if "href" in attrs:
        selected["href"] = attrs["href"]

    # Span attributes share the same handling: rename and convert to int.
    for html_name, output_name in (("rowspan", "rowSpan"), ("colspan", "colSpan")):
        if html_name in attrs:
            selected[output_name] = int(attrs[html_name])

    if "style" in attrs:
        selected["style"] = __get_style(attrs["style"])

    return selected
|
||
|
|
||
|
|
||
|
def __get_style(inline_style_string):
    """Convert an inline CSS string into a dict of supported style keys.

    The string is parsed with ``parseStyle``; each supported property whose
    parsed value is not the empty string is copied into the result.  The
    ``textDecoration`` property is stored under ``textDecorationLine``;
    every other property keeps its own name.
    """
    # (property read from the parsed declaration, key written to the result)
    property_to_key = (
        ("fontSize", "fontSize"),
        ("verticalAlign", "verticalAlign"),
        ("textDecoration", "textDecorationLine"),
        ("listStyleType", "listStyleType"),
        ("fontStyle", "fontStyle"),
        ("fontWeight", "fontWeight"),
    )

    declaration = parseStyle(inline_style_string)
    converted = {}
    for css_property, output_key in property_to_key:
        value = getattr(declaration, css_property)
        if value != "":
            converted[output_key] = value
    return converted
|