epub converter: <pre>, <code> fix

This commit is contained in:
shirshasa
2021-07-05 14:39:52 +03:00
parent 532fa42622
commit 14088ccc3a

View File

@@ -382,7 +382,7 @@ def get_tags_between_chapter_marks(first_id, href, html_soup):
return tags
def wrap_text_with_table(main_tag, text, old_tag):
def wrap_span_with_table(main_tag, old_tag):
table = main_tag.new_tag("table")
table.attrs['border'] = '0'
table.attrs['style'] = 'width:100%;'
@@ -391,8 +391,7 @@ def wrap_text_with_table(main_tag, text, old_tag):
td = main_tag.new_tag("td")
td.attrs['style'] = 'font-family: courier new,courier,monospace;'
td.attrs['bgcolor'] = '#f5f5f5'
td.insert(0, str(text))
old_tag.replace_with(td)
old_tag.wrap(td)
td.wrap(tr)
tr.wrap(tbody)
tbody.wrap(table)
@@ -401,20 +400,17 @@ def wrap_text_with_table(main_tag, text, old_tag):
def preprocess_pre_tags(chapter_tag):
    """Convert every ``<pre>`` under *chapter_tag* into a table-wrapped ``<span>``.

    For each ``<pre>`` element:

    1. every direct text child is replaced by its escaped form, while child
       tags are left in place;
    2. the element is renamed to ``span``;
    3. the renamed element is handed to ``wrap_span_with_table`` together
       with *chapter_tag*.

    :param chapter_tag: parsed chapter whose ``<pre>`` tags are rewritten
        in place.
    """
    for pre in chapter_tag.find_all("pre"):
        # Iterate over a snapshot: replacing nodes while walking the live
        # ``.children`` iterator could skip or revisit siblings.
        for child in list(pre.children):
            if isinstance(child, NavigableString):
                # Escape each text node from its *own* content. Assigning to
                # ``child.text`` does not modify the node, and using
                # ``pre.text`` would copy the whole block into every child.
                # NOTE(review): assumes the stored text is meant to hold
                # escaped entities (as the earlier ``escape(pre.text)`` call
                # suggests) -- confirm against the serialisation step.
                child.replace_with(escape(str(child)))
        pre.name = 'span'
        wrap_span_with_table(chapter_tag, pre)
def preprocess_code_tags(chapter_tag):
    """Turn every ``<code>`` element under *chapter_tag* into a styled ``<span>``.

    Each matching tag is renamed in place, so its children (text and nested
    markup) are left untouched, and its ``style`` attribute is overwritten
    with the inline-code colour and monospace font. Other attributes are
    not modified.

    :param chapter_tag: parsed chapter; it is only required to provide
        ``find_all`` returning tags that expose ``name`` and ``attrs``.
    """
    inline_code_style = 'color:#c7254e; font-family: courier new,courier,monospace;'
    for code in chapter_tag.find_all("code"):
        # No ``if not code.children`` guard: ``.children`` is an iterator and
        # therefore always truthy, so that branch could never run. The text
        # is also not re-assigned through ``code.string``, because doing so
        # would drop any tags nested inside the element.
        code.name = 'span'
        code.attrs['style'] = inline_code_style
def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_from_chapter) -> Tuple[str, str]:
@@ -435,8 +431,8 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
clean_headings_content(chapter_tag, title_str)
_process_lists(chapter_tag)
preprocess_table(chapter_tag)
preprocess_pre_tags(chapter_tag)
preprocess_code_tags(chapter_tag)
preprocess_pre_tags(chapter_tag)
# 2. class removal
for tag in chapter_tag.find_all(recursive=True):
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor']):