diff --git a/src/html_epub_preprocessor.py b/src/html_epub_preprocessor.py
index 85d5900..06878de 100644
--- a/src/html_epub_preprocessor.py
+++ b/src/html_epub_preprocessor.py
@@ -382,7 +382,7 @@ def get_tags_between_chapter_marks(first_id, href, html_soup):
     return tags
 
 
-def wrap_text_with_table(main_tag, text, old_tag):
+def wrap_span_with_table(main_tag, old_tag):
     table = main_tag.new_tag("table")
     table.attrs['border'] = '0'
     table.attrs['style'] = 'width:100%;'
@@ -391,8 +391,7 @@ def wrap_text_with_table(main_tag, text, old_tag):
     td = main_tag.new_tag("td")
     td.attrs['style'] = 'font-family: courier new,courier,monospace;'
     td.attrs['bgcolor'] = '#f5f5f5'
-    td.insert(0, str(text))
-    old_tag.replace_with(td)
+    old_tag.wrap(td)
     td.wrap(tr)
     tr.wrap(tbody)
     tbody.wrap(table)
@@ -401,20 +400,24 @@ def wrap_text_with_table(main_tag, text, old_tag):
 
 def preprocess_pre_tags(chapter_tag):
+    """Escape the direct text of each <pre>, rename it to <span>, wrap it in a table."""
     for pre in chapter_tag.find_all("pre"):
-        if not pre.children:
-            assert 1, 'Pre tag has other tags.'
-        else:
-            wrap_text_with_table(chapter_tag, escape(pre.text), pre)
+        # Snapshot the children: replace_with() edits the list being walked.
+        for child in list(pre.children):
+            # Exact type on purpose: Comment/CData subclasses stay untouched.
+            if type(child) is NavigableString:
+                # Escape each text node with its own text; nested tags are kept.
+                child.replace_with(escape(child))
+        pre.name = 'span'
+        wrap_span_with_table(chapter_tag, pre)
 
 
 def preprocess_code_tags(chapter_tag):
+    """Turn every <code> into an inline-styled <span>, keeping its children."""
     for code in chapter_tag.find_all("code"):
-        if not code.children:
-            assert 1, 'Code tag has other tags.'
-        else:
-            code.string = escape(code.text)
-            code.name = 'span'
-            code.attrs['style'] = 'color:#c7254e; font-family: courier new,courier,monospace;'
+        code.name = 'span'
+        code.attrs['style'] = 'color:#c7254e; font-family: courier new,courier,monospace;'
 
 
 def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_from_chapter) -> Tuple[str, str]:
@@ -435,8 +436,8 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
     clean_headings_content(chapter_tag, title_str)
     _process_lists(chapter_tag)
     preprocess_table(chapter_tag)
-    preprocess_pre_tags(chapter_tag)
     preprocess_code_tags(chapter_tag)
+    preprocess_pre_tags(chapter_tag)
     # 2. class removal
     for tag in chapter_tag.find_all(recursive=True):
         if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor']):