diff --git a/src/epub_converter/html_epub_preprocessor.py b/src/epub_converter/html_epub_preprocessor.py index 450d776..f9c2c06 100644 --- a/src/epub_converter/html_epub_preprocessor.py +++ b/src/epub_converter/html_epub_preprocessor.py @@ -198,7 +198,7 @@ def _remove_headings_content(content_tag, title_of_chapter: str): text = tag if isinstance(tag, NavigableString) else tag.text if re.sub(r"[\s\xa0]", "", text): text = re.sub(r"[\s\xa0]", " ", text).lower() - text = text.strip() # delete extra spaces + text = text.strip() # delete extra spaces if title_of_chapter == text or \ (title_of_chapter in text and re.findall(r"^h[1-3]$", tag.name or content_tag.name)): diff --git a/src/livecarta_config.py b/src/livecarta_config.py index a81ffca..9929cda 100644 --- a/src/livecarta_config.py +++ b/src/livecarta_config.py @@ -120,13 +120,20 @@ class LiveCartaConfig: ("section", "blockquote",) : ("class", r"feature[1234]"), } - REPLACE_REGEX_WITH_LIVECARTA_CORRESPOND_TAGS = { + """('what to replace', 'parent tag', 'child tag')""" + REPLACE_TAG_WITH_LIVECARTA_CORRESPOND_TAGS = { (r"^h[6-9]$", "^figure$", "^section$", "^div$"): "p", ("^aside$",): "blockquote", - ("^header$", "^footer$"): "span", + ("^header$", "^footer$", ("child", ":not(pre)", "code, kbd, var")): "span", ("^b$",): "strong", + # (("parent", ":not(pre)", "code")): "p", } + """ > == in (p in li)""" TAGS_TO_UNWRAP = [ - "section", "article", "figcaption", "main", "body", "html", + "section", "article", "figcaption", "main", "body", "html", "li > p", ] + + INSERT_TAG_IN_PARENT_TAG = { + ("pre", "code, kbd, var"): "code", + }