forked from LiveCarta/BookConverter
Update livecarta_config.py with processing changes
This commit is contained in:
@@ -198,7 +198,7 @@ def _remove_headings_content(content_tag, title_of_chapter: str):
|
||||
text = tag if isinstance(tag, NavigableString) else tag.text
|
||||
if re.sub(r"[\s\xa0]", "", text):
|
||||
text = re.sub(r"[\s\xa0]", " ", text).lower()
|
||||
- text = text.strip() # delete extra spaces
|
||||
+ text = text.strip() # delete extra spaces
|
||||
if title_of_chapter == text or \
|
||||
(title_of_chapter in text and
|
||||
re.findall(r"^h[1-3]$", tag.name or content_tag.name)):
|
||||
|
||||
@@ -120,13 +120,20 @@ class LiveCartaConfig:
|
||||
("section", "blockquote",) : ("class", r"feature[1234]"),
|
||||
}
|
||||
|
||||
- REPLACE_REGEX_WITH_LIVECARTA_CORRESPOND_TAGS = {
|
||||
+ """('what to replace', 'parent tag', 'child tag')"""
|
||||
+ REPLACE_TAG_WITH_LIVECARTA_CORRESPOND_TAGS = {
|
||||
(r"^h[6-9]$", "^figure$", "^section$", "^div$"): "p",
|
||||
("^aside$",): "blockquote",
|
||||
- ("^header$", "^footer$"): "span",
|
||||
+ ("^header$", "^footer$", ("child", ":not(pre)", "code, kbd, var")): "span",
|
||||
("^b$",): "strong",
|
||||
+ # (("parent", ":not(pre)", "code")): "p",
|
||||
}
|
||||
|
||||
+ """ > == in (p in li)"""
|
||||
TAGS_TO_UNWRAP = [
|
||||
- "section", "article", "figcaption", "main", "body", "html",
|
||||
+ "section", "article", "figcaption", "main", "body", "html", "li > p",
|
||||
]
|
||||
|
||||
+ INSERT_TAG_IN_PARENT_TAG = {
|
||||
+ ("pre", "code, kbd, var"): "code",
|
||||
+ }
|
||||
|
||||
Reference in New Issue
Block a user