forked from LiveCarta/BookConverter
Update livecarta_config.py with processing changes
This commit is contained in:
@@ -198,7 +198,7 @@ def _remove_headings_content(content_tag, title_of_chapter: str):
|
|||||||
text = tag if isinstance(tag, NavigableString) else tag.text
|
text = tag if isinstance(tag, NavigableString) else tag.text
|
||||||
if re.sub(r"[\s\xa0]", "", text):
|
if re.sub(r"[\s\xa0]", "", text):
|
||||||
text = re.sub(r"[\s\xa0]", " ", text).lower()
|
text = re.sub(r"[\s\xa0]", " ", text).lower()
|
||||||
text = text.strip() # delete extra spaces
|
text = text.strip() # delete extra spaces
|
||||||
if title_of_chapter == text or \
|
if title_of_chapter == text or \
|
||||||
(title_of_chapter in text and
|
(title_of_chapter in text and
|
||||||
re.findall(r"^h[1-3]$", tag.name or content_tag.name)):
|
re.findall(r"^h[1-3]$", tag.name or content_tag.name)):
|
||||||
|
|||||||
@@ -120,13 +120,20 @@ class LiveCartaConfig:
|
|||||||
("section", "blockquote",) : ("class", r"feature[1234]"),
|
("section", "blockquote",) : ("class", r"feature[1234]"),
|
||||||
}
|
}
|
||||||
|
|
||||||
REPLACE_REGEX_WITH_LIVECARTA_CORRESPOND_TAGS = {
|
"""('what to replace', 'parent tag', 'child tag')"""
|
||||||
|
REPLACE_TAG_WITH_LIVECARTA_CORRESPOND_TAGS = {
|
||||||
(r"^h[6-9]$", "^figure$", "^section$", "^div$"): "p",
|
(r"^h[6-9]$", "^figure$", "^section$", "^div$"): "p",
|
||||||
("^aside$",): "blockquote",
|
("^aside$",): "blockquote",
|
||||||
("^header$", "^footer$"): "span",
|
("^header$", "^footer$", ("child", ":not(pre)", "code, kbd, var")): "span",
|
||||||
("^b$",): "strong",
|
("^b$",): "strong",
|
||||||
|
# (("parent", ":not(pre)", "code")): "p",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
""" > == in (p in li)"""
|
||||||
TAGS_TO_UNWRAP = [
|
TAGS_TO_UNWRAP = [
|
||||||
"section", "article", "figcaption", "main", "body", "html",
|
"section", "article", "figcaption", "main", "body", "html", "li > p",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
INSERT_TAG_IN_PARENT_TAG = {
|
||||||
|
("pre", "code, kbd, var"): "code",
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user