forked from LiveCarta/BookConverter
Improve remove headings content
This commit is contained in:
@@ -190,14 +190,32 @@ def _remove_headings_content(content_tag, title_of_chapter: str):
|
|||||||
title_of_chapter = title_of_chapter.lower()
|
title_of_chapter = title_of_chapter.lower()
|
||||||
for tag in content_tag.contents:
|
for tag in content_tag.contents:
|
||||||
text = tag if isinstance(tag, NavigableString) else tag.text
|
text = tag if isinstance(tag, NavigableString) else tag.text
|
||||||
if text:
|
if re.sub(r'([\s\xa0])', '', text):
|
||||||
text = re.sub(r"[\s\xa0]", " ", text).lower()
|
text = re.sub(r"[\s\xa0]", " ", text).lower()
|
||||||
text = text.strip() # delete extra spaces
|
text = text.strip() # delete extra spaces
|
||||||
if title_of_chapter == text or \
|
if title_of_chapter == text or \
|
||||||
(title_of_chapter in text and re.findall(r"^h[1-3]$", tag.name)):
|
(title_of_chapter in text and re.findall(r"^h[1-3]$", tag.name)):
|
||||||
_add_span_to_save_ids_for_links(tag, content_tag)
|
_add_span_to_save_ids_for_links(tag, content_tag)
|
||||||
tag.extract()
|
tag.extract()
|
||||||
break
|
elif not isinstance(tag, NavigableString):
|
||||||
|
_remove_headings_content(tag, title_of_chapter)
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
|
def _tags_to_correspond_livecarta_tag(chapter_tag):
    """Rename tags inside ``chapter_tag`` to their configured LiveCarta names.

    ``LiveCartaConfig.REPLACE_REGEX_WITH_LIVECARTA_CORRESPOND_TAGS`` is read as
    a mapping whose keys are iterables of regex pattern strings and whose
    values are the replacement tag names. Every descendant returned by
    ``chapter_tag.find_all`` for a compiled pattern gets its ``name`` set to
    the replacement. The tree is modified in place; nothing is returned.

    NOTE(review): ``chapter_tag`` is presumably a BeautifulSoup ``Tag`` --
    confirm against the callers.
    """
    replacements = LiveCartaConfig.REPLACE_REGEX_WITH_LIVECARTA_CORRESPOND_TAGS
    for patterns, livecarta_name in replacements.items():
        for pattern in patterns:
            name_regex = re.compile(pattern)
            # find_all is evaluated fully before any rename happens, so the
            # renames cannot affect which tags this pattern selected.
            matched_tags = chapter_tag.find_all(name_regex)
            for matched in matched_tags:
                matched.name = livecarta_name
|
||||||
|
|
||||||
|
def _unwrap_tags(chapter_tag):
    """Unwrap every tag listed in ``LiveCartaConfig.TAGS_TO_UNWRAP``.

    For each configured tag name, all matches found under ``chapter_tag`` are
    first passed to ``_add_span_to_save_ids_for_links`` and then unwrapped.
    The tree is modified in place; nothing is returned.

    NOTE(review): the helper presumably moves the tag's id onto a span so
    that links pointing at it keep working -- its definition is not visible
    here, confirm before relying on this.
    """
    for tag_name in LiveCartaConfig.TAGS_TO_UNWRAP:
        matches = chapter_tag.find_all(tag_name)
        for node in matches:
            # The helper must run before unwrap(), while the node is still
            # attached to the tree.
            _add_span_to_save_ids_for_links(node, chapter_tag)
            node.unwrap()
|
||||||
|
|
||||||
|
|
||||||
# todo remove
|
# todo remove
|
||||||
|
|||||||
Reference in New Issue
Block a user