diff --git a/src/epub_converter/html_epub_preprocessor.py b/src/epub_converter/html_epub_preprocessor.py index 18f8902..e2fe136 100644 --- a/src/epub_converter/html_epub_preprocessor.py +++ b/src/epub_converter/html_epub_preprocessor.py @@ -126,7 +126,7 @@ def _remove_headings_content(content_tag, title_of_chapter: str): title_of_chapter = title_of_chapter.lower() for tag in content_tag.contents: text = tag if isinstance(tag, NavigableString) else tag.text - if re.sub(r'([\s\xa0])', '', text): + if re.sub(r"[\s\xa0]", "", text): text = re.sub(r"[\s\xa0]", " ", text).lower() text = text.strip() # delete extra spaces if title_of_chapter == text or \ @@ -134,9 +134,10 @@ def _remove_headings_content(content_tag, title_of_chapter: str): re.findall(r"^h[1-3]$", tag.name or content_tag.name)): _add_span_to_save_ids_for_links(tag, content_tag) tag.extract() + return elif not isinstance(tag, NavigableString): - _remove_headings_content(tag, title_of_chapter) - break + if not _remove_headings_content(tag, title_of_chapter): + break def _tags_to_correspond_livecarta_tag(chapter_tag): @@ -275,13 +276,13 @@ def _preprocess_div_tags(chapter_tag): Function replace
with : """ for div in chapter_tag.find_all("div"): - if any(attr in ['width', 'border', 'bgcolor'] for attr in div.attrs): + if any(attr in ["width", "border", "bgcolor"] for attr in div.attrs): _wrap_tag_with_table( chapter_tag, tag_to_be_wrapped=div, - width=div.attrs['width'] if div.attrs.get('width') else '100', - border=div.attrs['border'] if div.attrs.get('border') else None, - bg_color=div.attrs['bgcolor'] if div.attrs.get('bgcolor') else None) + width=div.attrs["width"] if div.attrs.get("width") else "100", + border=div.attrs["border"] if div.attrs.get("border") else None, + bg_color=div.attrs["bgcolor"] if div.attrs.get("bgcolor") else None) else: div.name = "p" continue