diff --git a/src/epub_converter/html_epub_preprocessor.py b/src/epub_converter/html_epub_preprocessor.py index e2fe136..627d1f1 100644 --- a/src/epub_converter/html_epub_preprocessor.py +++ b/src/epub_converter/html_epub_preprocessor.py @@ -96,13 +96,11 @@ def _wrap_strings_with_p(chapter_tag): for node in chapter_tag: if isinstance(node, NavigableString): content = str(node) - content = re.sub(r"([\n\t\xa0])", " ", content) - # remove spaces at the beginning and at the end of the string: - content = content.strip() + content = re.sub(r"([\s\xa0])", " ", content).strip() if content: - tag = chapter_tag.new_tag("p") - tag.append(str(node)) - node.replace_with(tag) + p_tag = chapter_tag.new_tag("p") + p_tag.append(str(node)) + node.replace_with(p_tag) def _remove_headings_content(content_tag, title_of_chapter: str): @@ -146,6 +144,7 @@ def _tags_to_correspond_livecarta_tag(chapter_tag): for key in reg_key: tags = chapter_tag.find_all(re.compile(key)) for tag in tags: + # todo can cause appearance of \n
...
->\n
...
\n
(section) tag.name = to_replace_value def _unwrap_tags(chapter_tag): @@ -300,8 +299,6 @@ def _clean_wiley_block(block): h.insert_before(BeautifulSoup(features="lxml").new_tag("br")) - - def _preprocess_block_tags(chapter_tag: Tag): """Function preprocessing