From 8eda9337eff63bbbb38775faa23e8e0ab6a7bc5d Mon Sep 17 00:00:00 2001
From: shirshasa
Date: Fri, 9 Jul 2021 10:53:56 +0300
Subject: [PATCH] epub converter: update pre

---
 src/html_epub_preprocessor.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/html_epub_preprocessor.py b/src/html_epub_preprocessor.py
index 3160c88..c7c0981 100644
--- a/src/html_epub_preprocessor.py
+++ b/src/html_epub_preprocessor.py
@@ -431,6 +431,14 @@ def preprocess_block_tags(chapter_tag):
         block.unwrap()
 
 
+def _prepare_formatted(text):
+    text = text.replace("<", "\x3C;")
+    text = text.replace(">", "\x3E;")
+    text = text.replace('\t', "\xa0 \xa0 \xa0 ")
+    text = text.replace(' ', "\xa0 ")
+    return text
+
+
 def preprocess_pre_tags(chapter_tag):
     for pre in chapter_tag.find_all("pre"):
         new_tag = BeautifulSoup(features='lxml').new_tag("span")
@@ -441,15 +449,13 @@ def preprocess_pre_tags(chapter_tag):
         for child in pre.children:
             if isinstance(child, NavigableString):
                 text = pre.text
-                text = text.replace("<", "\x3C;")
-                text = text.replace(">", "\x3E;")
-                text = text.replace('\t', "\xa0 \xa0 \xa0 ")
-                text = text.replace(' ', "\xa0 ")
+                text = _prepare_formatted(text)
                 elements = re.split('\r\n|\n|\r', text)
                 for i in elements:
                     new_tag.append(NavigableString(i))
                     new_tag.append(BeautifulSoup(features='lxml').new_tag('br'))
             else:
+                child.string = _prepare_formatted(child.text)
                 new_tag.append(child.extract())
         if to_add_br:
             new_tag.append(BeautifulSoup(features='lxml').new_tag('br'))