From 536c1d23e438047d7dd69b6ec132a78e7543ec1f Mon Sep 17 00:00:00 2001 From: Kiryl Date: Thu, 23 Jun 2022 17:43:51 +0300 Subject: [PATCH] Cut preparing title function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/epub_converter/html_epub_preprocessor.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/epub_converter/html_epub_preprocessor.py b/src/epub_converter/html_epub_preprocessor.py index 793247c..18f8902 100644 --- a/src/epub_converter/html_epub_preprocessor.py +++ b/src/epub_converter/html_epub_preprocessor.py @@ -79,10 +79,8 @@ def get_tags_between_chapter_marks(first_id: str, href: str, html_soup: Beautifu def prepare_title(title_of_chapter: str) -> str: """Function finalise processing/cleaning title""" title_str = BeautifulSoup(title_of_chapter, features="lxml").string - title_str = re.sub(r"([\n\t\xa0])", " ", title_str) - title_str = re.sub(r" +", " ", title_str).rstrip() - # clean whitespace characters ([\r\n\t\f\v ]) - title_str = re.sub(r"(^\s+)|(\s+$)", "", title_str) + # clean extra whitespace characters ([\r\n\t\f\v ]) + title_str = re.sub(r"[\s\xa0]", " ", title_str).strip() return title_str