From d81fc18403c7e998c0945bf5a01f5e9acaba05b4 Mon Sep 17 00:00:00 2001 From: Kibzik Date: Sat, 13 May 2023 16:23:44 +0300 Subject: [PATCH] LAW-6695|Change process of removing heading content --- src/book_solver.py | 2 +- src/epub_converter/html_epub_processor.py | 9 +++++++-- src/html_presets_processor.py | 19 +++++-------------- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/src/book_solver.py b/src/book_solver.py index 9ab9caf..7ca8b28 100644 --- a/src/book_solver.py +++ b/src/book_solver.py @@ -82,7 +82,7 @@ class BookSolver: file_path=f"{self.access.url}/doc-convert/{self.book_id}/presets") self.book_logger.log("\033[4mPreset\033[0m file was received from server.") self.preset_path = pathlib.Path( - str(self.save_file(content, path_to_save="preset", file_type="json"))) + str(self.save_file(content, path_to_save="preset/", file_type="json"))) except FileNotFoundError as f_err: self.book_logger.log( "Can't get preset file from server.", logging.ERROR) diff --git a/src/epub_converter/html_epub_processor.py b/src/epub_converter/html_epub_processor.py index 76604fa..4a484cd 100644 --- a/src/epub_converter/html_epub_processor.py +++ b/src/epub_converter/html_epub_processor.py @@ -100,6 +100,11 @@ class HtmlEpubProcessor: text = text.strip() # delete extra spaces return text + def remove_text(found_tag: Tag): + for text_node in found_tag.find_all(text=True): + if text_node.strip(): + text_node.extract() + title_of_chapter: str = title_of_chapter.lower() title_in_text: List[Tag] = chapter_tag.find_all(lambda tag: (title_of_chapter in text_preparing(tag) and len(text_preparing(tag)) != 0 and @@ -110,10 +115,10 @@ class HtmlEpubProcessor: re.findall(r"^h[1-5]$", tag.name or chapter_tag.name)) if title_in_text: self.html_presets_processor.add_span_to_save_ids_for_links(title_in_text[-1], chapter_tag) - title_in_text[-1].extract() + remove_text(title_in_text[-1]) elif text_in_title: [self.html_presets_processor.add_span_to_save_ids_for_links(tag, chapter_tag) for tag in text_in_title] - [tag.extract() for tag in text_in_title] + [remove_text(tag) for tag in text_in_title] @staticmethod def _class_removing(chapter_tag: BeautifulSoup): diff --git a/src/html_presets_processor.py b/src/html_presets_processor.py index 962f9c9..e0ed486 100644 --- a/src/html_presets_processor.py +++ b/src/html_presets_processor.py @@ -116,21 +116,11 @@ class HtmlPresetsProcessor: new_tag.string = "\xa0" tag_to_be_removed.insert_before(new_tag) - def has_child_with_id(tag): - """ - Check if any child tag has an 'id' attribute. - """ - for child in tag.children: - if child.has_attr('id'): - return child - return False - - child_with_id = has_child_with_id(tag_to_be_removed) - if tag_to_be_removed.attrs.get("id") or child_with_id: + if tag_to_be_removed.attrs.get("id"): _insert_span_with_attrs_before_tag(chapter_tag=chapter_tag, tag_to_be_removed=tag_to_be_removed, - id_=tag_to_be_removed.attrs["id"] if tag_to_be_removed.attrs.get("id") else child_with_id.attrs["id"], - class_=tag_to_be_removed.attrs["class"] if tag_to_be_removed.attrs.get("id") else child_with_id.attrs.get("class")) + id_=tag_to_be_removed.attrs["id"], + class_=tag_to_be_removed.attrs["class"]) def _process_tag_using_table(self, **kwargs): def _wrap_tag_with_table(width: str = "100", border: str = "", bg_color: str = None) -> Tag: @@ -270,7 +260,8 @@ class HtmlPresetsProcessor: @staticmethod def _replace_text(**kwargs): - found_tag_text = kwargs["found_tag"].string or kwargs["found_tag"].text + found_tag_text = kwargs["found_tag"].string if kwargs["found_tag"].string is not None\ + else kwargs["found_tag"].text if found_tag_text is not None and re.search(re.compile(kwargs["rule"]["condition"]["text"]), found_tag_text): new_text = re.sub(re.compile( kwargs["rule"]["condition"]["text"]), kwargs["rule"]["text_to_replace"], found_tag_text)