forked from LiveCarta/BookConverter
LAW-6695|Change process of removing heading content
This commit is contained in:
@@ -82,7 +82,7 @@ class BookSolver:
|
|||||||
file_path=f"{self.access.url}/doc-convert/{self.book_id}/presets")
|
file_path=f"{self.access.url}/doc-convert/{self.book_id}/presets")
|
||||||
self.book_logger.log("\033[4mPreset\033[0m file was received from server.")
|
self.book_logger.log("\033[4mPreset\033[0m file was received from server.")
|
||||||
self.preset_path = pathlib.Path(
|
self.preset_path = pathlib.Path(
|
||||||
str(self.save_file(content, path_to_save="preset", file_type="json")))
|
str(self.save_file(content, path_to_save="preset/", file_type="json")))
|
||||||
except FileNotFoundError as f_err:
|
except FileNotFoundError as f_err:
|
||||||
self.book_logger.log(
|
self.book_logger.log(
|
||||||
"Can't get preset file from server.", logging.ERROR)
|
"Can't get preset file from server.", logging.ERROR)
|
||||||
|
|||||||
@@ -100,6 +100,11 @@ class HtmlEpubProcessor:
|
|||||||
text = text.strip() # delete extra spaces
|
text = text.strip() # delete extra spaces
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
def remove_text(found_tag: Tag):
|
||||||
|
for text_node in found_tag.find_all(text=True):
|
||||||
|
if text_node.strip():
|
||||||
|
text_node.extract()
|
||||||
|
|
||||||
title_of_chapter: str = title_of_chapter.lower()
|
title_of_chapter: str = title_of_chapter.lower()
|
||||||
title_in_text: List[Tag] = chapter_tag.find_all(lambda tag: (title_of_chapter in text_preparing(tag) and
|
title_in_text: List[Tag] = chapter_tag.find_all(lambda tag: (title_of_chapter in text_preparing(tag) and
|
||||||
len(text_preparing(tag)) != 0 and
|
len(text_preparing(tag)) != 0 and
|
||||||
@@ -110,10 +115,10 @@ class HtmlEpubProcessor:
|
|||||||
re.findall(r"^h[1-5]$", tag.name or chapter_tag.name))
|
re.findall(r"^h[1-5]$", tag.name or chapter_tag.name))
|
||||||
if title_in_text:
|
if title_in_text:
|
||||||
self.html_presets_processor.add_span_to_save_ids_for_links(title_in_text[-1], chapter_tag)
|
self.html_presets_processor.add_span_to_save_ids_for_links(title_in_text[-1], chapter_tag)
|
||||||
title_in_text[-1].extract()
|
remove_text(title_in_text[-1])
|
||||||
elif text_in_title:
|
elif text_in_title:
|
||||||
[self.html_presets_processor.add_span_to_save_ids_for_links(tag, chapter_tag) for tag in text_in_title]
|
[self.html_presets_processor.add_span_to_save_ids_for_links(tag, chapter_tag) for tag in text_in_title]
|
||||||
[tag.extract() for tag in text_in_title]
|
[remove_text(tag) for tag in text_in_title]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _class_removing(chapter_tag: BeautifulSoup):
|
def _class_removing(chapter_tag: BeautifulSoup):
|
||||||
|
|||||||
@@ -116,21 +116,11 @@ class HtmlPresetsProcessor:
|
|||||||
new_tag.string = "\xa0"
|
new_tag.string = "\xa0"
|
||||||
tag_to_be_removed.insert_before(new_tag)
|
tag_to_be_removed.insert_before(new_tag)
|
||||||
|
|
||||||
def has_child_with_id(tag):
|
if tag_to_be_removed.attrs.get("id"):
|
||||||
"""
|
|
||||||
Check if any child tag has an 'id' attribute.
|
|
||||||
"""
|
|
||||||
for child in tag.children:
|
|
||||||
if child.has_attr('id'):
|
|
||||||
return child
|
|
||||||
return False
|
|
||||||
|
|
||||||
child_with_id = has_child_with_id(tag_to_be_removed)
|
|
||||||
if tag_to_be_removed.attrs.get("id") or child_with_id:
|
|
||||||
_insert_span_with_attrs_before_tag(chapter_tag=chapter_tag,
|
_insert_span_with_attrs_before_tag(chapter_tag=chapter_tag,
|
||||||
tag_to_be_removed=tag_to_be_removed,
|
tag_to_be_removed=tag_to_be_removed,
|
||||||
id_=tag_to_be_removed.attrs["id"] if tag_to_be_removed.attrs.get("id") else child_with_id.attrs["id"],
|
id_=tag_to_be_removed.attrs["id"],
|
||||||
class_=tag_to_be_removed.attrs["class"] if tag_to_be_removed.attrs.get("id") else child_with_id.attrs.get("class"))
|
class_=tag_to_be_removed.attrs["class"])
|
||||||
|
|
||||||
def _process_tag_using_table(self, **kwargs):
|
def _process_tag_using_table(self, **kwargs):
|
||||||
def _wrap_tag_with_table(width: str = "100", border: str = "", bg_color: str = None) -> Tag:
|
def _wrap_tag_with_table(width: str = "100", border: str = "", bg_color: str = None) -> Tag:
|
||||||
@@ -270,7 +260,8 @@ class HtmlPresetsProcessor:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _replace_text(**kwargs):
|
def _replace_text(**kwargs):
|
||||||
found_tag_text = kwargs["found_tag"].string or kwargs["found_tag"].text
|
found_tag_text = kwargs["found_tag"].string if kwargs["found_tag"].string is not None\
|
||||||
|
else kwargs["found_tag"].text
|
||||||
if found_tag_text is not None and re.search(re.compile(kwargs["rule"]["condition"]["text"]), found_tag_text):
|
if found_tag_text is not None and re.search(re.compile(kwargs["rule"]["condition"]["text"]), found_tag_text):
|
||||||
new_text = re.sub(re.compile(
|
new_text = re.sub(re.compile(
|
||||||
kwargs["rule"]["condition"]["text"]), kwargs["rule"]["text_to_replace"], found_tag_text)
|
kwargs["rule"]["condition"]["text"]), kwargs["rule"]["text_to_replace"], found_tag_text)
|
||||||
|
|||||||
Reference in New Issue
Block a user