This commit is contained in:
Kiryl
2022-12-09 16:24:22 +03:00
parent b651d6d396
commit 256a1abba0
5 changed files with 29 additions and 69 deletions

View File

@@ -52,12 +52,13 @@ class EpubBook(BookSolver):
if __name__ == "__main__":
epub_file_path = f"../../books/epub/9781614382264.epub"
epub_file_path = f"../../books/epub/Deep_Learning_with_Python_Second_Editio.epub"
logger_object = BookLogger(name="epub")
logger_object.configure_book_logger(book_id=epub_file_path.split("/")[-1])
html_preprocessor = HtmlPresetsProcessor(
logger=logger_object, preset_path="../../preset/epub_presets.json")
style_preprocessor = StyleReader()
html_processor = HtmlEpubProcessor(logger=logger_object,

View File

@@ -2,7 +2,7 @@ import re
import json
from bs4 import BeautifulSoup, Tag
from bs4.element import PageElement
from typing import List, Set, Dict, Union
from typing import Union
from src.util.helpers import BookLogger
@@ -29,42 +29,49 @@ class HtmlPresetsProcessor:
"text": self._tags_with_text_condition
}
@staticmethod
def _tags_with_parent_condition(**kwargs):
    """
    Collect tags located under a parent that matches a CSS selector.

    Keyword arguments read by this function:
        body_tag: object whose ``select`` is called with the selector
        family_condition: selector string passed to ``body_tag.select``
        tags: iterable of regex strings used to filter tag names

    Returns:
        tuple ``(found, tags)`` where ``found`` is True when at least one tag
        was collected and ``tags`` holds every collected tag exactly once,
        in discovery order.

    Side effect:
        each collected tag receives a "unique_id" attribute; tags that
        already carry that attribute are skipped.
    """
    found_tags: list[Tag] = []
    # compile the name filters once instead of once per matched parent
    name_patterns = [re.compile(tag) for tag in kwargs["tags"]]
    # add unique id in order not to add duplicates to the found_tags
    # (a tag reachable through several matched parents would repeat otherwise)
    u_id = 0
    for parent_tag in kwargs["body_tag"].select(kwargs["family_condition"]):
        for tag in parent_tag.find_all(name_patterns):
            # Test for presence, not truthiness: the first id handed out is 0,
            # and a falsy check would let that tag be collected a second time.
            if "unique_id" not in tag.attrs:
                tag.attrs["unique_id"] = u_id
                u_id += 1
                found_tags.append(tag)
    return bool(found_tags), found_tags
@staticmethod
def _tags_with_child_condition(**kwargs):
    """
    Collect tags that contain at least one descendant matching a CSS selector.

    Keyword arguments read by this function:
        body_tag: object whose ``find_all`` is called with the name filters
        family_condition: selector string passed to each candidate's ``select``
        tags: iterable of regex strings used to filter tag names

    Returns:
        tuple ``(found, tags)`` where ``found`` is True when at least one tag
        was collected and ``tags`` is the list of collected tags in the order
        ``find_all`` produced them.
    """
    name_patterns = [re.compile(tag) for tag in kwargs["tags"]]
    # keep only candidates whose select() result is non-empty
    found_tags: list[Tag] = [
        tag
        for tag in kwargs["body_tag"].find_all(name_patterns)
        if tag.select(kwargs["family_condition"])
    ]
    return bool(found_tags), found_tags
@staticmethod
def _tags_with_attrs_condition(**kwargs):
    """
    Collect tags whose attributes match the regexes given in a preset rule.

    Keyword arguments read by this function:
        body_tag: object whose ``find_all`` is called
        tags: iterable of regex strings used to filter tag names
        rule: mapping; ``rule["condition"]["attrs"]`` is iterated and each
            item's "name" and "value" entries are read

    Returns:
        tuple ``(found, tags)`` where ``found`` is True when at least one tag
        was collected and ``tags`` is the list of collected tags in the order
        ``find_all`` produced them.
    """
    # attribute name -> compiled regex for its value; when a name is listed
    # more than once the last entry wins
    attr_conditions: dict[str, re.Pattern] = {
        attr["name"]: re.compile(attr["value"])
        for attr in kwargs["rule"]["condition"]["attrs"]
    }
    name_patterns = [re.compile(tag) for tag in kwargs["tags"]]
    found_tags: list[Tag] = list(
        kwargs["body_tag"].find_all(name_patterns, attr_conditions))
    return bool(found_tags), found_tags
@staticmethod
def _tags_with_text_condition(**kwargs):
    """
    Collect tags whose name matches one of the given tag regexes and whose
    text matches the rule's text regex.

    Keyword arguments read by this function:
        body_tag: object whose ``find_all`` is called with a predicate
        tags: iterable of regex strings, joined with "|" into one pattern
        rule: mapping; ``rule["condition"]["text"]`` is the text regex

    Returns:
        tuple ``(found, tags)`` where ``found`` is True when at least one tag
        was collected and ``tags`` is the list of collected tags in the order
        ``find_all`` produced them.
    """
    # Build both patterns once; they do not depend on the tag being inspected.
    # The name pattern is applied with search(), so it is not anchored to the
    # start or end of the tag name.
    name_pattern = re.compile(r"(?=(" + '|'.join(kwargs["tags"]) + r"))")
    text_pattern = re.compile(kwargs["rule"]["condition"]["text"])
    found_tags: list[Tag] = list(kwargs["body_tag"].find_all(
        lambda t: name_pattern.search(t.name) and text_pattern.search(t.text)))
    return bool(found_tags), found_tags
@staticmethod
@@ -104,7 +111,7 @@ class HtmlPresetsProcessor:
def _insert_span_with_attrs_before_tag(chapter_tag: BeautifulSoup,
tag_to_be_removed: Tag,
id_: str,
class_: Union[List[str], str]):
class_: Union[list[str], str]):
"""Function inserts span before tag aren't supported by LiveCarta"""
new_tag: Tag = chapter_tag.new_tag("span")
new_tag.attrs["id"] = id_ or ""
@@ -201,7 +208,7 @@ class HtmlPresetsProcessor:
def process_tags(self,
body_tag: BeautifulSoup,
preset_rules: List[Dict[str, Union[List[str], str, Dict[str, Union[List[Dict[str, str]], int, str]]]]],
preset_rules: list[dict[str, Union[list[str], str, dict[str, Union[list[dict[str, str]], int, str]]]]],
action):
"""
Function does action with tags
@@ -220,9 +227,9 @@ class HtmlPresetsProcessor:
"""
for preset_rule in preset_rules:
tags: List[str] = preset_rule["tags"] if preset_rule.get(
tags: list[str] = preset_rule["tags"] if preset_rule.get(
"tags") else preset_rule["condition"]["tags"]
found_tags: List[Tag] = []
found_tags: list[Tag] = []
if preset_rule["condition"]:
conditions_on_tag = tuple((k, v) for k, v in preset_rule["condition"].items() if v)
for condition_on_tag in conditions_on_tag:

View File

@@ -131,6 +131,8 @@ class LiveCartaConfig:
"border-left-width": [],
"border-bottom-width": [],
"border-top": [],
"border-right": [],
"border-left": [],
"border-bottom": [],
"list-style-type": [],
"list-style-image": [],

View File

@@ -32,6 +32,8 @@ class StyleReader:
"border-left-width": self.convert_tag_style_values,
"border-bottom-width": self.convert_tag_style_values,
"border-top": self.convert_tag_style_values,
"border-right": self.convert_tag_style_values,
"border-left": self.convert_tag_style_values,
"border-bottom": self.convert_tag_style_values,
"list-style-type": lambda x: x if x in LiveCartaConfig.list_types else "disc",
"list-style-image": lambda x: "disc",