forked from LiveCarta/BookConverter
LAW-5957
This commit is contained in:
@@ -52,12 +52,13 @@ class EpubBook(BookSolver):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
epub_file_path = f"../../books/epub/9781614382264.epub"
|
||||
epub_file_path = f"../../books/epub/Deep_Learning_with_Python_Second_Editio.epub"
|
||||
|
||||
logger_object = BookLogger(name="epub")
|
||||
logger_object.configure_book_logger(book_id=epub_file_path.split("/")[-1])
|
||||
|
||||
html_preprocessor = HtmlPresetsProcessor(
|
||||
|
||||
logger=logger_object, preset_path="../../preset/epub_presets.json")
|
||||
style_preprocessor = StyleReader()
|
||||
html_processor = HtmlEpubProcessor(logger=logger_object,
|
||||
|
||||
@@ -2,7 +2,7 @@ import re
|
||||
import json
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from bs4.element import PageElement
|
||||
from typing import List, Set, Dict, Union
|
||||
from typing import Union
|
||||
|
||||
from src.util.helpers import BookLogger
|
||||
|
||||
@@ -29,42 +29,49 @@ class HtmlPresetsProcessor:
|
||||
"text": self._tags_with_text_condition
|
||||
}
|
||||
|
||||
|
||||
@staticmethod
def _tags_with_parent_condition(**kwargs):
    """Collect tags located under a parent selected by a CSS selector.

    Keyword Args:
        body_tag: object whose ``select`` method is called with
            ``family_condition`` (presumably a BeautifulSoup tree - confirm
            against the caller).
        family_condition: selector string passed to ``body_tag.select``.
        tags: iterable of regex strings; each is compiled and passed to
            ``find_all`` on every selected parent.

    Returns:
        A ``(found, tags)`` tuple: ``found`` is True when at least one tag
        was collected, ``tags`` is the list of collected tags in the order
        they were encountered.

    Side effects:
        Every collected tag gets a ``unique_id`` entry in ``tag.attrs``.
    """
    # Compile once instead of once per selected parent.
    name_patterns = [re.compile(name) for name in kwargs["tags"]]
    found_tags: list[Tag] = []
    # Selected parents may be nested in each other, so the same descendant
    # can be reached more than once. A tag is stamped with ``unique_id`` the
    # first time it is seen and skipped afterwards.
    # NOTE(review): a tag that already carries ``unique_id`` from elsewhere
    # is skipped as well - confirm that this is intended.
    u_id = 0
    for parent_tag in kwargs["body_tag"].select(kwargs["family_condition"]):
        for tag in parent_tag.find_all(name_patterns):
            # Compare against None rather than relying on truthiness: the
            # first stamp is 0, which is falsy and would otherwise let the
            # first tag be appended a second time.
            if tag.attrs.get("unique_id") is not None:
                continue
            tag.attrs["unique_id"] = u_id
            u_id += 1
            found_tags.append(tag)
    return bool(found_tags), found_tags
|
||||
|
||||
@staticmethod
def _tags_with_child_condition(**kwargs):
    """Collect tags that have at least one descendant matching a selector.

    Keyword Args:
        body_tag: object whose ``find_all`` method is called with the
            compiled ``tags`` patterns (presumably a BeautifulSoup tree -
            confirm against the caller).
        family_condition: selector string passed to ``tag.select`` for every
            candidate tag; a non-empty result keeps the tag.
        tags: iterable of regex strings; each is compiled and passed to
            ``find_all``.

    Returns:
        A ``(found, tags)`` tuple: ``found`` is True when at least one tag
        was kept, ``tags`` is the list of kept tags in ``find_all`` order.
    """
    name_patterns = [re.compile(name) for name in kwargs["tags"]]
    family_condition = kwargs["family_condition"]
    # A list (not a set) keeps the order in which find_all yields the tags.
    found_tags: list[Tag] = [
        tag
        for tag in kwargs["body_tag"].find_all(name_patterns)
        if tag.select(family_condition)
    ]
    return bool(found_tags), found_tags
|
||||
|
||||
@staticmethod
def _tags_with_attrs_condition(**kwargs):
    """Collect tags whose attributes match the rule's attribute patterns.

    Keyword Args:
        body_tag: object whose ``find_all`` method is called (presumably a
            BeautifulSoup tree - confirm against the caller).
        rule: mapping read as ``rule["condition"]["attrs"]``, an iterable of
            mappings with a ``"name"`` key (attribute name) and a ``"value"``
            key (regex string compiled with ``re.compile``).
        tags: iterable of regex strings; each is compiled and passed to
            ``find_all`` as the name filter.

    Returns:
        A ``(found, tags)`` tuple: ``found`` is True when at least one tag
        was returned by ``find_all``, ``tags`` is the list of those tags.
    """
    # attribute name -> compiled pattern; a repeated name keeps its last
    # pattern, as with the dict(zip(...)) form.
    attr_conditions: dict[str, re.Pattern] = {
        attr["name"]: re.compile(attr["value"])
        for attr in kwargs["rule"]["condition"]["attrs"]
    }
    name_patterns = [re.compile(name) for name in kwargs["tags"]]
    # Copy into a plain list so callers always receive the same container
    # type regardless of what find_all returns.
    found_tags: list[Tag] = list(
        kwargs["body_tag"].find_all(name_patterns, attr_conditions)
    )
    return bool(found_tags), found_tags
|
||||
|
||||
@staticmethod
def _tags_with_text_condition(**kwargs):
    """Collect tags whose name and text both match the rule.

    Keyword Args:
        body_tag: object whose ``find_all`` method is called with a
            predicate (presumably a BeautifulSoup tree - confirm against
            the caller).
        rule: mapping read as ``rule["condition"]["text"]``, a regex string
            searched for in each candidate's ``text``.
        tags: iterable of regex strings, joined with ``|`` into a single
            alternation that is searched for in each candidate's ``name``.

    Returns:
        A ``(found, tags)`` tuple: ``found`` is True when at least one tag
        satisfied the predicate, ``tags`` is the list of those tags.
    """
    # Build both patterns once here; the predicate below runs for every
    # candidate tag, so they should not be rebuilt on each call.
    name_pattern = re.compile(r"(?=(" + "|".join(kwargs["tags"]) + r"))")
    text_pattern = re.compile(kwargs["rule"]["condition"]["text"])

    def _name_and_text_match(t):
        # search() (not fullmatch) on purpose: the alternation may match
        # anywhere inside the tag name, exactly as the inline lambda did.
        return name_pattern.search(t.name) and text_pattern.search(t.text)

    found_tags: list[Tag] = list(kwargs["body_tag"].find_all(_name_and_text_match))
    return bool(found_tags), found_tags
|
||||
|
||||
@staticmethod
|
||||
@@ -104,7 +111,7 @@ class HtmlPresetsProcessor:
|
||||
def _insert_span_with_attrs_before_tag(chapter_tag: BeautifulSoup,
|
||||
tag_to_be_removed: Tag,
|
||||
id_: str,
|
||||
class_: Union[List[str], str]):
|
||||
class_: Union[list[str], str]):
|
||||
"""Function inserts span before tag aren't supported by LiveCarta"""
|
||||
new_tag: Tag = chapter_tag.new_tag("span")
|
||||
new_tag.attrs["id"] = id_ or ""
|
||||
@@ -201,7 +208,7 @@ class HtmlPresetsProcessor:
|
||||
|
||||
def process_tags(self,
|
||||
body_tag: BeautifulSoup,
|
||||
preset_rules: List[Dict[str, Union[List[str], str, Dict[str, Union[List[Dict[str, str]], int, str]]]]],
|
||||
preset_rules: list[dict[str, Union[list[str], str, dict[str, Union[list[dict[str, str]], int, str]]]]],
|
||||
action):
|
||||
"""
|
||||
Function does action with tags
|
||||
@@ -220,9 +227,9 @@ class HtmlPresetsProcessor:
|
||||
|
||||
"""
|
||||
for preset_rule in preset_rules:
|
||||
tags: List[str] = preset_rule["tags"] if preset_rule.get(
|
||||
tags: list[str] = preset_rule["tags"] if preset_rule.get(
|
||||
"tags") else preset_rule["condition"]["tags"]
|
||||
found_tags: List[Tag] = []
|
||||
found_tags: list[Tag] = []
|
||||
if preset_rule["condition"]:
|
||||
conditions_on_tag = tuple((k, v) for k, v in preset_rule["condition"].items() if v)
|
||||
for condition_on_tag in conditions_on_tag:
|
||||
|
||||
@@ -131,6 +131,8 @@ class LiveCartaConfig:
|
||||
"border-left-width": [],
|
||||
"border-bottom-width": [],
|
||||
"border-top": [],
|
||||
"border-right": [],
|
||||
"border-left": [],
|
||||
"border-bottom": [],
|
||||
"list-style-type": [],
|
||||
"list-style-image": [],
|
||||
|
||||
@@ -32,6 +32,8 @@ class StyleReader:
|
||||
"border-left-width": self.convert_tag_style_values,
|
||||
"border-bottom-width": self.convert_tag_style_values,
|
||||
"border-top": self.convert_tag_style_values,
|
||||
"border-right": self.convert_tag_style_values,
|
||||
"border-left": self.convert_tag_style_values,
|
||||
"border-bottom": self.convert_tag_style_values,
|
||||
"list-style-type": lambda x: x if x in LiveCartaConfig.list_types else "disc",
|
||||
"list-style-image": lambda x: "disc",
|
||||
|
||||
Reference in New Issue
Block a user