diff --git a/src/epub_converter/epub_converter.py b/src/epub_converter/epub_converter.py index 9bda1b1..29959c0 100644 --- a/src/epub_converter/epub_converter.py +++ b/src/epub_converter/epub_converter.py @@ -15,15 +15,15 @@ from bs4 import BeautifulSoup, Tag, NavigableString from src.util.helpers import BookLogger from src.livecarta_config import LiveCartaConfig from src.data_objects import ChapterItem, NavPoint -from src.epub_converter.css_processor import CSSPreprocessor +from src.style_preprocessor import CSSPreprocessor from src.epub_converter.html_epub_processor import HtmlEpubPreprocessor from src.epub_converter.image_processing import update_images_src_links from src.epub_converter.footnotes_processing import preprocess_footnotes -from src.epub_converter.tag_inline_style_processor import TagInlineStyleProcessor +from src.tag_inline_style_processor import TagInlineStyleProcessor class EpubConverter: - def __init__(self, book_path, access=None, logger=None, css_processor=None, html_processor=None): + def __init__(self, book_path, access=None, logger: BookLogger = None, css_processor: CSSPreprocessor = None, html_processor: HtmlEpubPreprocessor = None): self.book_path = book_path self.access = access self.logger: BookLogger = logger @@ -257,7 +257,7 @@ class EpubConverter: sub_nodes = [] for elem in second: - if (bool(re.search('^section$|^part$', first.title.lower()))) and lvl == 1: + if (bool(re.search("^section$|^part$", first.title.lower()))) and lvl == 1: self.offset_sub_nodes.append( self.build_adjacency_list_from_toc(elem, lvl)) else: @@ -291,7 +291,7 @@ class EpubConverter: return False def build_adjacency_list_from_spine(self): - def build_manifest_id2html_href() -> dict: + def build_manifest_id2html_href() -> Dict[int, str]: links = dict() for item in self.ebooklib_book.get_items_of_type(ebooklib.ITEM_DOCUMENT): links[item.id] = item.file_name @@ -607,7 +607,7 @@ class EpubConverter: self.logger.log(indent + "Process title.") title_preprocessed: str = self.html_processor.prepare_title(title) self.logger.log(indent + "Process content.") - content_preprocessed: BeautifulSoup = self.html_processor.prepare_content( + content_preprocessed: Union[Tag, BeautifulSoup] = self.html_processor.prepare_content( title_preprocessed, content, remove_title_from_chapter=is_chapter) self.book_image_src_path2aws_path = update_images_src_links(content_preprocessed, diff --git a/src/epub_converter/epub_solver.py b/src/epub_converter/epub_solver.py index 58336e7..ceae0fc 100644 --- a/src/epub_converter/epub_solver.py +++ b/src/epub_converter/epub_solver.py @@ -1,5 +1,5 @@ from src.book_solver import BookSolver -from src.epub_converter.css_processor import CSSPreprocessor +from src.style_preprocessor import CSSPreprocessor from src.epub_converter.html_epub_processor import HtmlEpubPreprocessor from src.epub_converter.epub_converter import EpubConverter diff --git a/src/epub_converter/html_epub_processor.py b/src/epub_converter/html_epub_processor.py index e1c3b18..914b683 100644 --- a/src/epub_converter/html_epub_processor.py +++ b/src/epub_converter/html_epub_processor.py @@ -192,14 +192,18 @@ class HtmlEpubPreprocessor: tag_to_replace: str = rule["tag_to_replace"] if rule["condition"]: for condition_on_tag in ((k, v) for k, v in rule["condition"].items() if v): - if condition_on_tag[0] == 'parent_tags': + if condition_on_tag[0] == "parent_tags": for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]): if tag.parent.select(condition_on_tag[1]): tag.name = tag_to_replace - elif condition_on_tag[0] == 'child_tags': + elif condition_on_tag[0] == "child_tags": for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]): - if not tag.select(re.sub('[():]|not', '', condition_on_tag[1])): - tag.name = tag_to_replace + if "not" in condition_on_tag[1]: + if not tag.select(re.sub("[():]|not", "", condition_on_tag[1])): + tag.name = tag_to_replace + else: + if tag.select(condition_on_tag[1]): + tag.name = tag_to_replace elif condition_on_tag[0] == "attrs": for attr in rule["condition"]["attrs"]: for tag in chapter_tag.find_all([re.compile(tag) for tag in tags], @@ -236,15 +240,15 @@ class HtmlEpubPreprocessor: tag[attr_to_replace] = tag[attr] del tag[attr] - def _unwrap_tags(self, chapter_tag: BeautifulSoup, rules: Dict[str, List[str]]): + def _unwrap_tags(self, chapter_tag: BeautifulSoup, rules: List[Dict[str, List[str]]]): """ Function unwrap tags and moves id to span Parameters ---------- chapter_tag: BeautifulSoup Tag & contents of the chapter tag - rules: Dict[str, List[str]] - dict of tags to unwrap + rules: List[Dict[str, List[str]]] + list of conditions when fire function Returns ------- @@ -252,13 +256,14 @@ class HtmlEpubPreprocessor: Chapter Tag with unwrapped certain tags """ - for tag_name in rules["tags"]: - for tag in chapter_tag.select(tag_name): - # if tag is a subtag - if ">" in tag_name: - tag.parent.attrs.update(tag.attrs) - self._add_span_to_save_ids_for_links(tag, chapter_tag) - tag.unwrap() + for rule in rules: + for tag_name in rule["tags"]: + for tag in chapter_tag.select(tag_name): + # if tag is a subtag + if ">" in tag_name: + tag.parent.attrs.update(tag.attrs) + self._add_span_to_save_ids_for_links(tag, chapter_tag) + tag.unwrap() @staticmethod def _insert_tags_into_correspond_tags(chapter_tag: BeautifulSoup, @@ -293,14 +298,18 @@ class HtmlEpubPreprocessor: tags: List[str] = rule["tags"] if rule["condition"]: for condition_on_tag in ((k, v) for k, v in rule["condition"].items() if v): - if condition_on_tag[0] == 'parent_tags': + if condition_on_tag[0] == "parent_tags": for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]): if tag.parent.select(condition_on_tag[1]): insert(tag) - elif condition_on_tag[0] == 'child_tags': + elif condition_on_tag[0] == "child_tags": for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]): - if not tag.select(re.sub('[():]|not', '', condition_on_tag[1])): - insert(tag) + if "not" in condition_on_tag[1]: + if not tag.select(re.sub("[():]|not", "", condition_on_tag[1])): + tag.unwrap() + else: + if tag.select(condition_on_tag[1]): + tag.unwrap() elif condition_on_tag[0] == "attrs": for attr in rule["condition"]["attrs"]: for tag in chapter_tag.find_all([re.compile(tag) for tag in tags], @@ -441,7 +450,7 @@ class HtmlEpubPreprocessor: # 3-6. for rule in self.preset: func = self.name2function[rule["preset_name"]] - func(content_tag, rule['rules']) + func(content_tag, rule["rules"]) # 7. if remove_title_from_chapter: self._remove_headings_content(content_tag, title_str) diff --git a/src/epub_converter/css_processor.py b/src/style_preprocessor.py similarity index 100% rename from src/epub_converter/css_processor.py rename to src/style_preprocessor.py diff --git a/src/epub_converter/tag_inline_style_processor.py b/src/tag_inline_style_processor.py similarity index 100% rename from src/epub_converter/tag_inline_style_processor.py rename to src/tag_inline_style_processor.py