Add processor (no stem in file)

2021-12-01 16:08:19 +03:00
parent ad6be84c4b
commit ef3502cd0a
2 changed files with 16 additions and 10 deletions
--- a/src/epub_converter/epub_converter.py
+++ b/src/epub_converter/epub_converter.py
@@ -30,9 +30,11 @@ class EpubConverter:
        self.logger: BookLogger = logger
        self.ebooklib_book = epub.read_epub(file)

-        self.html_href2html_body_soup: Dict[str, BeautifulSoup] = {} # main container for all epub .xhtml files
-        self.html_href2subchapter_ids = defaultdict(list) # enumerate all subchapter id for each file
-        self.hrefs_added_to_toc = set() # enumerate all file paths that where added to TOC
+        # main container for all epub .xhtml files
+        self.html_href2html_body_soup: Dict[str, BeautifulSoup] = {}
+        # enumerate all subchapter id for each file
+        self.html_href2subchapter_ids = defaultdict(list)
+        self.hrefs_added_to_toc = set()  # enumerate all file paths that were added to TOC

        # toc tree structure stored as adj.list (NavPoint to list of NavPoints)
        # key = -1 for top level NavPoints
@@ -43,7 +45,8 @@ class EpubConverter:
        self.href_chapter_id2soup_html: Dict[tuple, BeautifulSoup] = {}

        self.internal_anchors = set()
-        self.id_anchor_exist_in_nav_points = False # flag to be updated while ebooklib.toc is parsed
+        # flag to be updated while ebooklib.toc is parsed
+        self.id_anchor_exist_in_nav_points = False
        self.img_href2img_bytes = {}  # file path to bytes
        self.old_image_path2aws_path = {}  # file path from <a> to generated aws path
        self.footnotes_contents: List[str] = []  # to be sent on server as is
@@ -191,7 +194,8 @@ class EpubConverter:
            nav_point = NavPoint(element)
            if nav_point.id:
                self.id_anchor_exist_in_nav_points = True
-                self.html_href2subchapter_ids[nav_point.href].append(nav_point.id)
+                self.html_href2subchapter_ids[nav_point.href].append(
+                    nav_point.id)
            self.adjacency_list[nav_point] = None
            self.hrefs_added_to_toc.add(nav_point.href)
            return nav_point
@@ -202,7 +206,8 @@ class EpubConverter:
            nav_point = NavPoint(first)
            if nav_point.id:
                self.id_anchor_exist_in_nav_points = True
-                self.html_href2subchapter_ids[nav_point.href].append(nav_point.id)
+                self.html_href2subchapter_ids[nav_point.href].append(
+                    nav_point.id)

            sub_nodes = []
            for i in second:
@@ -263,7 +268,8 @@ class EpubConverter:
        # go to line structure
        for html_href in self.html_href2html_body_soup:
            soup = self.html_href2html_body_soup[html_href]
-            self.html_href2html_body_soup[html_href] = unwrap_structural_tags(soup)
+            self.html_href2html_body_soup[html_href] = unwrap_structural_tags(
+                soup)

    @staticmethod
    def create_unique_id(href, id_):
@@ -440,12 +446,12 @@ class EpubConverter:
                                                                  path_to_html=nav_point.href,
                                                                  access=self.access,
                                                                  path2aws_path=self.old_image_path2aws_path,
-                                                                  book_id=self.file.stem or 'book_id')
+                                                                  book_id=getattr(self.file, 'stem', None) or 'book_id')  # file may have no .stem

        is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
        title_preprocessed = prepare_title(title)
        content_preprocessed = prepare_content(title_preprocessed, content,
-                                                                 remove_title_from_chapter=is_chapter)
+                                               remove_title_from_chapter=is_chapter)
        sub_nodes = []
        # warning! not EpubHtmlItems won't be added to chapter
        if self.adjacency_list.get(nav_point):
--- a/src/epub_converter/html_epub_preprocessor.py
+++ b/src/epub_converter/html_epub_preprocessor.py
@@ -56,7 +56,7 @@ def update_src_links_in_images(body_tag: Tag,
                path2aws_path[path_to_img_from_root] = new_folder
        else:
            new_folder = save_image_locally(
-                path_to_img_from_root, img_content, book_id)
+                path_to_img_from_root, img_content, 'book_id')

        img.attrs['src'] = str(new_folder)
        if img.attrs.get('width'):