diff --git a/src/epub_converter/epub_converter.py b/src/epub_converter/epub_converter.py index c9e3bbd..a93e577 100644 --- a/src/epub_converter/epub_converter.py +++ b/src/epub_converter/epub_converter.py @@ -40,6 +40,9 @@ class EpubConverter: # key = -1 for top level NavPoints self.adjacency_list: Dict[Union[NavPoint, -1], Union[list, None]] = {} + # list to offset Chapter_i on 1st level + self.offset_sub_nodes = [] + # container for all chapters soup objects # here soup object is only part of the .xhtml file self.href_chapter_id2soup_html: Dict[tuple, BeautifulSoup] = {} @@ -179,7 +182,6 @@ class EpubConverter: return links - # t_nodes = [] def build_adjacency_list_from_toc(self, element, lvl=0): """ self.adjacency_list builds based on TOC nested structure, got from self.ebooklib.toc @@ -211,27 +213,29 @@ class EpubConverter: nav_point.id) sub_nodes = [] - for i in second: - # if 'chapter' in (i.title.lower() if isinstance(i, Link) else i[0].title.lower()): - # self.t_nodes.append(self.build_adjacency_list_from_toc(i, lvl)) - # else: - sub_nodes.append( - self.build_adjacency_list_from_toc(i, lvl + 1)) - self.adjacency_list[nav_point] = sub_nodes + for elem in second: + if 'chapter' in (elem.title.lower() if isinstance(elem, Link) else elem[0].title.lower()): + self.offset_sub_nodes.append(self.build_adjacency_list_from_toc(elem, lvl)) + else: + sub_nodes.append( + self.build_adjacency_list_from_toc(elem, lvl + 1)) + + self.adjacency_list[nav_point] = sub_nodes or self.offset_sub_nodes self.hrefs_added_to_toc.add(nav_point.href) return nav_point elif isinstance(element, list) and (lvl == 0): nodes = [] - for i in element: + # go through every element + for elem in element: nodes.append( - self.build_adjacency_list_from_toc(i, lvl + 1)) - # for j in self.t_nodes: - # nodes.append(j) - # self.t_nodes = [] - # - # self.adjacency_list[-1] = nodes + self.build_adjacency_list_from_toc(elem, lvl + 1)) + # go through every offset sub element + for offset_sub_node in self.offset_sub_nodes: + nodes.append(offset_sub_node) + self.offset_sub_nodes = [] + self.adjacency_list[-1] = nodes else: assert 0, f'Error. Element is not tuple/Link/list instance: {type(element)}' @@ -460,7 +464,7 @@ class EpubConverter: path_to_html=nav_point.href, access=self.access, path2aws_path=self.book_image_src_path2aws_path, - book_id=self.file.stem if hasattr(self.file, self.file.stem) else 'book_id') + book_id=self.file.stem if hasattr(self.file, 'stem') else 'book_id') is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS title_preprocessed = prepare_title(title) @@ -506,7 +510,7 @@ if __name__ == "__main__": logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0) - json_converter = EpubConverter('../../epub/9781614382263.epub', + json_converter = EpubConverter('../../epub/9781634259804.epub', logger=logger_object) tmp = json_converter.convert_to_dict()