Implement task: offset Chapter_i entries to the 1st level

This commit is contained in:
Kiryl
2021-12-23 18:31:17 +03:00
parent 4b1109e6b4
commit e7d028073c

View File

@@ -40,6 +40,9 @@ class EpubConverter:
# key = -1 for top level NavPoints # key = -1 for top level NavPoints
self.adjacency_list: Dict[Union[NavPoint, -1], Union[list, None]] = {} self.adjacency_list: Dict[Union[NavPoint, -1], Union[list, None]] = {}
# nodes whose title contains 'chapter', collected from nested TOC entries so they can be appended to the 1st level
self.offset_sub_nodes = []
# container for all chapters soup objects # container for all chapters soup objects
# here soup object is only part of the .xhtml file # here soup object is only part of the .xhtml file
self.href_chapter_id2soup_html: Dict[tuple, BeautifulSoup] = {} self.href_chapter_id2soup_html: Dict[tuple, BeautifulSoup] = {}
@@ -179,7 +182,6 @@ class EpubConverter:
return links return links
# t_nodes = []
def build_adjacency_list_from_toc(self, element, lvl=0): def build_adjacency_list_from_toc(self, element, lvl=0):
""" """
self.adjacency_list builds based on TOC nested structure, got from self.ebooklib.toc self.adjacency_list builds based on TOC nested structure, got from self.ebooklib.toc
@@ -211,27 +213,29 @@ class EpubConverter:
nav_point.id) nav_point.id)
sub_nodes = [] sub_nodes = []
for i in second: for elem in second:
# if 'chapter' in (i.title.lower() if isinstance(i, Link) else i[0].title.lower()): if 'chapter' in (elem.title.lower() if isinstance(elem, Link) else elem[0].title.lower()):
# self.t_nodes.append(self.build_adjacency_list_from_toc(i, lvl)) self.offset_sub_nodes.append(self.build_adjacency_list_from_toc(elem, lvl))
# else: else:
sub_nodes.append( sub_nodes.append(
self.build_adjacency_list_from_toc(i, lvl + 1)) self.build_adjacency_list_from_toc(elem, lvl + 1))
self.adjacency_list[nav_point] = sub_nodes
self.adjacency_list[nav_point] = sub_nodes or self.offset_sub_nodes
self.hrefs_added_to_toc.add(nav_point.href) self.hrefs_added_to_toc.add(nav_point.href)
return nav_point return nav_point
elif isinstance(element, list) and (lvl == 0): elif isinstance(element, list) and (lvl == 0):
nodes = [] nodes = []
for i in element: # go through every element
for elem in element:
nodes.append( nodes.append(
self.build_adjacency_list_from_toc(i, lvl + 1)) self.build_adjacency_list_from_toc(elem, lvl + 1))
# for j in self.t_nodes: # go through every offset sub element
# nodes.append(j) for offset_sub_node in self.offset_sub_nodes:
# self.t_nodes = [] nodes.append(offset_sub_node)
# self.offset_sub_nodes = []
# self.adjacency_list[-1] = nodes
self.adjacency_list[-1] = nodes
else: else:
assert 0, f'Error. Element is not tuple/Link/list instance: {type(element)}' assert 0, f'Error. Element is not tuple/Link/list instance: {type(element)}'
@@ -460,7 +464,7 @@ class EpubConverter:
path_to_html=nav_point.href, path_to_html=nav_point.href,
access=self.access, access=self.access,
path2aws_path=self.book_image_src_path2aws_path, path2aws_path=self.book_image_src_path2aws_path,
book_id=self.file.stem if hasattr(self.file, self.file.stem) else 'book_id') book_id=self.file.stem if hasattr(self.file, 'stem') else 'book_id')
is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
title_preprocessed = prepare_title(title) title_preprocessed = prepare_title(title)
@@ -506,7 +510,7 @@ if __name__ == "__main__":
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0) logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
json_converter = EpubConverter('../../epub/9781614382263.epub', json_converter = EpubConverter('../../epub/9781634259804.epub',
logger=logger_object) logger=logger_object)
tmp = json_converter.convert_to_dict() tmp = json_converter.convert_to_dict()