forked from LiveCarta/BookConverter
Offset Chapter_i TOC entries to the 1st level
This commit is contained in:
@@ -40,6 +40,9 @@ class EpubConverter:
|
|||||||
# key = -1 for top level NavPoints
|
# key = -1 for top level NavPoints
|
||||||
self.adjacency_list: Dict[Union[NavPoint, -1], Union[list, None]] = {}
|
self.adjacency_list: Dict[Union[NavPoint, -1], Union[list, None]] = {}
|
||||||
|
|
||||||
|
# list to offset Chapter_i on 1st level
|
||||||
|
self.offset_sub_nodes = []
|
||||||
|
|
||||||
# container for all chapters soup objects
|
# container for all chapters soup objects
|
||||||
# here soup object is only part of the .xhtml file
|
# here soup object is only part of the .xhtml file
|
||||||
self.href_chapter_id2soup_html: Dict[tuple, BeautifulSoup] = {}
|
self.href_chapter_id2soup_html: Dict[tuple, BeautifulSoup] = {}
|
||||||
@@ -179,7 +182,6 @@ class EpubConverter:
|
|||||||
|
|
||||||
return links
|
return links
|
||||||
|
|
||||||
# t_nodes = []
|
|
||||||
def build_adjacency_list_from_toc(self, element, lvl=0):
|
def build_adjacency_list_from_toc(self, element, lvl=0):
|
||||||
"""
|
"""
|
||||||
self.adjacency_list builds based on TOC nested structure, got from self.ebooklib.toc
|
self.adjacency_list builds based on TOC nested structure, got from self.ebooklib.toc
|
||||||
@@ -211,27 +213,29 @@ class EpubConverter:
|
|||||||
nav_point.id)
|
nav_point.id)
|
||||||
|
|
||||||
sub_nodes = []
|
sub_nodes = []
|
||||||
for i in second:
|
for elem in second:
|
||||||
# if 'chapter' in (i.title.lower() if isinstance(i, Link) else i[0].title.lower()):
|
if 'chapter' in (elem.title.lower() if isinstance(elem, Link) else elem[0].title.lower()):
|
||||||
# self.t_nodes.append(self.build_adjacency_list_from_toc(i, lvl))
|
self.offset_sub_nodes.append(self.build_adjacency_list_from_toc(elem, lvl))
|
||||||
# else:
|
else:
|
||||||
sub_nodes.append(
|
sub_nodes.append(
|
||||||
self.build_adjacency_list_from_toc(i, lvl + 1))
|
self.build_adjacency_list_from_toc(elem, lvl + 1))
|
||||||
self.adjacency_list[nav_point] = sub_nodes
|
|
||||||
|
self.adjacency_list[nav_point] = sub_nodes or self.offset_sub_nodes
|
||||||
self.hrefs_added_to_toc.add(nav_point.href)
|
self.hrefs_added_to_toc.add(nav_point.href)
|
||||||
return nav_point
|
return nav_point
|
||||||
|
|
||||||
elif isinstance(element, list) and (lvl == 0):
|
elif isinstance(element, list) and (lvl == 0):
|
||||||
nodes = []
|
nodes = []
|
||||||
for i in element:
|
# go through every element
|
||||||
|
for elem in element:
|
||||||
nodes.append(
|
nodes.append(
|
||||||
self.build_adjacency_list_from_toc(i, lvl + 1))
|
self.build_adjacency_list_from_toc(elem, lvl + 1))
|
||||||
# for j in self.t_nodes:
|
# go through every offset sub element
|
||||||
# nodes.append(j)
|
for offset_sub_node in self.offset_sub_nodes:
|
||||||
# self.t_nodes = []
|
nodes.append(offset_sub_node)
|
||||||
#
|
self.offset_sub_nodes = []
|
||||||
# self.adjacency_list[-1] = nodes
|
|
||||||
|
|
||||||
|
self.adjacency_list[-1] = nodes
|
||||||
else:
|
else:
|
||||||
assert 0, f'Error. Element is not tuple/Link/list instance: {type(element)}'
|
assert 0, f'Error. Element is not tuple/Link/list instance: {type(element)}'
|
||||||
|
|
||||||
@@ -460,7 +464,7 @@ class EpubConverter:
|
|||||||
path_to_html=nav_point.href,
|
path_to_html=nav_point.href,
|
||||||
access=self.access,
|
access=self.access,
|
||||||
path2aws_path=self.book_image_src_path2aws_path,
|
path2aws_path=self.book_image_src_path2aws_path,
|
||||||
book_id=self.file.stem if hasattr(self.file, self.file.stem) else 'book_id')
|
book_id=self.file.stem if hasattr(self.file, 'stem') else 'book_id')
|
||||||
|
|
||||||
is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
|
is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
|
||||||
title_preprocessed = prepare_title(title)
|
title_preprocessed = prepare_title(title)
|
||||||
@@ -506,7 +510,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
|
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
|
||||||
|
|
||||||
json_converter = EpubConverter('../../epub/9781614382263.epub',
|
json_converter = EpubConverter('../../epub/9781634259804.epub',
|
||||||
logger=logger_object)
|
logger=logger_object)
|
||||||
tmp = json_converter.convert_to_dict()
|
tmp = json_converter.convert_to_dict()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user