forked from LiveCarta/BookConverter
Add processor (no stem in file)
This commit is contained in:
@@ -30,9 +30,11 @@ class EpubConverter:
|
||||
self.logger: BookLogger = logger
|
||||
self.ebooklib_book = epub.read_epub(file)
|
||||
|
||||
self.html_href2html_body_soup: Dict[str, BeautifulSoup] = {} # main container for all epub .xhtml files
|
||||
self.html_href2subchapter_ids = defaultdict(list) # enumerate all subchapter id for each file
|
||||
self.hrefs_added_to_toc = set() # enumerate all file paths that were added to TOC
|
||||
# main container for all epub .xhtml files
|
||||
self.html_href2html_body_soup: Dict[str, BeautifulSoup] = {}
|
||||
# enumerate all subchapter id for each file
|
||||
self.html_href2subchapter_ids = defaultdict(list)
|
||||
self.hrefs_added_to_toc = set() # enumerate all file paths that were added to TOC
|
||||
|
||||
# toc tree structure stored as adj.list (NavPoint to list of NavPoints)
|
||||
# key = -1 for top level NavPoints
|
||||
@@ -43,7 +45,8 @@ class EpubConverter:
|
||||
self.href_chapter_id2soup_html: Dict[tuple, BeautifulSoup] = {}
|
||||
|
||||
self.internal_anchors = set()
|
||||
self.id_anchor_exist_in_nav_points = False # flag to be updated while ebooklib.toc is parsed
|
||||
# flag to be updated while ebooklib.toc is parsed
|
||||
self.id_anchor_exist_in_nav_points = False
|
||||
self.img_href2img_bytes = {} # file path to bytes
|
||||
self.old_image_path2aws_path = {} # file path from <a> to generated aws path
|
||||
self.footnotes_contents: List[str] = [] # to be sent on server as is
|
||||
@@ -191,7 +194,8 @@ class EpubConverter:
|
||||
nav_point = NavPoint(element)
|
||||
if nav_point.id:
|
||||
self.id_anchor_exist_in_nav_points = True
|
||||
self.html_href2subchapter_ids[nav_point.href].append(nav_point.id)
|
||||
self.html_href2subchapter_ids[nav_point.href].append(
|
||||
nav_point.id)
|
||||
self.adjacency_list[nav_point] = None
|
||||
self.hrefs_added_to_toc.add(nav_point.href)
|
||||
return nav_point
|
||||
@@ -202,7 +206,8 @@ class EpubConverter:
|
||||
nav_point = NavPoint(first)
|
||||
if nav_point.id:
|
||||
self.id_anchor_exist_in_nav_points = True
|
||||
self.html_href2subchapter_ids[nav_point.href].append(nav_point.id)
|
||||
self.html_href2subchapter_ids[nav_point.href].append(
|
||||
nav_point.id)
|
||||
|
||||
sub_nodes = []
|
||||
for i in second:
|
||||
@@ -263,7 +268,8 @@ class EpubConverter:
|
||||
# go to line structure
|
||||
for html_href in self.html_href2html_body_soup:
|
||||
soup = self.html_href2html_body_soup[html_href]
|
||||
self.html_href2html_body_soup[html_href] = unwrap_structural_tags(soup)
|
||||
self.html_href2html_body_soup[html_href] = unwrap_structural_tags(
|
||||
soup)
|
||||
|
||||
@staticmethod
|
||||
def create_unique_id(href, id_):
|
||||
@@ -440,12 +446,12 @@ class EpubConverter:
|
||||
path_to_html=nav_point.href,
|
||||
access=self.access,
|
||||
path2aws_path=self.old_image_path2aws_path,
|
||||
book_id=self.file.stem or 'book_id')
|
||||
book_id=lambda x: self.file.stem if hasattr(self.file, self.file.stem) else 'book_id')
|
||||
|
||||
is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
|
||||
title_preprocessed = prepare_title(title)
|
||||
content_preprocessed = prepare_content(title_preprocessed, content,
|
||||
remove_title_from_chapter=is_chapter)
|
||||
remove_title_from_chapter=is_chapter)
|
||||
sub_nodes = []
|
||||
# warning! items that are not EpubHtmlItems won't be added to chapter
|
||||
if self.adjacency_list.get(nav_point):
|
||||
|
||||
@@ -56,7 +56,7 @@ def update_src_links_in_images(body_tag: Tag,
|
||||
path2aws_path[path_to_img_from_root] = new_folder
|
||||
else:
|
||||
new_folder = save_image_locally(
|
||||
path_to_img_from_root, img_content, book_id)
|
||||
path_to_img_from_root, img_content, 'book_id')
|
||||
|
||||
img.attrs['src'] = str(new_folder)
|
||||
if img.attrs.get('width'):
|
||||
|
||||
Reference in New Issue
Block a user