Add processor (no stem in file)

This commit is contained in:
Kiryl
2021-12-01 16:08:19 +03:00
parent ad6be84c4b
commit ef3502cd0a
2 changed files with 16 additions and 10 deletions

View File

@@ -30,8 +30,10 @@ class EpubConverter:
self.logger: BookLogger = logger self.logger: BookLogger = logger
self.ebooklib_book = epub.read_epub(file) self.ebooklib_book = epub.read_epub(file)
self.html_href2html_body_soup: Dict[str, BeautifulSoup] = {} # main container for all epub .xhtml files # main container for all epub .xhtml files
self.html_href2subchapter_ids = defaultdict(list) # enumerate all subchapter id for each file self.html_href2html_body_soup: Dict[str, BeautifulSoup] = {}
# enumerate all subchapter id for each file
self.html_href2subchapter_ids = defaultdict(list)
self.hrefs_added_to_toc = set() # enumerate all file paths that were added to TOC self.hrefs_added_to_toc = set() # enumerate all file paths that were added to TOC
# toc tree structure stored as adj.list (NavPoint to list of NavPoints) # toc tree structure stored as adj.list (NavPoint to list of NavPoints)
@@ -43,7 +45,8 @@ class EpubConverter:
self.href_chapter_id2soup_html: Dict[tuple, BeautifulSoup] = {} self.href_chapter_id2soup_html: Dict[tuple, BeautifulSoup] = {}
self.internal_anchors = set() self.internal_anchors = set()
self.id_anchor_exist_in_nav_points = False # flag to be updated while ebooklib.toc is parsed # flag to be updated while ebooklib.toc is parsed
self.id_anchor_exist_in_nav_points = False
self.img_href2img_bytes = {} # file path to bytes self.img_href2img_bytes = {} # file path to bytes
self.old_image_path2aws_path = {} # file path from <a> to generated aws path self.old_image_path2aws_path = {} # file path from <a> to generated aws path
self.footnotes_contents: List[str] = [] # to be sent to the server as is self.footnotes_contents: List[str] = [] # to be sent to the server as is
@@ -191,7 +194,8 @@ class EpubConverter:
nav_point = NavPoint(element) nav_point = NavPoint(element)
if nav_point.id: if nav_point.id:
self.id_anchor_exist_in_nav_points = True self.id_anchor_exist_in_nav_points = True
self.html_href2subchapter_ids[nav_point.href].append(nav_point.id) self.html_href2subchapter_ids[nav_point.href].append(
nav_point.id)
self.adjacency_list[nav_point] = None self.adjacency_list[nav_point] = None
self.hrefs_added_to_toc.add(nav_point.href) self.hrefs_added_to_toc.add(nav_point.href)
return nav_point return nav_point
@@ -202,7 +206,8 @@ class EpubConverter:
nav_point = NavPoint(first) nav_point = NavPoint(first)
if nav_point.id: if nav_point.id:
self.id_anchor_exist_in_nav_points = True self.id_anchor_exist_in_nav_points = True
self.html_href2subchapter_ids[nav_point.href].append(nav_point.id) self.html_href2subchapter_ids[nav_point.href].append(
nav_point.id)
sub_nodes = [] sub_nodes = []
for i in second: for i in second:
@@ -263,7 +268,8 @@ class EpubConverter:
# go to line structure # go to line structure
for html_href in self.html_href2html_body_soup: for html_href in self.html_href2html_body_soup:
soup = self.html_href2html_body_soup[html_href] soup = self.html_href2html_body_soup[html_href]
self.html_href2html_body_soup[html_href] = unwrap_structural_tags(soup) self.html_href2html_body_soup[html_href] = unwrap_structural_tags(
soup)
@staticmethod @staticmethod
def create_unique_id(href, id_): def create_unique_id(href, id_):
@@ -440,7 +446,7 @@ class EpubConverter:
path_to_html=nav_point.href, path_to_html=nav_point.href,
access=self.access, access=self.access,
path2aws_path=self.old_image_path2aws_path, path2aws_path=self.old_image_path2aws_path,
book_id=self.file.stem or 'book_id') book_id=lambda x: self.file.stem if hasattr(self.file, self.file.stem) else 'book_id')
is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
title_preprocessed = prepare_title(title) title_preprocessed = prepare_title(title)

View File

@@ -56,7 +56,7 @@ def update_src_links_in_images(body_tag: Tag,
path2aws_path[path_to_img_from_root] = new_folder path2aws_path[path_to_img_from_root] = new_folder
else: else:
new_folder = save_image_locally( new_folder = save_image_locally(
path_to_img_from_root, img_content, book_id) path_to_img_from_root, img_content, 'book_id')
img.attrs['src'] = str(new_folder) img.attrs['src'] = str(new_folder)
if img.attrs.get('width'): if img.attrs.get('width'):