forked from LiveCarta/BookConverter
epub converter: add internal links processing 7
- convert figure/figcaption tags - handle internal links without an id - remove img width and height attributes
This commit is contained in:
@@ -208,6 +208,34 @@ class EpubPostprocessor:
|
||||
new_id = self._create_unique_id(href, tag.attrs['id'])
|
||||
tag.attrs['id'] = new_id
|
||||
|
||||
# ---------------------------------------------------------------------------------
|
||||
internal_link_reg = re.compile(r'(^.+\.(html|xhtml)$)')
|
||||
for href in self.added_to_toc_hrefs:
|
||||
soup = self.href2soup_html[href]
|
||||
tags = soup.find_all('a', {'href': internal_link_reg})
|
||||
for t in tags:
|
||||
href_in_link = t.attrs['href']
|
||||
full_path = [path for path in self.added_to_toc_hrefs if href_in_link in path]
|
||||
if not full_path:
|
||||
self.logger.log(f'Error in {href} file. No {href_in_link} file found in added to TOC documents. '
|
||||
f'While processing href in {t}.')
|
||||
continue
|
||||
|
||||
href_in_link = full_path[0]
|
||||
new_id = self._create_unique_id(href_in_link, '')
|
||||
t.attrs['placeholder'] = '{{tempStyleToAnchor-' + new_id + '}}'
|
||||
if new_id not in self.internal_anchors:
|
||||
anchor_soup = self.href2soup_html[href_in_link]
|
||||
new_anchor_span = soup.new_tag("span")
|
||||
new_anchor_span.attrs['id'] = new_id
|
||||
new_anchor_span.attrs['class'] = 'link-anchor'
|
||||
new_anchor_span.string = "\xa0"
|
||||
anchor_soup.insert(0, new_anchor_span)
|
||||
self.internal_anchors.add(new_id)
|
||||
|
||||
del t.attrs['href']
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# write placeholder to all internal links
|
||||
internal_link_reg = re.compile(r'(^.+\.(html|xhtml)\#.+)|(^\#.+)')
|
||||
for href in self.added_to_toc_hrefs:
|
||||
|
||||
@@ -53,6 +53,10 @@ def update_src_links_in_images(body_tag: Tag,
|
||||
new_folder = save_image_locally(path_to_img_from_root, img_content, 'book_id')
|
||||
|
||||
img.attrs['src'] = str(new_folder)
|
||||
if img.attrs.get('width'):
|
||||
del img.attrs['width']
|
||||
if img.attrs.get('height'):
|
||||
del img.attrs['height']
|
||||
|
||||
return path2aws_path
|
||||
|
||||
@@ -269,6 +273,14 @@ def unwrap_structural_tags(body_tag):
|
||||
_add_span_to_save_ids_for_links(s)
|
||||
s.unwrap()
|
||||
|
||||
for s in body_tag.find_all("figure"):
|
||||
s.name = 'p'
|
||||
s.attrs['style'] = "text-align: center;"
|
||||
|
||||
for s in body_tag.find_all("figcaption"):
|
||||
_add_span_to_save_ids_for_links(s)
|
||||
s.unwrap()
|
||||
|
||||
for s in body_tag.find_all("aside"):
|
||||
s.name = 'blockquote'
|
||||
|
||||
|
||||
Reference in New Issue
Block a user