forked from LiveCarta/BookConverter
Pass the actual book_id (instead of the literal "book_id" placeholder) when saving images locally
This commit is contained in:
@@ -1,15 +1,15 @@
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
import codecs
|
import codecs
|
||||||
import os
|
import ebooklib
|
||||||
|
from ebooklib import epub
|
||||||
|
from ebooklib.epub import Link, Section
|
||||||
|
from os import path
|
||||||
|
from pathlib import Path
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
from premailer import transform
|
from premailer import transform
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from typing import Dict, Union, List
|
from typing import Dict, Union, List
|
||||||
|
|
||||||
import ebooklib
|
|
||||||
from ebooklib import epub
|
|
||||||
from ebooklib.epub import Link, Section
|
|
||||||
from bs4 import BeautifulSoup, NavigableString, Tag
|
from bs4 import BeautifulSoup, NavigableString, Tag
|
||||||
|
|
||||||
from src.util.helpers import BookLogger
|
from src.util.helpers import BookLogger
|
||||||
@@ -370,8 +370,8 @@ class EpubConverter:
|
|||||||
prepared content
|
prepared content
|
||||||
|
|
||||||
"""
|
"""
|
||||||
dir_name = os.path.dirname(cur_file_path)
|
dir_name = path.dirname(cur_file_path)
|
||||||
normed_path = os.path.normpath(os.path.join(
|
normed_path = path.normpath(path.join(
|
||||||
dir_name, href_in_link)).replace("\\", "/")
|
dir_name, href_in_link)).replace("\\", "/")
|
||||||
full_path = [
|
full_path = [
|
||||||
path for path in self.hrefs_added_to_toc if normed_path in path]
|
path for path in self.hrefs_added_to_toc if normed_path in path]
|
||||||
@@ -446,7 +446,7 @@ class EpubConverter:
|
|||||||
a_tag_href, a_tag_id = internal_link_tag.attrs["href"].split("#")
|
a_tag_href, a_tag_id = internal_link_tag.attrs["href"].split("#")
|
||||||
a_tag_href_matched_to_toc = self.match_href_to_path_from_toc(
|
a_tag_href_matched_to_toc = self.match_href_to_path_from_toc(
|
||||||
toc_href, a_tag_href, internal_link_tag) if a_tag_href \
|
toc_href, a_tag_href, internal_link_tag) if a_tag_href \
|
||||||
else os.path.normpath(toc_href).replace("\\", "/")
|
else path.normpath(toc_href).replace("\\", "/")
|
||||||
if a_tag_href_matched_to_toc:
|
if a_tag_href_matched_to_toc:
|
||||||
new_id = self.create_unique_id(
|
new_id = self.create_unique_id(
|
||||||
a_tag_href_matched_to_toc, a_tag_id)
|
a_tag_href_matched_to_toc, a_tag_id)
|
||||||
@@ -594,8 +594,7 @@ class EpubConverter:
|
|||||||
path_to_html=nav_point.href,
|
path_to_html=nav_point.href,
|
||||||
access=self.access,
|
access=self.access,
|
||||||
path2aws_path=self.book_image_src_path2aws_path,
|
path2aws_path=self.book_image_src_path2aws_path,
|
||||||
book_id=self.file_path.stem
|
book_id=Path(self.file_path).stem)
|
||||||
if hasattr(self.file_path, "stem") else "book_id")
|
|
||||||
|
|
||||||
indent = " " * lvl
|
indent = " " * lvl
|
||||||
self.logger.log(indent + f"Chapter: {title} is processing.")
|
self.logger.log(indent + f"Chapter: {title} is processing.")
|
||||||
@@ -635,7 +634,7 @@ class EpubConverter:
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
epub_file_path = "../../epub/9781641050234.epub"
|
epub_file_path = "../../epub/9781614382264.epub"
|
||||||
logger_object = BookLogger(
|
logger_object = BookLogger(
|
||||||
name="epub", book_id=epub_file_path.split("/")[-1])
|
name="epub", book_id=epub_file_path.split("/")[-1])
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from src.access import Access
|
from src.access import Access
|
||||||
@@ -35,7 +34,6 @@ def update_images_src_links(body_tag: BeautifulSoup,
|
|||||||
book_id: str = None) -> dict:
|
book_id: str = None) -> dict:
|
||||||
"""Function makes dictionary image_src_path -> Amazon web service_path"""
|
"""Function makes dictionary image_src_path -> Amazon web service_path"""
|
||||||
img_tags = body_tag.find_all("img")
|
img_tags = body_tag.find_all("img")
|
||||||
|
|
||||||
for img in img_tags:
|
for img in img_tags:
|
||||||
path_to_img_from_html = img.attrs.get("src")
|
path_to_img_from_html = img.attrs.get("src")
|
||||||
html_folder = os.path.dirname(path_to_html)
|
html_folder = os.path.dirname(path_to_html)
|
||||||
@@ -55,7 +53,7 @@ def update_images_src_links(body_tag: BeautifulSoup,
|
|||||||
path2aws_path[path_to_img_from_root] = new_folder
|
path2aws_path[path_to_img_from_root] = new_folder
|
||||||
else:
|
else:
|
||||||
new_folder = save_image_locally(
|
new_folder = save_image_locally(
|
||||||
path_to_img_from_root, img_content, "book_id")
|
path_to_img_from_root, img_content, book_id)
|
||||||
|
|
||||||
img.attrs["src"] = str(new_folder)
|
img.attrs["src"] = str(new_folder)
|
||||||
if img.attrs.get("width"):
|
if img.attrs.get("width"):
|
||||||
|
|||||||
Reference in New Issue
Block a user