forked from LiveCarta/BookConverter
Add concrete book_id for imgs on Local
This commit is contained in:
@@ -1,15 +1,15 @@
|
||||
import re
|
||||
import json
|
||||
import codecs
|
||||
import os
|
||||
import ebooklib
|
||||
from ebooklib import epub
|
||||
from ebooklib.epub import Link, Section
|
||||
from os import path
|
||||
from pathlib import Path
|
||||
from itertools import chain
|
||||
from premailer import transform
|
||||
from collections import defaultdict
|
||||
from typing import Dict, Union, List
|
||||
|
||||
import ebooklib
|
||||
from ebooklib import epub
|
||||
from ebooklib.epub import Link, Section
|
||||
from bs4 import BeautifulSoup, NavigableString, Tag
|
||||
|
||||
from src.util.helpers import BookLogger
|
||||
@@ -370,8 +370,8 @@ class EpubConverter:
|
||||
prepared content
|
||||
|
||||
"""
|
||||
dir_name = os.path.dirname(cur_file_path)
|
||||
normed_path = os.path.normpath(os.path.join(
|
||||
dir_name = path.dirname(cur_file_path)
|
||||
normed_path = path.normpath(path.join(
|
||||
dir_name, href_in_link)).replace("\\", "/")
|
||||
full_path = [
|
||||
path for path in self.hrefs_added_to_toc if normed_path in path]
|
||||
@@ -446,7 +446,7 @@ class EpubConverter:
|
||||
a_tag_href, a_tag_id = internal_link_tag.attrs["href"].split("#")
|
||||
a_tag_href_matched_to_toc = self.match_href_to_path_from_toc(
|
||||
toc_href, a_tag_href, internal_link_tag) if a_tag_href \
|
||||
else os.path.normpath(toc_href).replace("\\", "/")
|
||||
else path.normpath(toc_href).replace("\\", "/")
|
||||
if a_tag_href_matched_to_toc:
|
||||
new_id = self.create_unique_id(
|
||||
a_tag_href_matched_to_toc, a_tag_id)
|
||||
@@ -594,8 +594,7 @@ class EpubConverter:
|
||||
path_to_html=nav_point.href,
|
||||
access=self.access,
|
||||
path2aws_path=self.book_image_src_path2aws_path,
|
||||
book_id=self.file_path.stem
|
||||
if hasattr(self.file_path, "stem") else "book_id")
|
||||
book_id=Path(self.file_path).stem)
|
||||
|
||||
indent = " " * lvl
|
||||
self.logger.log(indent + f"Chapter: {title} is processing.")
|
||||
@@ -635,7 +634,7 @@ class EpubConverter:
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
epub_file_path = "../../epub/9781641050234.epub"
|
||||
epub_file_path = "../../epub/9781614382264.epub"
|
||||
logger_object = BookLogger(
|
||||
name="epub", book_id=epub_file_path.split("/")[-1])
|
||||
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import os
|
||||
import pathlib
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from src.access import Access
|
||||
@@ -35,7 +34,6 @@ def update_images_src_links(body_tag: BeautifulSoup,
|
||||
book_id: str = None) -> dict:
|
||||
"""Function makes dictionary image_src_path -> Amazon web service_path"""
|
||||
img_tags = body_tag.find_all("img")
|
||||
|
||||
for img in img_tags:
|
||||
path_to_img_from_html = img.attrs.get("src")
|
||||
html_folder = os.path.dirname(path_to_html)
|
||||
@@ -55,7 +53,7 @@ def update_images_src_links(body_tag: BeautifulSoup,
|
||||
path2aws_path[path_to_img_from_root] = new_folder
|
||||
else:
|
||||
new_folder = save_image_locally(
|
||||
path_to_img_from_root, img_content, "book_id")
|
||||
path_to_img_from_root, img_content, book_id)
|
||||
|
||||
img.attrs["src"] = str(new_folder)
|
||||
if img.attrs.get("width"):
|
||||
|
||||
Reference in New Issue
Block a user