This commit is contained in:
Svitin Egor
2024-11-06 12:36:34 +03:00
parent 66f3ff76de
commit eff687ab46
2 changed files with 36 additions and 3 deletions

View File

@@ -28,7 +28,15 @@ class EpubConverter:
self.ebooklib_book = epub.read_epub(book_path) self.ebooklib_book = epub.read_epub(book_path)
self.style_processor = style_processor self.style_processor = style_processor
self.html_processor = html_processor self.html_processor = html_processor
self.toc_styles = []
"""Font styles for chapter title"""
self.toc_styles.append({'level': 1, 'classes': [], 'font-style': []})
self.toc_styles.append({'level': 2, 'classes': [], 'font-style': []})
self.toc_styles.append({'level': 3, 'classes': [], 'font-style': []})
self.toc_styles.append({'level': 4, 'classes': ['.H3'], 'font-style': []})
styles = self.ebooklib_book.get_items_of_type(ebooklib.ITEM_STYLE)
for x in styles:
self.toc_styles = self.style_processor.get_tok_styles(x.content, self.toc_styles)
# main container for all epub .xhtml files # main container for all epub .xhtml files
self.html_href2html_body_soup: Dict[str, BeautifulSoup] = {} self.html_href2html_body_soup: Dict[str, BeautifulSoup] = {}
# enumerate all subchapter id for each file # enumerate all subchapter id for each file
@@ -104,6 +112,7 @@ class EpubConverter:
self.define_chapters_with_content() self.define_chapters_with_content()
self.book_logger.log(f"Converting html_nodes to LiveCarta chapter items.") self.book_logger.log(f"Converting html_nodes to LiveCarta chapter items.")
def build_href2soup_content(self) -> Dict[str, BeautifulSoup]: def build_href2soup_content(self) -> Dict[str, BeautifulSoup]:
# using EpubElements # using EpubElements
# for now just for HTML objects, as it is the simplest chapter # for now just for HTML objects, as it is the simplest chapter
@@ -543,6 +552,19 @@ class EpubConverter:
for tl_nav_point in top_level_nav_points: for tl_nav_point in top_level_nav_points:
self.detect_one_chapter(tl_nav_point) self.detect_one_chapter(tl_nav_point)
def prepare_chapter_tag(self, title: str, lvl: int) -> str:
    """Wrap a chapter title in inline tags according to its TOC level styles.

    The style entry is looked up positionally in ``self.toc_styles``:
    level ``lvl`` uses the entry at index ``lvl - 1``. Only the ``'italic'``
    font style is currently translated, into an ``<i>`` wrapper.

    Args:
        title: Already preprocessed chapter title.
        lvl: 1-based nesting level of the chapter.

    Returns:
        The title wrapped in ``<i>...</i>`` when the level's ``'font-style'``
        list contains ``'italic'``; otherwise the title unchanged. Levels
        with no entry in ``self.toc_styles`` also return the title unchanged.
    """
    # Levels outside the configured table have no style information.
    # Guarding here avoids an IndexError for deeply nested chapters and
    # stops lvl <= 0 from wrapping around to an entry at the end of the list.
    if not 1 <= lvl <= len(self.toc_styles):
        return title
    title_styles = self.toc_styles[lvl - 1]['font-style']
    # Wrap once even if 'italic' was collected several times (e.g. from
    # several stylesheets); nested <i> tags add nothing.
    if 'italic' in title_styles:
        return '<i>' + title + '</i>'
    return title
def html_node_to_livecarta_chapter_item(self, nav_point: NavPoint, lvl: int = 1) -> ChapterItem: def html_node_to_livecarta_chapter_item(self, nav_point: NavPoint, lvl: int = 1) -> ChapterItem:
""" """
Function prepare style, tags to json structure Function prepare style, tags to json structure
@@ -567,7 +589,7 @@ class EpubConverter:
self.book_logger.log(indent + f"Chapter: {title} is processing.") self.book_logger.log(indent + f"Chapter: {title} is processing.")
is_chapter: bool = lvl <= LiveCartaConfig.NUM_SUPPORTED_LEVELS is_chapter: bool = lvl <= LiveCartaConfig.NUM_SUPPORTED_LEVELS
self.book_logger.log(indent + "Process title.") self.book_logger.log(indent + "Process title.")
title_preprocessed: str = self.html_processor.prepare_title(title) title_preprocessed: str = self.prepare_chapter_tag(self.html_processor.prepare_title(title), lvl)
self.book_logger.log(indent + "Process content.") self.book_logger.log(indent + "Process content.")
content_preprocessed: Union[Tag, BeautifulSoup] = self.html_processor.prepare_content( content_preprocessed: Union[Tag, BeautifulSoup] = self.html_processor.prepare_content(
title_preprocessed, content, remove_title_from_chapter=is_chapter) title_preprocessed, content, remove_title_from_chapter=is_chapter)

View File

@@ -221,7 +221,6 @@ class StyleReader:
def build_css_file_content(self, css_content: str) -> str: def build_css_file_content(self, css_content: str) -> str:
"""Build css content with LiveCarta convention""" """Build css content with LiveCarta convention"""
sheet = cssutils.parseString(css_content, validate=False) sheet = cssutils.parseString(css_content, validate=False)
for css_rule in sheet: for css_rule in sheet:
if css_rule.type == css_rule.STYLE_RULE: if css_rule.type == css_rule.STYLE_RULE:
for style_type in css_rule.style: for style_type in css_rule.style:
@@ -230,3 +229,15 @@ class StyleReader:
css_text: str = sheet._getCssText().decode() css_text: str = sheet._getCssText().decode()
return css_text return css_text
def get_tok_styles(self, css_content: str, header_settings: list) -> list:
    """Collect ``font-style`` values for TOC header levels from a stylesheet.

    Every style rule in ``css_content`` that declares a non-empty
    ``font-style`` is compared against each entry of ``header_settings``.
    When one of the rule's selectors is listed in the entry's ``'classes'``,
    the whitespace-separated ``font-style`` tokens are added to the entry's
    ``'font-style'`` list.

    Args:
        css_content: Stylesheet text handed to ``cssutils.parseString``.
        header_settings: List of dicts, each with a ``'classes'`` list of
            selector strings and a ``'font-style'`` list to extend.

    Returns:
        The same ``header_settings`` list, updated in place.
    """
    sheet = cssutils.parseString(css_content, validate=False)
    for css_rule in sheet:
        if css_rule.type != css_rule.STYLE_RULE:
            continue
        # Read the property once per rule instead of once per header entry.
        font_style = css_rule.style.getPropertyValue('font-style')
        if font_style == '':
            continue
        # Keep the full selector text (original behaviour) and also the
        # individual selectors, so a grouped rule such as ".H3, .H4"
        # matches a header entry that lists only ".H3".
        selectors = {css_rule.selectorText}
        selectors.update(
            selector.selectorText for selector in css_rule.selectorList
        )
        style_values = font_style.split()
        for header_rule in header_settings:
            if selectors.isdisjoint(header_rule['classes']):
                continue
            collected = header_rule['font-style']
            for style_value in style_values:
                # Skip values already recorded so repeated rules or
                # repeated calls do not accumulate duplicates.
                if style_value not in collected:
                    collected.append(style_value)
    return header_settings