epub converter: prettify reading css

This commit is contained in:
shirshasa
2021-06-29 18:06:38 +03:00
parent 45c1931ab3
commit 9ff9759793

View File

@@ -41,9 +41,7 @@ class EpubPostprocessor:
self.href2soup_html: Dict[str, BeautifulSoup] = self.build_href2soup_content()
self.logger.log('CSS files processing.')
self.html_href2css_href = {}
self.css_href2content = {}
self.build_css_content()
self.css_href2content, self.html_href2css_href = self.build_css_content()
# add css
self.logger.log('CSS styles adding.')
self.add_css_styles2soup()
@@ -84,26 +82,37 @@ class EpubPostprocessor:
return nodes
def _read_css(self, css_href: str, html_path: str) -> str:
    """Return the decoded text of a stylesheet referenced from an HTML item.

    Args:
        css_href: stylesheet href exactly as written in the HTML document;
            it is resolved relative to that document's folder.
        html_path: path of the HTML document that references the stylesheet.

    Returns:
        The stylesheet bytes decoded with the ``bytes.decode()`` default
        codec (UTF-8).

    Raises:
        AssertionError: if the book has no item for the resolved href.
    """
    # The href is relative to the referencing document, so join it onto the
    # document's folder and normalise away any ``..`` / ``.`` segments before
    # asking the book for the item.
    path_to_css_from_root = normpath(join(dirname(html_path), css_href))
    css_obj = self.ebooklib_book.get_item_with_href(path_to_css_from_root)
    if not css_obj:
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type and message are
        # kept identical for existing callers.
        raise AssertionError(f'Css style {css_href} was not in manifest.')
    return css_obj.get_content().decode()
def build_css_content(self):
    """Collect the CSS used by every document item of the book.

    Each ``ebooklib.ITEM_DOCUMENT`` item is parsed with BeautifulSoup.
    Linked stylesheets (``<link type="text/css">`` whose ``rel`` does not
    contain ``alternate``) are read through ``_read_css``; inline
    ``<style>`` blocks are taken from the tag text. All CSS is passed
    through ``clean_css`` before being stored.

    Returns:
        A ``(css_href2content, html_href2css_href)`` tuple of freshly built
        dicts (nothing is written to ``self``):

        * ``css_href2content`` maps a stylesheet key to its cleaned CSS.
          Linked stylesheets are keyed by the href as written in the
          document; inline blocks by ``'<html_path>#style<i>'``.
        * ``html_href2css_href`` maps a document path to ONE stylesheet
          key; when a document has several stylesheets the last one
          processed wins (inline blocks are processed after links).
    """
    css_href2content, html_href2css_href = {}, {}
    # html_href2css_href 1-to-1, todo: 1-to-many
    for item in self.ebooklib_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        html_path = item.file_name
        soup = BeautifulSoup(item.content, features='lxml')

        for tag in soup.find_all('link', attrs={"type": "text/css"}):
            if 'alternate' in (tag.attrs.get('rel') or ()):
                continue
            css_href = tag.attrs.get('href')
            if not css_href:
                # A <link> without an href cannot be resolved to a file.
                continue
            html_href2css_href[html_path] = css_href
            # NOTE(review): the cache is keyed by the href as written, but
            # the file it resolves to depends on html_path; documents in
            # different folders using the same relative href would share
            # one entry - confirm this cannot happen for supported books.
            if css_href not in css_href2content:
                css_href2content[css_href] = clean_css(self._read_css(css_href, html_path))

        for i, tag in enumerate(soup.find_all('style')):
            css_content = tag.string
            if css_content is None:
                # Empty <style> (or one without a single text child).
                continue
            # The index restarts at 0 for every document, so the key must
            # include the document path; otherwise inline styles from
            # different documents overwrite each other.
            style_key = f'{html_path}#style{i}'
            html_href2css_href[html_path] = style_key
            css_href2content[style_key] = clean_css(css_content)

    return css_href2content, html_href2css_href
def add_css_styles2soup(self):
for href in self.href2soup_html: