epub converter: prettify reading css

2021-06-29 18:06:38 +03:00
parent 45c1931ab3
commit 9ff9759793
1 changed files with 23 additions and 14 deletions
--- a/src/epub_postprocessor.py
+++ b/src/epub_postprocessor.py
@@ -41,9 +41,7 @@ class EpubPostprocessor:
        self.href2soup_html: Dict[str, BeautifulSoup] = self.build_href2soup_content()

        self.logger.log('CSS files processing.')
-        self.html_href2css_href = {}
-        self.css_href2content = {}
-        self.build_css_content()
+        self.css_href2content, self.html_href2css_href = self.build_css_content()
        # add css
        self.logger.log('CSS styles adding.')
        self.add_css_styles2soup()
@@ -84,26 +82,37 @@ class EpubPostprocessor:

        return nodes

+    def _read_css(self, css_href, html_path):  # returns the decoded text of the stylesheet that html_path links to via css_href
+        path_to_css_from_html = css_href  # the href is treated as relative to the html file's folder
+        html_folder = dirname(html_path)
+        path_to_css_from_root = normpath(join(html_folder, path_to_css_from_html))  # normpath collapses '..' / '.' segments before the lookup
+        css_obj = self.ebooklib_book.get_item_with_href(path_to_css_from_root)
+        assert css_obj, f'Css style {css_href} was not in manifest.'  # NOTE(review): asserts are stripped under -O; presumably a missing item is None and would then fail on the next line instead
+        css_content: str = css_obj.get_content().decode()  # assumes get_content() returns bytes; decode() without arguments uses utf-8
+        return css_content
+
    def build_css_content(self):
+        css_href2content, html_href2css_href = {}, {}  # css key -> clean_css() output; html file name -> css key
+        # html_href2css_href keeps a single css key per html file (1-to-1); TODO: support 1-to-many
+
        for item in self.ebooklib_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
            html_text = item.content
+            html_path = item.file_name
            soup = BeautifulSoup(html_text, features='lxml')
            for tag in soup.find_all('link', attrs={"type": "text/css"}):
+                if tag.attrs.get('rel') and ('alternate' in tag.attrs['rel']):  # skip links whose rel contains 'alternate'
+                    continue
                css_href = tag.attrs.get('href')
-                self.html_href2css_href[item.file_name] = css_href
-                if css_href not in self.css_href2content:
-                    path_to_css_from_html = css_href
-                    html_folder = dirname(item.file_name)
-                    path_to_css_from_root = normpath(join(html_folder, path_to_css_from_html))
-                    css_obj = self.ebooklib_book.get_item_with_href(path_to_css_from_root)
-                    assert css_obj, f'Css style {css_href} was not in manifest.'
-                    css_content: str = css_obj.get_content().decode()
-                    self.css_href2content[css_href] = clean_css(css_content)
+                html_href2css_href[html_path] = css_href  # a later <link> in the same document replaces the earlier entry
+                if css_href not in css_href2content:  # NOTE(review): keyed by the raw href string, while the file itself is resolved relative to html_path
+                    css_href2content[css_href] = clean_css(self._read_css(css_href, html_path))

            for i, tag in enumerate(soup.find_all('style')):
                css_content = tag.string
-                self.html_href2css_href[item.file_name] = f'href{i}'
-                self.css_href2content[f'href{i}'] = clean_css(css_content)
+                html_href2css_href[html_path] = f'href{i}'  # an inline <style> replaces whatever was recorded for this document above
+                css_href2content[f'href{i}'] = clean_css(css_content)  # NOTE(review): i restarts at 0 for every document, so 'href0' of one document is overwritten by the next
+
+        return css_href2content, html_href2css_href

    def add_css_styles2soup(self):
        for href in self.href2soup_html: