From 6cd4f9401d6098310bece17eef765008808ca134 Mon Sep 17 00:00:00 2001 From: shirshasa Date: Fri, 23 Apr 2021 15:21:24 +0300 Subject: [PATCH] epub converter: update --- requirements.txt | 6 +++++- src/epub_postprocessor.py | 9 +++++++-- src/util/color_reader.py | 2 ++ src/util/css_reader.py | 4 ++-- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 54ef82a..04ceb99 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,8 @@ bs4>=0.0.1 pika>=1.1.0 requests>=2.22.0 -lxml>=4.5.0 \ No newline at end of file +lxml>=4.5.0 +cssutils~=2.2.0 +ebooklib~=0.17.1 +premailer~=3.8.0 +webcolors==1.3 \ No newline at end of file diff --git a/src/epub_postprocessor.py b/src/epub_postprocessor.py index 52a6744..8c23edd 100644 --- a/src/epub_postprocessor.py +++ b/src/epub_postprocessor.py @@ -1,5 +1,6 @@ import codecs import json +from posixpath import dirname, normpath, join from collections import defaultdict from typing import Dict, Union @@ -89,8 +90,12 @@ class EpubPostprocessor: css_href = tag.attrs.get('href') self.html_href2css_href[item.file_name] = css_href if css_href not in self.css_href2content: - print(css_href) - css_content: str = self.ebooklib_book.get_item_with_href(css_href).get_content().decode() + path_to_css_from_html = css_href + html_folder = dirname(item.file_name) + path_to_css_from_root = normpath(join(html_folder, path_to_css_from_html)) + css_obj = self.ebooklib_book.get_item_with_href(path_to_css_from_root) + assert css_obj, f'Css style {css_href} was not in manifest.' 
+ css_content: str = css_obj.get_content().decode() self.css_href2content[css_href] = clean_css(css_content) for i, tag in enumerate(soup.find_all('style')): diff --git a/src/util/color_reader.py b/src/util/color_reader.py index 740e170..c15a581 100644 --- a/src/util/color_reader.py +++ b/src/util/color_reader.py @@ -59,6 +59,8 @@ def str2color_name(s: str): name = get_hex_colour_name(s) return name + elif s in html4_hex_to_names.values(): + return s else: return '' diff --git a/src/util/css_reader.py b/src/util/css_reader.py index 1e1781e..b7977b3 100644 --- a/src/util/css_reader.py +++ b/src/util/css_reader.py @@ -76,8 +76,8 @@ LIVECARTA_STYLE_ATTRS_MAPPING = { 'font': lambda x: '', 'font-family': lambda x: LawCartaConfig.font_correspondence_table.get(x.capitalize()), 'font-size': convert_font_size, - 'color': str2color_name, - 'background-color': str2color_name, + 'color': lambda x: LawCartaConfig.HTML42LIVECARTA_COLORS.get(str2color_name(x), ''), + 'background-color': lambda x: LawCartaConfig.HTML42LIVECARTA_COLORS.get(str2color_name(x), ''), } LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {