From 37e47d0aa7fc7d3072d22008f2223997bbcc9819 Mon Sep 17 00:00:00 2001
From: shirshasa
Date: Fri, 23 Apr 2021 14:20:21 +0300
Subject: [PATCH] epub converter: add color conversion, update adding style tags

---
Reviewer notes (text in this section is not part of the commit message):
 * The line structure of this patch was rebuilt from a newline-collapsed copy.
   Indentation and the position of blank context lines in the css_reader.py
   hunks are a best guess constrained by the hunk line counts -- verify them
   against the base file before applying.
 * color_reader.py was revised during review (86 lines, previously 87), so the
   blob id on its "index" line no longer matches the content below.

 src/util/color_reader.py | 86 ++++++++++++++++++++++++++++++++++++++++
 src/util/css_reader.py   | 36 ++++++++++-------
 2 files changed, 107 insertions(+), 15 deletions(-)
 create mode 100644 src/util/color_reader.py

diff --git a/src/util/color_reader.py b/src/util/color_reader.py
new file mode 100644
index 0000000..740e170
--- /dev/null
+++ b/src/util/color_reader.py
@@ -0,0 +1,86 @@
+"""Map CSS colour values onto HTML4 colour names."""
+from webcolors import html4_hex_to_names, hex_to_rgb, rgb_to_name
+
+
+def closest_colour_rgb(requested_colour):
+    """Return the HTML4 colour name nearest to an (r, g, b) triplet.
+
+    Nearness is the squared Euclidean distance in RGB space.
+    """
+    min_colours = {}
+    for key, name in html4_hex_to_names.items():
+        r_c, g_c, b_c = hex_to_rgb(key)
+        rd = (r_c - requested_colour[0]) ** 2
+        gd = (g_c - requested_colour[1]) ** 2
+        bd = (b_c - requested_colour[2]) ** 2
+        # Keyed by distance, so on a tie the colour visited last wins.
+        min_colours[rd + gd + bd] = name
+
+    return min_colours[min(min_colours)]
+
+
+def get_rgb_colour_name(c):
+    """Return the HTML4 name of an (r, g, b) triplet, or the nearest one."""
+    try:
+        return rgb_to_name(c, 'html4')
+    except ValueError:
+        # No HTML4 colour has exactly this value: fall back to the nearest.
+        return closest_colour_rgb(c)
+
+
+def get_hex_colour_name(c):
+    """Return the HTML4 name for a '#rgb' or '#rrggbb' string.
+
+    Returns '' when ``c`` is not a valid hexadecimal colour.
+    """
+    try:
+        rgb = hex_to_rgb(c)
+    except ValueError:
+        return ''
+    return get_rgb_colour_name(rgb)
+
+
+def str2color_name(s: str):
+    """Convert a CSS 'rgb(r, g, b)' or '#hex' value to an HTML4 colour name.
+
+    Surrounding whitespace and letter case are ignored.  Returns '' for
+    any value that cannot be parsed (percentages, 'rgba(...)', keywords).
+    """
+    value = s.strip().lower()
+    if 'rgb' in value:
+        value = value.replace('rgb', '').replace('(', '').replace(')', '')
+        try:
+            rgb = tuple(int(x) for x in value.split(','))
+        except ValueError:
+            return ''
+        if len(rgb) != 3:
+            return ''
+        return get_rgb_colour_name(rgb)
+
+    if '#' in value:
+        return get_hex_colour_name(value)
+
+    return ''
+
+
+if __name__ == '__main__':
+    print(str2color_name('rgb(139, 0, 0)'))
+
+    colors = [
+        (75, 0, 130), (255, 0, 255),
+        (139, 69, 19), (46, 139, 87),
+        (221, 160, 221)
+    ]
+
+    hex_colors = [
+        '#96F', '#000', '#4C4C4C', '#A00', '#99F'
+    ]
+
+    for c in colors:
+        name = get_rgb_colour_name(c)
+        print("Actual colour:", c, ", closest colour name:", name)
+
+    for c in hex_colors:
+        name = get_hex_colour_name(c)
+        print("Actual colour:", c, ", closest colour name:", name)
+        print()
diff --git a/src/util/css_reader.py b/src/util/css_reader.py
index d0b897d..1e1781e 100644
--- a/src/util/css_reader.py
+++ b/src/util/css_reader.py
@@ -8,6 +8,7 @@ from itertools import takewhile
 from logging import CRITICAL
 
 from src.config import LawCartaConfig
+from src.util.color_reader import str2color_name
 
 cssutils.log.setLevel(CRITICAL)
 
@@ -75,6 +76,8 @@ LIVECARTA_STYLE_ATTRS_MAPPING = {
     'font': lambda x: '',
     'font-family': lambda x: LawCartaConfig.font_correspondence_table.get(x.capitalize()),
     'font-size': convert_font_size,
+    'color': str2color_name,
+    'background-color': str2color_name,
 }
 
 LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {
@@ -150,40 +153,43 @@ def add_inline_style_to_html_soup(soup1, css_text):
 
     for i in livecarta_p_ids:
         tag = soup1.find(attrs={'livecarta_id': i})
+        tag_initial_name = tag.name
         tag_with_style = soup2.find(attrs={'livecarta_id': i})
         if tag_with_style.attrs.get('style'):
             style = tag_with_style.attrs.get('style') + ';'
             to_remove = check_style_to_be_tag(style)
+            new_tags = []
 
             for i, (p, v) in enumerate(to_remove):
                 s = f'{p}:{v};'
                 style = style.replace(s, '')
 
                 if i == 0:
                     tag.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)]
-                    tmp_attrs = tag.attrs.copy()
-                    tag.attrs = {}
-
-                    new_tag = BeautifulSoup(features='lxml').new_tag('span')
-                    new_tag.attrs = tmp_attrs
-                    tag.wrap(new_tag)
-                    print(new_tag)
+                    new_tags.append(tag)
                 else:
                     name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)]
                     new_tag = BeautifulSoup(features='lxml').new_tag(name)
-                    tag.wrap(new_tag)
+                    new_tags[-1].wrap(new_tag)
+                    new_tags.append(new_tag)
 
             if to_remove:
-                new_tag = BeautifulSoup(features='lxml').new_tag('span')
-                new_tag.attrs['style'] = style
-                tag.wrap(new_tag)
-                print(tag)
-                print(list(tag.parent))
-                print()
-                print('---')
+                style = style.strip()
+                tmp_attrs = tag.attrs.copy()
+                tag.attrs = {}
+
+                span_tag = BeautifulSoup(features='lxml').new_tag(tag_initial_name)
+                span_tag.attrs = tmp_attrs
+                if style:
+                    span_tag.attrs['style'] = style
+                del span_tag.attrs['livecarta_id']
+
+                new_tags[-1].wrap(span_tag)
             else:
                 tag.attrs['style'] = style
                 del tag.attrs['livecarta_id']
+        else:
+            del tag.attrs['livecarta_id']
 
     return soup1
 