diff --git a/src/epub_converter/css_reader.py b/src/epub_converter/css_reader.py index 313b3a5..59b11af 100644 --- a/src/epub_converter/css_reader.py +++ b/src/epub_converter/css_reader.py @@ -14,11 +14,11 @@ from src.livecarta_config import LiveCartaConfig cssutils.log.setLevel(CRITICAL) -sizes_pr = [-1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56, +sizes_pr = [-100, -1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56, 1.63, 1.69, 1.75, 1.81, 1.88, 1.94, 2.0, 2.06, 2.13, 2.19, 2.25, 2.31, 2.38, 2.44, 2.5, 2.56, 2.63, 2.69, 2.75, 2.81, 2.88, 2.94, 3.0, 4.0, 5.0] -sizes_px = ['10px', '10px', '11px', '12px', '13px', '14px', '15px', '16px', '17px', '18px', '19px', '20px', '21px', +sizes_px = ['0px', '10px', '10px', '11px', '12px', '13px', '14px', '15px', '16px', '17px', '18px', '19px', '20px', '21px', '22px', '23px', '24px', '25px', '26px', '27px', '28px', '29px', '30px', '31px', '32px', '33px', '34px', '35px', '36px', '37px', '38px', '39px', '40px', '41px', '42px', '43px', '44px', '45px', '46px', '47px', '48px', '49px', '50px', '64px', '72px'] @@ -28,61 +28,42 @@ list_types = ['circle', 'disc', 'armenian', 'decimal', 'lower-roman', 'upper-alpha', 'upper-latin', 'upper-roman', 'none'] -def convert_font_size(value): - """ Function converts font-size in mapping """ - if 'pt' in value: - if int(value.replace('pt', '')) == LiveCartaConfig.LIVECARTA_DEFAULT_FONT_SIZE: - return '' - else: - return value.replace('pt', 'px') +def convert_tag_values(value): + """Function 1. converts values of tags from em/%/pt to px + 2. find closest font-size px + Parameters + ---------- + value: str - if value == '100%': - return '' - try: - if '%' in value: - value = float(value.replace('%', '')) - value = value / 100.0 - elif 'em' in value: - value = float(value.replace('em', '')) - else: - return '' - - if value > 5: - return '' + Returns + ------- + converted value: str + """ + def find_closest_size(value): possible_sizes = list(takewhile(lambda x: value > x, sizes_pr)) last_possible_size_index = sizes_pr.index(possible_sizes[-1]) return sizes_px[last_possible_size_index] - except ValueError: - return '' - - -def convert_indents(value): - """ Function converts text-indent and margin-left values to px """ - # 30px = 3.2% = 1.25em = 23pt - text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(-*\w+pt)') - has_style_attrs = re.search(text_indent_regexp, value) + font_size_regexp = re.compile(r'(^-*(\d*\.*\d+)%$)|(^-*(\d*\.*\d+)em$)|(^-*(\d*\.*\d+)pt$)') + has_style_attrs = re.search(font_size_regexp, value) if has_style_attrs: if has_style_attrs.group(1): - value = value.replace(has_style_attrs.group(1), - str(abs(int("0" + "".join(filter(str.isdigit, str(has_style_attrs.group(1))))) * 6)) + - 'px') - - elif has_style_attrs.group(2): - value = value.replace(has_style_attrs.group(2), - str(abs(int("0" + "".join(filter(str.isdigit, str(has_style_attrs.group(3))))) * 30)) + - 'px') - - elif has_style_attrs.group(4): - value = value.replace(has_style_attrs.group(4), - str(abs(int("0" + "".join(filter(str.isdigit, str(has_style_attrs.group(4))))))) + 'px') + value = float(value.replace('%', '')) / 100.0 + return find_closest_size(value) + elif has_style_attrs.group(3): + value = float(value.replace('em', '')) + return find_closest_size(value) + elif has_style_attrs.group(5): + return value.replace('pt', 'px') + else: + return '' return value -""" -LIVECARTA_STYLE_ATTRS = { css property: value } +""" +Dictionary LIVECARTA_STYLE_ATTRS = { css property: value } Style properties that can be used to fit livecarta css style convention. If property has empty list, it means that any value can be converted. If property has not empty list, it means that only certain property-value combinations can be transformed. @@ -115,7 +96,8 @@ LIVECARTA_STYLE_ATTRS = { 'list-style-type': [], 'list-style-image': [], 'margin-left': [], - 'margin-top': [] + 'margin-top': [], + 'margin': [], } @@ -132,18 +114,18 @@ def get_text_color(x): """ -LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function } +Dictionary LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function } Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING should be updated to suit livecarta style convention. """ LIVECARTA_STYLE_ATTRS_MAPPING = { - 'text-indent': convert_indents, + 'text-indent': convert_tag_values, 'font-variant': lambda x: x, 'text-align': lambda x: x, 'font': lambda x: '', 'font-family': lambda x: LiveCartaConfig.font_correspondence_table.get(x) or LiveCartaConfig.font_correspondence_table.get(x.capitalize()), - 'font-size': convert_font_size, + 'font-size': convert_tag_values, 'color': get_text_color, 'background-color': get_bg_color, 'background': get_bg_color, @@ -156,8 +138,9 @@ LIVECARTA_STYLE_ATTRS_MAPPING = { 'border-bottom': lambda x: x if x != '0' else '', 'list-style-type': lambda x: x if x in list_types else 'disc', 'list-style-image': lambda x: 'disc', - 'margin-left': convert_indents, - 'margin-top': convert_indents + 'margin-left': convert_tag_values, + 'margin-top': convert_tag_values, + 'margin': convert_tag_values, } """ @@ -181,10 +164,17 @@ LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = { def check_style_to_be_tag(style) -> List[tuple]: - """ - Some css style properties converts to tags. - Search for them and prepare list of properties to be removed from style string + """Function search style properties that can be converted to tags. + It searches for them and prepare list of properties to be removed from style string + Parameters + ---------- + style: str + + Returns + ------- + properties to remove: list """ + to_remove = [] for k in LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG: if f'{k[0]}:{k[1]}' in style: @@ -267,37 +257,40 @@ class TagStyleConverter: @staticmethod def process_indents_to_px(split_style: dict) -> str: - """ Function cleans using convert_indents() style string and returns new clean_style """ + """Function cleans style string using convert_tag_values() and returns new clean_style""" split_style = [k + ":" + v for k, v in split_style.items()] clean_style = '' for item in split_style: item = item.split(':') - if item[0] in ['text-indent', 'margin-left']: - item[1] = convert_indents(item[1]) + if item[0] in ['text-indent', 'margin-left', 'margin']: + if len(item[1].split(' ')) == 3: + item[1] = convert_tag_values(item[1].split(' ')[-2]) # split returns middle value + else: + item[1] = convert_tag_values(item[1].split(' ')[-1]) # split returns last value clean_style += item[0] + ': ' + item[1] + '; ' margin_left_regexp = re.compile( - r'(margin-left: *(-*\w+);*)') + r'((margin-left|margin): *(-*\w+);*)') text_indent_regexp = re.compile( r'(text-indent: *(-*\w+);*)') - has_margin_left = re.search(margin_left_regexp, clean_style) + has_margin = re.search(margin_left_regexp, clean_style) has_text_indent = re.search(text_indent_regexp, clean_style) - # formula_of_indent: indent = abs(margin_left - text_indent) - if has_margin_left: - num_ml = abs(int("0" + "".join( - filter(str.isdigit, str(has_margin_left.group(2)))))) + # formula_of_indent: indent = abs(margin - text_indent) + if has_margin: + num_m = abs(int("0" + "".join( + filter(str.isdigit, str(has_margin.group(3)))))) if has_text_indent: num_ti = abs(int("0" + "".join( filter(str.isdigit, str(has_text_indent.group(2)))))) clean_style = clean_style.replace(has_text_indent.group(1), 'text-indent: ' + - str(abs(num_ml - num_ti)) + 'px; ') - clean_style = clean_style.replace(has_margin_left.group(1), '') + str(abs(num_m - num_ti)) + 'px; ') + clean_style = clean_style.replace(has_margin.group(1), '') return clean_style - clean_style = clean_style.replace(has_margin_left.group(1), 'text-indent: ' + - str(abs(num_ml)) + 'px; ') + clean_style = clean_style.replace(has_margin.group(1), 'text-indent: ' + + str(abs(num_m)) + 'px; ') return clean_style elif has_text_indent: @@ -309,7 +302,7 @@ class TagStyleConverter: def preprocess_style(self): def remove_extra_spaces(style: str) -> dict: - """ Function to remove extra spaces in style to process clean_style """ + """Function to remove extra spaces in style to process clean_style""" # replace all spaces between '; & letter' to ';' style = re.sub(r"; *", ";", style) split_style: List = style.split(';') @@ -509,7 +502,7 @@ def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str): '@namespace epub "http://www.idpf.org/2007/ops";', '') livecarta_tmp_ids = [] could_have_style_in_livecarta_regexp = re.compile( - '(^div$)|(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|(^h[1-9]$)') + '(^div$)|(^p$)|(^span$)|(^code$)|(^kbd$)|(^var$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|(^h[1-9]$)') tags_with_possible_style_attr = html_soup.find_all( could_have_style_in_livecarta_regexp) for i, x in enumerate(tags_with_possible_style_attr): diff --git a/src/epub_converter/html_epub_preprocessor.py b/src/epub_converter/html_epub_preprocessor.py index c40c1ff..7d71c14 100644 --- a/src/epub_converter/html_epub_preprocessor.py +++ b/src/epub_converter/html_epub_preprocessor.py @@ -275,7 +275,7 @@ def unwrap_structural_tags(body_tag): :return: None """ - def _preserve_class_in_aside_tag(tag_): + def preserve_class_in_aside_tag(tag_): """to save css style inherited from class, copy class to aside tag (which is parent to tag_)""" # this is for Wiley books with boxes tag_class = tag_.attrs['class'] if not isinstance( @@ -561,8 +561,8 @@ def preprocess_pre_tags(chapter_tag): spans = pre.find_all("span") # if in
 there are multiple , we need to add 
after each content to_add_br = len(spans) > 1 - - for child in pre.children: + copy_contents = pre.contents[:] + for child in copy_contents: if isinstance(child, NavigableString): cleaned_text = prepare_formatted(str(child)) sub_strings = re.split('\r\n|\n|\r', cleaned_text) @@ -573,8 +573,8 @@ def preprocess_pre_tags(chapter_tag): else: for sub_child in child.children: if isinstance(sub_child, NavigableString): - cleaned_text2 = prepare_formatted(str(sub_child)) - sub_child.replace_with(NavigableString(cleaned_text2)) + cleaned_text = prepare_formatted(str(sub_child)) + sub_child.replace_with(NavigableString(cleaned_text)) else: sub_child.string = prepare_formatted(sub_child.text) cleaned_tag = child.extract() @@ -594,11 +594,15 @@ def preprocess_pre_tags(chapter_tag): def preprocess_code_tags(chapter_tag): """Function that emulates style of , , """ - for code in chapter_tag.find_all(re.compile("code|kdb|var")): - code.name = 'span' - if code.parent.name == "pre": - continue - code.attrs['style'] = 'color:#c7254e; font-size: 14px; font-family: courier new,courier,monospace;' + for parent_tag in chapter_tag.find_all(re.compile("pre|p")): + for code in parent_tag.find_all(re.compile("code|kbd|var")): + # if code.name == "code": + # parent_tag.name = "pre" + code.name = "span" + if parent_tag.name == "pre": + continue + # if tags aren't in pre + code.attrs['style'] = 'font-size: 14px; font-family: courier new,courier,monospace;' def prepare_title(title_of_chapter: str) -> str: @@ -614,11 +618,11 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro """Function finalise processing/cleaning content Parameters ---------- - title_str : str + title_str: str - content_tag : BeautifulSoup + content_tag: BeautifulSoup - remove_title_from_chapter : bool + remove_title_from_chapter: bool Steps ---------- @@ -629,10 +633,9 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro Returns ------- - str - Prepared content - + prepared content: str """ + # 0. cleaning \n to_remove = [] for child in content_tag.contents: diff --git a/src/livecarta_config.py b/src/livecarta_config.py index 21e7db1..7e57122 100644 --- a/src/livecarta_config.py +++ b/src/livecarta_config.py @@ -22,7 +22,7 @@ class LiveCartaConfig: "Trebuchet MS": "trebuchet ms,helvetica,sans-serif", "Verdana": "verdana,geneva,sans-serif", "monospace": "courier new,courier,monospace", - "sans-serif": "arial,helvetica,sans-serif", + "sans-serif": "arial,helvetica,sans-serif" } COLORS_MAP = {