Change indents processing

This commit is contained in:
Kibzik
2023-04-03 15:57:11 +03:00
parent 56920523af
commit 05323ddd45
2 changed files with 2 additions and 77 deletions

View File

@@ -52,13 +52,13 @@ class EpubBook(BookSolver):
if __name__ == "__main__": if __name__ == "__main__":
epub_file_path = f"../../books/epub/9781264269044.epub" epub_file_path = f"../../books/epub/9781284127362.epub"
logger_object = BookLogger(name="epub") logger_object = BookLogger(name="epub")
logger_object.configure_book_logger(book_id=epub_file_path.split("/")[-1]) logger_object.configure_book_logger(book_id=epub_file_path.split("/")[-1])
html_preset_processor = HtmlPresetsProcessor( html_preset_processor = HtmlPresetsProcessor(
logger=logger_object, preset_path="../../preset/epub_presets.json") logger=logger_object, preset_path="../../preset/presets.json")
style_preprocessor = StyleReader() style_preprocessor = StyleReader()
html_processor = HtmlEpubProcessor(logger=logger_object, html_processor = HtmlEpubProcessor(logger=logger_object,
html_preprocessor=html_preset_processor) html_preprocessor=html_preset_processor)

View File

@@ -48,76 +48,6 @@ class InlineStyleProcessor:
style_ = style_.replace("color:white;", "") style_ = style_.replace("color:white;", "")
return style_ return style_
# @staticmethod
# def duplicate_styles_check(split_style: list) -> list:
# style_name2style_value = {}
# # {key: val for for list_item in split_style}
# splitstrs = (list_item.split(":") for list_item in split_style)
# d = {key: val for key, val in splitstrs}
# for list_item in split_style:
# key, val = list_item.split(":")
# if key not in style_name2style_value.keys():
# style_name2style_value[key] = val
# split_style = [k + ":" + v for k, v in style_name2style_value.items()]
# return split_style
@staticmethod
def indents_processing(split_style: List[str]) -> str:
    """
    Fold the left margin and the text indent of an inline style into a
    single ``text-indent`` declaration snapped to a 30px grid.

    Formula: indent = closest_number(abs(margin - text_indent)).
    Only the magnitude of each value is used: a leading minus sign and
    the unit suffix are matched by the patterns but never read.

    Parameters
    ----------
    split_style: List[str]
        list of style declarations split by ";"

    Returns
    ----------
    processed_style: str
        style string joined by ";" (no trailing ";"). When a
        ``margin``/``margin-left`` declaration was found it is removed
        and replaced by the computed ``text-indent``.

    """
    def closest_number(value: float, m: int = 30) -> int:
        """
        Return the multiple of m closest to value.

        Uses the built-in round(), so exact halves go to the even
        quotient (45 -> 60, 75 -> 60 for m=30).
        """
        return m * round(value / m)

    processed_style = ";".join(split_style) + ";"

    # The negative lookbehind pins each pattern to the start of a property
    # name, so longer names ending in "margin-left" / "text-indent"
    # (e.g. "scroll-margin-left") are left untouched instead of being
    # partially rewritten.
    margin_left_regexp = re.compile(
        r"((?<![\w-])margin(-left)?:\s*-?(\d+(\.\d+)?)(\w*)\s*;)")
    text_indent_regexp = re.compile(
        r"((?<![\w-])text-indent:\s*-?(\d+(\.\d+)?)(\w*)\s*;)")

    has_margin = margin_left_regexp.search(processed_style)
    has_text_indent = text_indent_regexp.search(processed_style)

    if has_margin:
        num_m = abs(float(has_margin.group(3)))
        if has_text_indent:
            num_ti = abs(float(has_text_indent.group(2)))
            indent_value = str(closest_number(abs(num_m - num_ti)))
            processed_style = processed_style.replace(
                has_text_indent.group(0), f"text-indent: {indent_value}px;")
        else:
            indent_value = str(closest_number(abs(num_m)))
            processed_style += f"text-indent: {indent_value}px;"
        # The margin has been folded into text-indent; drop every
        # margin / margin-left declaration from the style.
        processed_style = margin_left_regexp.sub("", processed_style)
    elif has_text_indent:
        num_ti = abs(float(has_text_indent.group(2)))
        indent_value = str(closest_number(num_ti))
        processed_style = text_indent_regexp.sub(
            f"text-indent: {indent_value}px;", processed_style)

    return processed_style.strip(";")
def process_inline_style(self) -> str: def process_inline_style(self) -> str:
""" """
Function processes final(css+initial inline) inline style Function processes final(css+initial inline) inline style
@@ -126,7 +56,6 @@ class InlineStyleProcessor:
1. Remove white color if tag doesn't have background color in style 1. Remove white color if tag doesn't have background color in style
2. Create list of styles from inline style 2. Create list of styles from inline style
3. Duplicate styles check - if the tag had duplicate styles 3. Duplicate styles check - if the tag had duplicate styles
4. Processing indents
Returns Returns
------- -------
@@ -146,10 +75,6 @@ class InlineStyleProcessor:
style = re.sub(r"; *", ";", inline_style) style = re.sub(r"; *", ";", inline_style)
# when we split style by ";", last element of the list is "" - None (remove it) # when we split style by ";", last element of the list is "" - None (remove it)
split_inline_style: list = list(filter(None, style.split(";"))) split_inline_style: list = list(filter(None, style.split(";")))
# 3. Duplicate styles check - if the tag had duplicate styles
# split_inline_style = self.duplicate_styles_check(split_inline_style)
# 4. Processing indents
inline_style: str = self.indents_processing(split_inline_style)
return inline_style return inline_style
else: else:
return "" return ""