Change indents processing

This commit is contained in:
Kibzik
2023-04-03 15:57:11 +03:00
parent 56920523af
commit 05323ddd45
2 changed files with 2 additions and 77 deletions

View File

@@ -52,13 +52,13 @@ class EpubBook(BookSolver):
if __name__ == "__main__":
epub_file_path = f"../../books/epub/9781264269044.epub"
epub_file_path = f"../../books/epub/9781284127362.epub"
logger_object = BookLogger(name="epub")
logger_object.configure_book_logger(book_id=epub_file_path.split("/")[-1])
html_preset_processor = HtmlPresetsProcessor(
logger=logger_object, preset_path="../../preset/epub_presets.json")
logger=logger_object, preset_path="../../preset/presets.json")
style_preprocessor = StyleReader()
html_processor = HtmlEpubProcessor(logger=logger_object,
html_preprocessor=html_preset_processor)

View File

@@ -48,76 +48,6 @@ class InlineStyleProcessor:
style_ = style_.replace("color:white;", "")
return style_
# @staticmethod
# def duplicate_styles_check(split_style: list) -> list:
# style_name2style_value = {}
# # {key: val for for list_item in split_style}
# splitstrs = (list_item.split(":") for list_item in split_style)
# d = {key: val for key, val in splitstrs}
# for list_item in split_style:
# key, val = list_item.split(":")
# if key not in style_name2style_value.keys():
# style_name2style_value[key] = val
# split_style = [k + ":" + v for k, v in style_name2style_value.items()]
# return split_style
@staticmethod
def indents_processing(split_style: List[str]) -> str:
    """
    Collapse left margin and text-indent into one normalized text-indent.

    formula_of_indent: indent = closest_number(abs(margin - text_indent))

    The first ``margin``/``margin-left`` and the first ``text-indent``
    declaration supply the numbers. Every ``text-indent`` declaration is
    rewritten to the computed value (one is appended when only a margin is
    present) and every ``margin``/``margin-left`` declaration is removed.
    Styles containing neither property are returned unchanged.

    Parameters
    ----------
    split_style: List[str]
        list of styles split by ";"

    Returns
    ----------
    processed_style: str
        styles re-joined with ";" (no leading/trailing ";") with the
        counted indent, always expressed in px

    """
    def closest_number(value: float, m: int = 30) -> int:
        """
        Find the multiple of m closest to a non-negative value.

        Exact halves go up (15 -> 30, 45 -> 60, 75 -> 90). Built-in round()
        is avoided because it rounds ties to the even quotient, which
        snaps 15 down to 0 but 45 up to 60.
        """
        # value is never negative here, so int() truncation acts as floor
        return m * int(value / m + 0.5)

    processed_style = ";".join(split_style) + ";"

    # (?<![\w-]) stops the property name from matching as the tail of a
    # longer property such as "scroll-margin-left".
    # The sign sits outside the numeric group, so captured numbers are
    # always non-negative.
    # NOTE(review): the unit is matched but never converted - "2em" is
    # treated as 2 and the result is always written in px.
    margin_left_regexp = re.compile(
        r"((?<![\w-])margin(-left)?:\s*-?(\d+(\.\d+)?)(\w*)\s*;)")
    text_indent_regexp = re.compile(
        r"((?<![\w-])text-indent:\s*-?(\d+(\.\d+)?)(\w*)\s*;)")

    margin_match = margin_left_regexp.search(processed_style)
    indent_match = text_indent_regexp.search(processed_style)

    if not (margin_match or indent_match):
        return processed_style.strip(";")

    # A missing property contributes 0, so the single formula covers the
    # margin-only, indent-only and margin+indent cases.
    margin = float(margin_match.group(3)) if margin_match else 0.0
    indent = float(indent_match.group(2)) if indent_match else 0.0
    declaration = f"text-indent: {closest_number(abs(margin - indent))}px;"

    if indent_match:
        # rewrite every text-indent declaration, not only the first match
        processed_style = text_indent_regexp.sub(declaration, processed_style)
    else:
        processed_style += declaration

    # the margin is now folded into text-indent, so drop it
    processed_style = margin_left_regexp.sub("", processed_style)
    return processed_style.strip(";")
def process_inline_style(self) -> str:
"""
Function processes final(css+initial inline) inline style
@@ -126,7 +56,6 @@ class InlineStyleProcessor:
1. Remove white color if tag doesn't have background color in style
2. Create list of styles from inline style
3. Duplicate styles check - if the tag had duplicate styles
4. Processing indents
Returns
-------
@@ -146,10 +75,6 @@ class InlineStyleProcessor:
style = re.sub(r"; *", ";", inline_style)
# when we split style by ";", last element of the list is "" - None (remove it)
split_inline_style: list = list(filter(None, style.split(";")))
# 3. Duplicate styles check - if the tag had duplicate styles
# split_inline_style = self.duplicate_styles_check(split_inline_style)
# 4. Processing indents
inline_style: str = self.indents_processing(split_inline_style)
return inline_style
else:
return ""