forked from LiveCarta/BookConverter
Change indents processing
This commit is contained in:
@@ -52,13 +52,13 @@ class EpubBook(BookSolver):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
epub_file_path = f"../../books/epub/9781264269044.epub"
|
epub_file_path = f"../../books/epub/9781284127362.epub"
|
||||||
|
|
||||||
logger_object = BookLogger(name="epub")
|
logger_object = BookLogger(name="epub")
|
||||||
logger_object.configure_book_logger(book_id=epub_file_path.split("/")[-1])
|
logger_object.configure_book_logger(book_id=epub_file_path.split("/")[-1])
|
||||||
|
|
||||||
html_preset_processor = HtmlPresetsProcessor(
|
html_preset_processor = HtmlPresetsProcessor(
|
||||||
logger=logger_object, preset_path="../../preset/epub_presets.json")
|
logger=logger_object, preset_path="../../preset/presets.json")
|
||||||
style_preprocessor = StyleReader()
|
style_preprocessor = StyleReader()
|
||||||
html_processor = HtmlEpubProcessor(logger=logger_object,
|
html_processor = HtmlEpubProcessor(logger=logger_object,
|
||||||
html_preprocessor=html_preset_processor)
|
html_preprocessor=html_preset_processor)
|
||||||
|
|||||||
@@ -48,76 +48,6 @@ class InlineStyleProcessor:
|
|||||||
style_ = style_.replace("color:white;", "")
|
style_ = style_.replace("color:white;", "")
|
||||||
return style_
|
return style_
|
||||||
|
|
||||||
# @staticmethod
|
|
||||||
# def duplicate_styles_check(split_style: list) -> list:
|
|
||||||
# style_name2style_value = {}
|
|
||||||
# # {key: val for for list_item in split_style}
|
|
||||||
# splitstrs = (list_item.split(":") for list_item in split_style)
|
|
||||||
# d = {key: val for key, val in splitstrs}
|
|
||||||
# for list_item in split_style:
|
|
||||||
# key, val = list_item.split(":")
|
|
||||||
# if key not in style_name2style_value.keys():
|
|
||||||
# style_name2style_value[key] = val
|
|
||||||
# split_style = [k + ":" + v for k, v in style_name2style_value.items()]
|
|
||||||
# return split_style
|
|
||||||
|
|
||||||
@staticmethod
def indents_processing(split_style: List[str]) -> str:
    """
    Collapse left-margin and text-indent declarations into one text-indent.

    The emitted indent follows
    formula_of_indent: indent = closest_number(abs(margin - text_indent)),
    i.e. the absolute difference of the two numeric values snapped to the
    nearest multiple of 30 and written in ``px``. The sign and the unit of
    the source values are not taken into account, only their magnitude.

    Parameters
    ----------
    split_style: List[str]
        list of styles split by ";"

    Returns
    ----------
    processed_style: str
        styles re-joined with ";" (no leading/trailing ";"), where
        ``margin``/``margin-left`` declarations are removed and
        ``text-indent`` carries the computed value

    """

    def closest_number(value: float, m: int = 30) -> int:
        """
        Function to find the number closest
        to value and divisible by m
        """
        # Find the quotient
        q = round(value / m)
        return m * q

    # A trailing ";" guarantees every declaration is ";"-terminated, which
    # is what both patterns below rely on.
    processed_style = ";".join(split_style) + ";"

    # The (?<![\w-]) lookbehind keeps the patterns from matching the tail of
    # a longer property name (e.g. "scroll-margin:" or "-webkit-text-indent:");
    # without it such a property would be half-deleted by the sub() below.
    # Lookbehinds are non-capturing, so group numbers stay the same.
    margin_left_regexp = re.compile(
        r"((?<![\w-])margin(-left)?:\s*-?(\d+(\.\d+)?)(\w*)\s*;)")
    text_indent_regexp = re.compile(
        r"((?<![\w-])text-indent:\s*-?(\d+(\.\d+)?)(\w*)\s*;)")

    has_margin = margin_left_regexp.search(processed_style)
    has_text_indent = text_indent_regexp.search(processed_style)

    if has_margin:
        # group(3) is the unsigned numeric part of the margin value
        num_m = abs(float(has_margin.group(3)))

        if has_text_indent:
            # group(2) is the unsigned numeric part of the text-indent value
            num_ti = abs(float(has_text_indent.group(2)))
            indent_value = str(closest_number(abs(num_m - num_ti)))
            # Rewrite the text-indent declaration found by search()
            processed_style = processed_style.replace(
                has_text_indent.group(0), f"text-indent: {indent_value}px;")
        else:
            # No text-indent present: the margin alone defines the indent
            indent_value = str(closest_number(abs(num_m)))
            processed_style += f"text-indent: {indent_value}px;"

        # The margin has been folded into text-indent, so drop it
        processed_style = margin_left_regexp.sub("", processed_style)

    elif has_text_indent:
        # Only text-indent present: just snap it to the 30-step grid
        num_ti = abs(float(has_text_indent.group(2)))
        indent_value = str(closest_number(num_ti))
        processed_style = text_indent_regexp.sub(
            f"text-indent: {indent_value}px;", processed_style)

    return processed_style.strip(";")
|
|
||||||
|
|
||||||
def process_inline_style(self) -> str:
|
def process_inline_style(self) -> str:
|
||||||
"""
|
"""
|
||||||
Function processes final(css+initial inline) inline style
|
Function processes final(css+initial inline) inline style
|
||||||
@@ -126,7 +56,6 @@ class InlineStyleProcessor:
|
|||||||
1. Remove white color if tag doesn't have background color in style
|
1. Remove white color if tag doesn't have background color in style
|
||||||
2. Create list of styles from inline style
|
2. Create list of styles from inline style
|
||||||
3. Duplicate styles check - if the tag had duplicate styles
|
3. Duplicate styles check - if the tag had duplicate styles
|
||||||
4. Processing indents
|
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
@@ -146,10 +75,6 @@ class InlineStyleProcessor:
|
|||||||
style = re.sub(r"; *", ";", inline_style)
|
style = re.sub(r"; *", ";", inline_style)
|
||||||
# when we split style by ";", last element of the list is "" - None (remove it)
|
# when we split style by ";", last element of the list is "" - None (remove it)
|
||||||
split_inline_style: list = list(filter(None, style.split(";")))
|
split_inline_style: list = list(filter(None, style.split(";")))
|
||||||
# 3. Duplicate styles check - if the tag had duplicate styles
|
|
||||||
# split_inline_style = self.duplicate_styles_check(split_inline_style)
|
|
||||||
# 4. Processing indents
|
|
||||||
inline_style: str = self.indents_processing(split_inline_style)
|
|
||||||
return inline_style
|
return inline_style
|
||||||
else:
|
else:
|
||||||
return ""
|
return ""
|
||||||
|
|||||||
Reference in New Issue
Block a user