forked from LiveCarta/BookConverter
Escape regex special characters in font-family values
This commit is contained in:
@@ -37,6 +37,7 @@ class CSSPreprocessor:
|
|||||||
"margin-left": self.convert_indents_tag_values,
|
"margin-left": self.convert_indents_tag_values,
|
||||||
"margin-top": self.convert_tag_style_values,
|
"margin-top": self.convert_tag_style_values,
|
||||||
"margin": self.convert_indents_tag_values,
|
"margin": self.convert_indents_tag_values,
|
||||||
|
"width": self.convert_tag_style_values,
|
||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -104,16 +105,23 @@ class CSSPreprocessor:
|
|||||||
return size_value
|
return size_value
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def style_conditions(style_value, style_name):
|
def clean_value(style_value: str, style_name: str):
|
||||||
cleaned_value = style_value.replace("\"", "")
|
cleaned_value = style_value.replace("\"", "")
|
||||||
# cleaned_value = style_value.replace("+", "%2B")
|
if style_name == 'font-family':
|
||||||
|
for symbol in ["+", "*", ".", "%", "?", "$", "^", "[", "]"]:
|
||||||
|
cleaned_value = re.sub(
|
||||||
|
re.escape(f"{symbol}"), rf"\\{symbol}", cleaned_value)
|
||||||
|
return cleaned_value
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def style_conditions(style_value: str, style_name: str) -> tuple[bool, bool]:
|
||||||
constraints_on_value = LiveCartaConfig.LIVECARTA_STYLE_ATTRS.get(
|
constraints_on_value = LiveCartaConfig.LIVECARTA_STYLE_ATTRS.get(
|
||||||
style_name)
|
style_name)
|
||||||
value_not_in_possible_values_list = cleaned_value not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS[
|
value_not_in_possible_values_list = style_value not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS[
|
||||||
style_name]
|
style_name]
|
||||||
return cleaned_value, constraints_on_value, value_not_in_possible_values_list
|
return constraints_on_value, value_not_in_possible_values_list
|
||||||
|
|
||||||
def update_inline_styles_to_livecarta_convention(self, split_style: list):
|
def update_inline_styles_to_livecarta_convention(self, split_style: list) -> list:
|
||||||
for i, style in enumerate(split_style):
|
for i, style in enumerate(split_style):
|
||||||
style_name, style_value = style.split(":")
|
style_name, style_value = style.split(":")
|
||||||
if style_name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS:
|
if style_name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS:
|
||||||
@@ -121,9 +129,8 @@ class CSSPreprocessor:
|
|||||||
split_style[i] = ""
|
split_style[i] = ""
|
||||||
return split_style
|
return split_style
|
||||||
|
|
||||||
cleaned_value, constraints_on_value, value_not_in_possible_values_list =\
|
cleaned_value = self.clean_value(style_value, style_name)
|
||||||
self.style_conditions(style_value, style_name)
|
if all(self.style_conditions(cleaned_value, style_name)):
|
||||||
if constraints_on_value and value_not_in_possible_values_list:
|
|
||||||
# there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove from css file
|
# there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove from css file
|
||||||
split_style[i] = ""
|
split_style[i] = ""
|
||||||
else:
|
else:
|
||||||
@@ -138,18 +145,18 @@ class CSSPreprocessor:
|
|||||||
"""Build inline style with LiveCarta convention"""
|
"""Build inline style with LiveCarta convention"""
|
||||||
# replace all spaces between "; & letter" to ";"
|
# replace all spaces between "; & letter" to ";"
|
||||||
style = re.sub(r"; *", ";", style)
|
style = re.sub(r"; *", ";", style)
|
||||||
# when we split style by ";", last element of the list is "" - None
|
# when we split style by ";", last element of the list is "" - None (we remove it)
|
||||||
# remove it
|
|
||||||
split_style: list = list(filter(None, style.split(";")))
|
split_style: list = list(filter(None, style.split(";")))
|
||||||
# replace all spaces between ": & letter" to ":"
|
# replace all spaces between ": & letter" to ":"
|
||||||
split_style = [el.replace(
|
split_style = [el.replace(
|
||||||
re.search(r"(:\s*)", el).group(1), ":") for el in split_style]
|
re.search(r"(:\s*)", el).group(1), ":") for el in split_style]
|
||||||
|
|
||||||
split_style = self.update_inline_styles_to_livecarta_convention(split_style)
|
split_style = self.update_inline_styles_to_livecarta_convention(
|
||||||
|
split_style)
|
||||||
style = "; ".join(split_style)
|
style = "; ".join(split_style)
|
||||||
return style
|
return style
|
||||||
|
|
||||||
def process_inline_styles_in_html_soup(self, html_href2html_body_soup):
|
def process_inline_styles_in_html_soup(self, html_href2html_body_soup: dict):
|
||||||
"""This function is designed to convert inline html styles"""
|
"""This function is designed to convert inline html styles"""
|
||||||
for html_href in html_href2html_body_soup:
|
for html_href in html_href2html_body_soup:
|
||||||
html_content: BeautifulSoup = html_href2html_body_soup[html_href]
|
html_content: BeautifulSoup = html_href2html_body_soup[html_href]
|
||||||
@@ -185,16 +192,14 @@ class CSSPreprocessor:
|
|||||||
css_rule.style[style_type.name] = ""
|
css_rule.style[style_type.name] = ""
|
||||||
return
|
return
|
||||||
|
|
||||||
cleaned_value, constraints_on_value, value_not_in_possible_values_list =\
|
cleaned_value = self.clean_value(style_type.value, style_type.name)
|
||||||
self.style_conditions(style_type.value, style_type.name)
|
if all(self.style_conditions(cleaned_value, style_type.name)):
|
||||||
if constraints_on_value and value_not_in_possible_values_list:
|
|
||||||
# there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove from css file
|
# there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove from css file
|
||||||
css_rule.style[style_type.name] = ""
|
css_rule.style[style_type.name] = ""
|
||||||
else:
|
else:
|
||||||
if style_type.name in self.LIVECARTA_STYLE_ATTRS_MAPPING:
|
if style_type.name in self.LIVECARTA_STYLE_ATTRS_MAPPING:
|
||||||
# function that converts our data
|
# function that converts our data
|
||||||
func = self.LIVECARTA_STYLE_ATTRS_MAPPING[style_type.name]
|
func = self.LIVECARTA_STYLE_ATTRS_MAPPING[style_type.name]
|
||||||
print(cleaned_value)
|
|
||||||
css_rule.style[style_type.name] = func(cleaned_value)
|
css_rule.style[style_type.name] = func(cleaned_value)
|
||||||
|
|
||||||
def build_css_file_content(self, css_content: str) -> str:
|
def build_css_file_content(self, css_content: str) -> str:
|
||||||
|
|||||||
@@ -305,7 +305,7 @@ class EpubConverter:
|
|||||||
self.adjacency_list[-1].append(nav_point)
|
self.adjacency_list[-1].append(nav_point)
|
||||||
self.hrefs_added_to_toc.add(nav_point.href)
|
self.hrefs_added_to_toc.add(nav_point.href)
|
||||||
|
|
||||||
def add_not_added_files_to_adjacency_list(self, not_added):
|
def add_not_added_files_to_adjacency_list(self, not_added: list):
|
||||||
"""Function add files that not added to adjacency list"""
|
"""Function add files that not added to adjacency list"""
|
||||||
for i, file in enumerate(not_added):
|
for i, file in enumerate(not_added):
|
||||||
nav_point = NavPoint(
|
nav_point = NavPoint(
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ class HtmlEpubPreprocessor:
|
|||||||
return title
|
return title
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _remove_comments(chapter_tag):
|
def _remove_comments(chapter_tag: BeautifulSoup):
|
||||||
"""
|
"""
|
||||||
Function remove comments
|
Function remove comments
|
||||||
Parameters
|
Parameters
|
||||||
@@ -85,7 +85,7 @@ class HtmlEpubPreprocessor:
|
|||||||
element.extract()
|
element.extract()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _wrap_strings_with_p(chapter_tag):
|
def _wrap_strings_with_p(chapter_tag: BeautifulSoup):
|
||||||
"""
|
"""
|
||||||
Function converts headings that aren't supported by LiveCarta with <p>
|
Function converts headings that aren't supported by LiveCarta with <p>
|
||||||
Parameters
|
Parameters
|
||||||
@@ -108,7 +108,7 @@ class HtmlEpubPreprocessor:
|
|||||||
p_tag.append(str(node))
|
p_tag.append(str(node))
|
||||||
node.replace_with(p_tag)
|
node.replace_with(p_tag)
|
||||||
|
|
||||||
def _wrap_tags_with_table(self, chapter_tag, rules: list):
|
def _wrap_tags_with_table(self, chapter_tag: BeautifulSoup, rules: list):
|
||||||
"""
|
"""
|
||||||
Function wraps <tag> with <table>
|
Function wraps <tag> with <table>
|
||||||
Parameters
|
Parameters
|
||||||
@@ -153,7 +153,7 @@ class HtmlEpubPreprocessor:
|
|||||||
process_tag_using_table()
|
process_tag_using_table()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _tags_to_correspond_livecarta_tag(chapter_tag, rules: list):
|
def _tags_to_correspond_livecarta_tag(chapter_tag: BeautifulSoup, rules: list):
|
||||||
"""
|
"""
|
||||||
Function to replace all tags to correspond LiveCarta tags
|
Function to replace all tags to correspond LiveCarta tags
|
||||||
Parameters
|
Parameters
|
||||||
@@ -190,7 +190,7 @@ class HtmlEpubPreprocessor:
|
|||||||
# todo can cause appearance of \n <p><p>...</p></p> -> <p>\n</p> <p>...</p> <p>\n</p> (section)
|
# todo can cause appearance of \n <p><p>...</p></p> -> <p>\n</p> <p>...</p> <p>\n</p> (section)
|
||||||
tag.name = tag_to_replace
|
tag.name = tag_to_replace
|
||||||
|
|
||||||
def _unwrap_tags(self, chapter_tag, rules: dict):
|
def _unwrap_tags(self, chapter_tag: BeautifulSoup, rules: dict):
|
||||||
"""
|
"""
|
||||||
Function unwrap tags and moves id to span
|
Function unwrap tags and moves id to span
|
||||||
Parameters
|
Parameters
|
||||||
@@ -213,7 +213,7 @@ class HtmlEpubPreprocessor:
|
|||||||
tag.unwrap()
|
tag.unwrap()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _insert_tags_into_correspond_tags(chapter_tag, rules: list):
|
def _insert_tags_into_correspond_tags(chapter_tag: BeautifulSoup, rules: list):
|
||||||
"""
|
"""
|
||||||
Function inserts tags into correspond tags
|
Function inserts tags into correspond tags
|
||||||
Parameters
|
Parameters
|
||||||
@@ -257,14 +257,14 @@ class HtmlEpubPreprocessor:
|
|||||||
for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]):
|
for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]):
|
||||||
insert(tag)
|
insert(tag)
|
||||||
|
|
||||||
def _remove_headings_content(self, content_tag, title_of_chapter: str):
|
def _remove_headings_content(self, chapter_tag, title_of_chapter: str):
|
||||||
"""
|
"""
|
||||||
Function
|
Function
|
||||||
- cleans/removes headings from chapter in order to avoid duplication of chapter titles in the content
|
- cleans/removes headings from chapter in order to avoid duplication of chapter titles in the content
|
||||||
- adds span with id in order to
|
- adds span with id in order to
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
content_tag: soup object
|
chapter_tag: soup object
|
||||||
Tag of the page
|
Tag of the page
|
||||||
title_of_chapter: str
|
title_of_chapter: str
|
||||||
Chapter title
|
Chapter title
|
||||||
@@ -276,15 +276,15 @@ class HtmlEpubPreprocessor:
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
title_of_chapter = title_of_chapter.lower()
|
title_of_chapter = title_of_chapter.lower()
|
||||||
for tag in content_tag.contents:
|
for tag in chapter_tag.contents:
|
||||||
text = tag if isinstance(tag, NavigableString) else tag.text
|
text = tag if isinstance(tag, NavigableString) else tag.text
|
||||||
if re.sub(r"[\s\xa0]", "", text):
|
if re.sub(r"[\s\xa0]", "", text):
|
||||||
text = re.sub(r"[\s\xa0]", " ", text).lower()
|
text = re.sub(r"[\s\xa0]", " ", text).lower()
|
||||||
text = text.strip() # delete extra spaces
|
text = text.strip() # delete extra spaces
|
||||||
if title_of_chapter == text or \
|
if title_of_chapter == text or \
|
||||||
(title_of_chapter in text and
|
(title_of_chapter in text and
|
||||||
re.findall(r"^h[1-3]$", tag.name or content_tag.name)):
|
re.findall(r"^h[1-3]$", tag.name or chapter_tag.name)):
|
||||||
self._add_span_to_save_ids_for_links(tag, content_tag)
|
self._add_span_to_save_ids_for_links(tag, chapter_tag)
|
||||||
tag.extract()
|
tag.extract()
|
||||||
return
|
return
|
||||||
elif not isinstance(tag, NavigableString):
|
elif not isinstance(tag, NavigableString):
|
||||||
@@ -329,7 +329,7 @@ class HtmlEpubPreprocessor:
|
|||||||
table.attrs["border"] = "1"
|
table.attrs["border"] = "1"
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _class_removing(chapter_tag):
|
def _class_removing(chapter_tag: BeautifulSoup):
|
||||||
"""
|
"""
|
||||||
Function removes classes that aren't created by converter
|
Function removes classes that aren't created by converter
|
||||||
Parameters
|
Parameters
|
||||||
|
|||||||
Reference in New Issue
Block a user