Formatting

This commit is contained in:
Kiryl
2022-06-01 16:23:53 +03:00
parent 5039417a0f
commit c0ef0b6d6e
13 changed files with 318 additions and 185 deletions

View File

@@ -201,4 +201,4 @@ class Access:
pass
else:
raise Exception(
f'{response.status_code} Bad request: {response.json()["message"]}.')
f'{response.status_code} Bad request: {response.json()["message"]}.')

View File

@@ -29,12 +29,13 @@ class BookSolver:
self.logger_object = BookLogger(name=f'{__name__}_{self.book_id}',
book_id=book_id,
main_logger=main_logger)
self.status_wrapper = BookStatusWrapper(access, self.logger_object, book_id)
self.status_wrapper = BookStatusWrapper(
access, self.logger_object, book_id)
assert LiveCartaConfig.SUPPORTED_LEVELS == len(LiveCartaConfig.SUPPORTED_HEADERS), \
"Length of headers doesn't match allowed levels."
def save_book_file(self, content):
def save_book_file(self, content: str):
"""
Function saves binary content of file to .docx/.epub
Parameters
@@ -43,17 +44,21 @@ class BookSolver:
binary content of the file
"""
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
folder_path = os.path.join(folder_path, f'{self.book_type}/{self.book_id}')
folder_path = os.path.dirname(
os.path.dirname(os.path.abspath(__file__)))
folder_path = os.path.join(
folder_path, f'{self.book_type}/{self.book_id}')
pathlib.Path(folder_path).mkdir(parents=True, exist_ok=True)
file_path = os.path.join(folder_path, f'{self.book_id}.{self.book_type}')
file_path = os.path.join(
folder_path, f'{self.book_id}.{self.book_type}')
try:
with open(file_path, 'wb+') as file:
file.write(content)
self.logger_object.log(f'File was saved to folder: {folder_path}.')
except Exception as exc:
self.logger_object.log(f"Error in writing {self.book_type} file.", logging.ERROR)
self.logger_object.log(
f"Error in writing {self.book_type} file.", logging.ERROR)
self.logger_object.log_error_to_main_log()
raise exc
@@ -62,12 +67,14 @@ class BookSolver:
def get_book_file(self):
"""Method for getting and saving book from server"""
try:
self.logger_object.log(f'Start receiving file from server. URL: {self.access.url}/doc-convert/{self.book_id}/file')
self.logger_object.log(f'Start receiving file from server. URL:'
f' {self.access.url}/doc-convert/{self.book_id}/file')
content = self.access.get_book(self.book_id)
self.logger_object.log('File was received from server.')
self.save_book_file(content)
except FileNotFoundError as f_err:
self.logger_object.log("Can't get file from server.", logging.ERROR)
self.logger_object.log(
"Can't get file from server.", logging.ERROR)
self.logger_object.log_error_to_main_log()
raise f_err
except Exception as exc:
@@ -75,14 +82,17 @@ class BookSolver:
def check_output_directory(self):
if self.output_path is None:
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
output_path = os.path.join(folder_path, f'json/{self.book_id}.json')
folder_path = os.path.dirname(
os.path.dirname(os.path.abspath(__file__)))
output_path = os.path.join(
folder_path, f'json/{self.book_id}.json')
self.output_path = output_path
self.output_path = pathlib.Path(self.output_path)
self.logger_object.log(f'Output file path: {self.output_path}')
pathlib.Path(self.output_path).parent.mkdir(parents=True, exist_ok=True)
pathlib.Path(self.output_path).parent.mkdir(
parents=True, exist_ok=True)
self.output_path.touch(exist_ok=True)
def write_to_json(self, content: dict):
@@ -90,9 +100,11 @@ class BookSolver:
try:
with codecs.open(self.output_path, 'w', encoding='utf-8') as f:
json.dump(content, f, ensure_ascii=False)
self.logger_object.log(f'Data has been saved to .json file: {self.output_path}')
self.logger_object.log(
f'Data has been saved to .json file: {self.output_path}')
except Exception as exc:
self.logger_object.log('Error has occurred while writing json file.' + str(exc), logging.ERROR)
self.logger_object.log(
'Error has occurred while writing .json file.' + str(exc), logging.ERROR)
def send_json_content_to_server(self, content: dict):
"""Function sends json_content to site"""
@@ -100,14 +112,15 @@ class BookSolver:
self.access.send_book(self.book_id, content)
self.logger_object.log(f'JSON data has been sent to server.')
except Exception as exc:
self.logger_object.log('Error has occurred while sending json content.', logging.ERROR)
self.logger_object.log(
'Error has occurred while sending json content.', logging.ERROR)
self.logger_object.log_error_to_main_log()
self.status_wrapper.set_error()
raise exc
@abstractmethod
def get_converted_book(self):
self.logger_object.log('Beginning of processing json output.')
self.logger_object.log('Beginning of processing .json output.')
self.status_wrapper.set_generating()
return {}
@@ -119,21 +132,24 @@ class BookSolver:
"""
try:
self.logger_object.log(f'Beginning of conversion from .{self.book_type} to .json.')
self.logger_object.log(
f'Beginning of conversion from .{self.book_type} to .json.')
self.get_book_file()
self.status_wrapper.set_processing()
content_dict = self.get_converted_book()
self.status_wrapper.set_generating()
self.write_to_json(content_dict)
self.send_json_content_to_server(content_dict)
self.logger_object.log(f'End of the conversion to LiveCarta format. Check {self.output_path}.')
self.logger_object.log(
f'End of the conversion to LiveCarta format. Check {self.output_path}.')
except Exception as exc:
self.status_wrapper.set_error()
self.logger_object.log('Error has occurred while conversion.', logging.ERROR)
self.logger_object.log(
'Error has occurred while conversion.', logging.ERROR)
self.logger_object.log_error_to_main_log(str(exc))
raise exc
def conversion_local(self, file_name: str):
def conversion_local(self, file_path: str):
"""
Function
- without downloading book from server (local)
@@ -141,13 +157,16 @@ class BookSolver:
"""
try:
self.logger_object.log(f'Data has been downloaded from {file_name}.json file: ..\converter\json')
self.logger_object.log(
f'Data has been downloaded from {file_path} file')
self.status_wrapper.set_processing()
with codecs.open(f'json/{file_name}.json', 'r', encoding='utf-8') as f_json:
with codecs.open(file_path, 'r', encoding='utf-8') as f_json:
content_dict = json.load(f_json)
self.status_wrapper.set_generating()
self.send_json_content_to_server(content_dict)
self.logger_object.log(f'Sent a file to server. Check LiveCarta.')
except Exception as exc:
self.status_wrapper.set_error()
self.logger_object.log('Error has occurred while reading json file.' + str(exc), logging.ERROR)
self.logger_object.log_error_to_main_log(str(exc))
self.logger_object.log(
'Error has occurred while reading json file.' + str(exc), logging.ERROR)
self.logger_object.log_error_to_main_log(str(exc))

View File

@@ -88,4 +88,4 @@ class ChapterItem:
}
def __str__(self):
return '<Chapter: %s>' % self.title
return '<Chapter: %s>' % self.title

View File

@@ -21,13 +21,22 @@ class HTMLDocxPreprocessor:
self.top_level_headers = None
self.content = list()
def _clean_tag(self, tag, attr_name, attr_value):
def _clean_tag(self, tag: str, attr_name: str, attr_value: re):
"""
Function to clean tags by its name and attribute value.
Parameters
----------
tag: str
tag name to clean
attr_name: str
attribute name
attr_value: [str,re]
attribute value
Returns
-------
clean tag
:param tag: Tag name to clean.
:param attr_name: Attribute name.
:param attr_value: Attribute value.
"""
tags = self.body_tag.find_all(tag, {attr_name: attr_value})
for tag in tags:
@@ -56,12 +65,19 @@ class HTMLDocxPreprocessor:
return value
@classmethod
def convert_font_pt_to_px(cls, style):
def convert_font_pt_to_px(cls, style: str) -> str:
"""
Method converts point in the font-size to pixels.
Function converts point in the font-size to pixels.
Parameters
----------
style: str
str with style to proces
Returns
-------
: str
str with converted style
:param style: Str with style to process.
:return: Str with converted style.
"""
size = re.search(r"font-size: (\d{1,3})pt", style)
@@ -77,7 +93,10 @@ class HTMLDocxPreprocessor:
return re.sub(size + "pt", str(new_size) + "px", style)
def _font_to_span(self):
"""Function to convert <font> tag to <span>. If font style is default, then remove this tag."""
"""
Function to convert <font> tag to <span>.
If font style is default, then remove this tag.
"""
fonts = self.body_tag.find_all("font")
for font in fonts:
face = font.get("face")
@@ -105,7 +124,8 @@ class HTMLDocxPreprocessor:
if len(font.attrs) == 0:
font.unwrap()
assert len(self.body_tag.find_all("font")) == 0 # on this step there should be no more <font> tags
# on this step there should be no more <font> tags
assert len(self.body_tag.find_all("font")) == 0
def delete_content_before_toc(self):
# remove all tag upper the <TOC> only in content !!! body tag is not updated
@@ -116,11 +136,15 @@ class HTMLDocxPreprocessor:
def clean_trash(self):
"""Function to remove all styles and tags we don't need."""
self._clean_tag('span', 'style', re.compile(r'^background: #[0-9a-fA-F]{6}$'))
self._clean_tag('span', 'lang', re.compile(r'^ru-RU$')) # todo: check for another languages
self._clean_tag('span', 'style', re.compile('^letter-spacing: -?[\d\.]+pt$'))
self._clean_tag('span', 'style', re.compile(
r'^background: #[0-9a-fA-F]{6}$'))
# todo: check for another languages
self._clean_tag('span', 'lang', re.compile(r'^ru-RU$'))
self._clean_tag('span', 'style', re.compile(
'^letter-spacing: -?[\d\.]+pt$'))
self._clean_tag('font', 'face', re.compile(r'^Times New Roman[\w, ]+$'))
self._clean_tag('font', 'face', re.compile(
r'^Times New Roman[\w, ]+$'))
self._clean_tag("a", "name", "_GoBack")
self._clean_underline_links()
@@ -128,7 +152,8 @@ class HTMLDocxPreprocessor:
self._font_to_span()
# replace toc with empty <TOC> tag
tables = self.body_tag.find_all("div", id=re.compile(r'^Table of Contents\d+'))
tables = self.body_tag.find_all(
"div", id=re.compile(r'^Table of Contents\d+'))
for table in tables:
table.wrap(self.html_soup.new_tag("TOC"))
table.decompose()
@@ -138,7 +163,7 @@ class HTMLDocxPreprocessor:
paragraphs = self.body_tag.find_all('p')
for p in paragraphs:
# libra converts some \n into <p> with 2 </br>
# libre converts some \n into <p> with 2 </br>
# there we remove 1 unnecessary <br>
brs = p.find_all('br')
text = p.text
@@ -156,9 +181,11 @@ class HTMLDocxPreprocessor:
if style:
indent = re.search(r'text-indent: ([\d\.]{1,4})in', style)
margin_left = re.search(r'margin-left: ([\d\.]{1,4})in', style)
margin_right = re.search(r'margin-right: ([\d\.]{1,4})in', style)
margin_right = re.search(
r'margin-right: ([\d\.]{1,4})in', style)
margin_top = re.search(r'margin-top: ([\d\.]{1,4})in', style)
margin_bottom = re.search(r'margin-bottom: ([\d\.]{1,4})in', style)
margin_bottom = re.search(
r'margin-bottom: ([\d\.]{1,4})in', style)
else:
indent = None
margin_left = None
@@ -195,6 +222,7 @@ class HTMLDocxPreprocessor:
def _process_tables(self):
"""Function to process tables. Set "border" attribute."""
tables = self.body_tag.find_all("table")
for table in tables:
tds = table.find_all("td")
@@ -258,21 +286,24 @@ class HTMLDocxPreprocessor:
for x in has_i_tag_or_br]
if all(has_i_tag_or_br) and is_zero_border:
new_div = BeautifulSoup(features='lxml').new_tag('blockquote')
new_div = BeautifulSoup(
features='lxml').new_tag('blockquote')
for p in paragraphs:
new_div.append(p)
table.replaceWith(new_div)
def _process_hrefs(self):
a_tags_with_href = self.body_tag.find_all('a', {'href': re.compile('^.*http.+')})
a_tags_with_href = self.body_tag.find_all(
'a', {'href': re.compile('^.*http.+')})
# remove char=end of file for some editors
for tag in a_tags_with_href:
tag.string = tag.text.replace('\u200c', '')
tag['href'] = tag.attrs.get('href').replace('%E2%80%8C', '')
a_tags_with_href = self.body_tag.find_all('a', {'href': re.compile('^(?!#sdfootnote)')})
a_tags_with_href = self.body_tag.find_all(
'a', {'href': re.compile('^(?!#sdfootnote)')})
for tag in a_tags_with_href:
tag.string = tag.text.replace('\u200c', '')
tag.string = tag.text.replace('\u200b', '') # zero-width-space
@@ -286,23 +317,25 @@ class HTMLDocxPreprocessor:
def _process_footnotes(self):
"""Function returns list of footnotes and delete them from html_soup."""
footnote_anchors = self.body_tag.find_all('a', class_='sdfootnoteanc')
footnote_content = self.body_tag.find_all('div', id=re.compile(r'^sdfootnote\d+$'))
footnote_content = self.body_tag.find_all(
'div', id=re.compile(r'^sdfootnote\d+$'))
footnote_amt = len(footnote_anchors)
assert footnote_amt == len(footnote_content), \
'Something went wrong with footnotes after libra conversion'
'Something went wrong with footnotes after libre conversion'
footnotes = []
for i, (anc_tag, cont_tag) in enumerate(zip(footnote_anchors, footnote_content)):
true_a_tag = cont_tag.find_all('a', class_=re.compile(r'^sdfootnote.+$'))[0]
true_a_tag = cont_tag.find_all(
'a', class_=re.compile(r'^sdfootnote.+$'))[0]
if true_a_tag.attrs.get('href') is None:
cont_tag.a.decompose()
continue
assert anc_tag['name'] == true_a_tag['href'][1:], \
'Something went wrong with footnotes after libra conversion'
'Something went wrong with footnotes after libre conversion'
new_tag = BeautifulSoup(features='lxml').new_tag('sup')
new_tag['class'] = 'footnote-element'
@@ -355,8 +388,10 @@ class HTMLDocxPreprocessor:
if len(img_tags):
if access is None:
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
new_path = pathlib.Path(os.path.join(folder_path, f'json/img_{book_id}/'))
folder_path = os.path.dirname(
os.path.dirname(os.path.abspath(__file__)))
new_path = pathlib.Path(os.path.join(
folder_path, f'json/img_{book_id}/'))
new_path.mkdir(exist_ok=True)
for img in img_tags:
@@ -370,10 +405,12 @@ class HTMLDocxPreprocessor:
if access is not None:
link = access.send_image(img_path, doc_id=book_id)
img.attrs['src'] = link
self.logger_object.log(f'{img_name} successfully uploaded.')
self.logger_object.log(
f'{img_name} successfully uploaded.')
else:
img_size = os.path.getsize(img_path)
self.logger_object.log(f'{img_name} successfully loaded. Image size: {img_size}.', logging.DEBUG)
self.logger_object.log(
f'{img_name} successfully loaded. Image size: {img_size}.', logging.DEBUG)
new_img_path = new_path / img_name
copyfile(img_path, new_img_path)
img.attrs["src"] = str(new_img_path)
@@ -408,7 +445,8 @@ class HTMLDocxPreprocessor:
def _process_toc_links(self):
"""Function to extract nodes which contains TOC links, remove links from file and detect headers."""
toc_links = self.body_tag.find_all("a", {'name': re.compile(r'^_Toc\d+')})
toc_links = self.body_tag.find_all(
"a", {'name': re.compile(r'^_Toc\d+')})
headers = [link.parent for link in toc_links]
outline_level = "1" # All the unknown outlines will be predicted as <h1>
for tag in headers:
@@ -418,7 +456,8 @@ class HTMLDocxPreprocessor:
elif tag.name == "p":
exist_in_toc = self._check_parent_link_exist_in_toc(tag)
if tag in self.body_tag.find_all("p") and exist_in_toc:
new_tag = BeautifulSoup(features="lxml").new_tag("h" + outline_level)
new_tag = BeautifulSoup(
features="lxml").new_tag("h" + outline_level)
text = tag.text
tag.replaceWith(new_tag)
new_tag.string = text
@@ -440,14 +479,16 @@ class HTMLDocxPreprocessor:
@staticmethod
def clean_tag_from_tabs(tag: NavigableString):
cleaned = re.sub(r'(\s+)+', ' ', tag)
this = BeautifulSoup.new_string(BeautifulSoup(features="lxml"), cleaned, NavigableString)
this = BeautifulSoup.new_string(BeautifulSoup(
features="lxml"), cleaned, NavigableString)
tag.replace_with(this)
# print('input: ', repr(tag))
# print('test: ', repr(cleaned))
def clean_tag_from_numbering(self, tag):
cleaned = self.clean_title_from_numbering(tag)
this = BeautifulSoup.new_string(BeautifulSoup(features="lxml"), cleaned, NavigableString)
this = BeautifulSoup.new_string(BeautifulSoup(
features="lxml"), cleaned, NavigableString)
tag.replace_with(this)
# print('input: ', repr(tag))
# print('test: ', repr(cleaned))
@@ -484,7 +525,8 @@ class HTMLDocxPreprocessor:
"""
headers_info = []
header_tags = self.body_tag.find_all(re.compile("^h[1-9]$"))
headers_outline = [int(re.sub(r"^h", "", tag.name)) for tag in header_tags]
headers_outline = [int(re.sub(r"^h", "", tag.name))
for tag in header_tags]
if headers_outline:
top_level_outline = min(headers_outline)
top_level_headers = [tag for tag in header_tags
@@ -518,13 +560,17 @@ class HTMLDocxPreprocessor:
Assume header(s) to be introduction if:
1. one header not numbered, before 1 numbered header
2. it is first header from the top level list and it equals to 'introduction'
2. it is first header from the top level list and it equals to 'introduction'
Returns
-------
None
mark each top-level header with flag should_be_numbered = true/false
Result :
Mark each top-level header with flag should_be_numbered = true/false
"""
is_numbered_header = [header['is_numbered'] for header in self.top_level_headers]
is_title = [header['is_introduction'] for header in self.top_level_headers]
is_numbered_header = [header['is_numbered']
for header in self.top_level_headers]
is_title = [header['is_introduction']
for header in self.top_level_headers]
first_not_numbered = is_numbered_header and is_numbered_header[0] == 0
second_is_numbered_or_not_exist = all(is_numbered_header[1:2])
@@ -539,7 +585,19 @@ class HTMLDocxPreprocessor:
self.top_level_headers[i]['should_be_numbered'] = True
def _process_headings(self):
"""Function to process tags <h>."""
"""
Function to process tags <h>.
Steps
----------
1. remove <b>, <span>
2. clean text in header from numbering and \n
Returns
-------
None
processed <h> tags
"""
header_tags = self.body_tag.find_all(re.compile("^h[1-9]$"))
# 1. remove <b>, <span>
@@ -581,36 +639,52 @@ class HTMLDocxPreprocessor:
for i, item in enumerate(content):
if type(content[i]) is NavigableString:
cleaned = re.sub(r'(\s+)+', ' ', content[i])
this = BeautifulSoup.new_string(BeautifulSoup(features="lxml"), cleaned, NavigableString)
this = BeautifulSoup.new_string(BeautifulSoup(
features="lxml"), cleaned, NavigableString)
content[i].replace_with(this)
content[i] = this
else:
self.apply_func_to_last_child(content[i], self.clean_tag_from_tabs)
self.apply_func_to_last_child(
content[i], self.clean_tag_from_tabs)
content[0] = '' if content[0] == ' ' else content[0]
content = [item for item in content if item != '']
if type(content[0]) is NavigableString:
cleaned = self.clean_title_from_numbering(content[0])
this = BeautifulSoup.new_string(BeautifulSoup(features="lxml"), cleaned, NavigableString)
this = BeautifulSoup.new_string(BeautifulSoup(
features="lxml"), cleaned, NavigableString)
content[0].replace_with(this)
content[0] = this
else:
self.apply_func_to_last_child(content[0], self.clean_tag_from_numbering)
self.apply_func_to_last_child(
content[0], self.clean_tag_from_numbering)
def _process_lists(self):
"""
Function to process tags <li>.
Unwrap <p> tags.
Function
- process tags <li>.
- unwrap <p> tags.
Parameters
----------
body_tag: Tag, soup object
Returns
-------
None
unwrap <p> tag with <li>
"""
li_tags = self.body_tag.find_all("li")
for il_tag in li_tags:
il_tag.attrs.update(il_tag.p.attrs)
il_tag.p.unwrap()
for li_tag in li_tags:
li_tag.attrs.update(li_tag.p.attrs)
li_tag.p.unwrap()
def process_html(self, access, html_path, book_id):
def process_html(self, access=None, html_path='', book_id='local'):
"""Process html code to satisfy LiveCarta formatting."""
self.logger_object.log('Beginning of processing .html file.')
try:
self.logger_object.log(f'Processing TOC and headers.')
self._process_toc_links()
@@ -628,18 +702,22 @@ class HTMLDocxPreprocessor:
self.logger_object.log('Tables processing.')
self._process_tables()
self.logger_object.log(f'{self.tables_amount} tables have been processed.')
self.logger_object.log(
f'{self.tables_amount} tables have been processed.')
self.logger_object.log('Hrefs processing.')
self._process_hrefs()
self.logger_object.log('Footnotes processing.')
self._process_footnotes()
self.logger_object.log(f'{len(self.footnotes)} footnotes have been processed.')
self.logger_object.log(
f'{len(self.footnotes)} footnotes have been processed.')
self.logger_object.log('Image processing.')
self._process_images(access=access, html_path=html_path, book_id=book_id)
self.logger_object.log(f'{len(self.images)} images have been processed.')
self._process_images(
access=access, html_path=html_path, book_id=book_id)
self.logger_object.log(
f'{len(self.images)} images have been processed.')
self._process_footer()
self._process_div()
@@ -658,7 +736,8 @@ class HTMLDocxPreprocessor:
self.delete_content_before_toc()
except Exception as exc:
self.logger_object.log('Error has occurred while processing html.', logging.ERROR)
self.logger_object.log(
'Error has occurred while processing html.', logging.ERROR)
self.logger_object.log_error_to_main_log()
if self.status_wrapper:
self.status_wrapper.set_error()

View File

@@ -5,7 +5,7 @@ from copy import copy
from src.livecarta_config import LiveCartaConfig
class LibraHTML2JSONConverter:
class LibreHTML2JSONConverter:
def __init__(self, content, footnotes, top_level_headers, logger_object, book_api_status=None):
self.content_dict = None
self.content = content
@@ -15,12 +15,19 @@ class LibraHTML2JSONConverter:
self.book_api_status = book_api_status
@staticmethod
def format_html(html_text):
def format_html(html_text: str) -> str:
"""
Function to remove useless symbols from html code.
Parameters
----------
html_text: str
text to process.
Returns
-------
new_text: str
cleaned text
:param html_text: Text to process.
:return: Cleaned text.
"""
new_text = re.sub(r'([\n\t])', ' ', html_text)
return new_text
@@ -29,8 +36,15 @@ class LibraHTML2JSONConverter:
def header_to_livecarta_chapter_item(self, ind) -> (dict, int):
"""
Function process header and collects all content for it.
Parameters
----------
ind: int
index of header in content list.
Returns
-------
result, ind
:param ind: Index of header in content list.
"""
if self.content[ind].name in LiveCartaConfig.SUPPORTED_HEADERS:
title = str(self.content[ind])
@@ -38,7 +52,8 @@ class LibraHTML2JSONConverter:
title = title.replace(f'</{self.content[ind].name}>', '')
title = re.sub(r'^\n', '', title)
curr_outline = int(re.sub(r"^h", "", self.content[ind].name)) # extract outline from tag
# extract outline from tag
curr_outline = int(re.sub(r"^h", "", self.content[ind].name))
result = {
'title': f'{title}',
'contents': [],
@@ -53,7 +68,8 @@ class LibraHTML2JSONConverter:
outline = int(re.sub(r"^h", "", self.content[ind].name))
# - recursion step until h_i > h_initial
if outline > curr_outline:
header_dict, ind = self.header_to_livecarta_chapter_item(ind)
header_dict, ind = self.header_to_livecarta_chapter_item(
ind)
if ch_content:
result['contents'].append("".join(ch_content))
ch_content = []
@@ -108,7 +124,8 @@ class LibraHTML2JSONConverter:
chapter = []
while ind < len(self.content) and self.content[ind].name not in LiveCartaConfig.SUPPORTED_HEADERS:
if not self._is_empty_p_tag(self.content[ind]):
chapter.append(self.format_html(str(self.content[ind])))
chapter.append(self.format_html(
str(self.content[ind])))
ind += 1
if chapter:
res = {
@@ -121,9 +138,11 @@ class LibraHTML2JSONConverter:
if res:
json_strc.append(res)
ch_amt += 1
self.logger_object.log(f'Chapter {ch_amt} has been added to structure.')
self.logger_object.log(
f'Chapter {ch_amt} has been added to structure.')
except Exception as exc:
self.logger_object.log('Error has occurred while making json structure.', logging.ERROR)
self.logger_object.log(
'Error has occurred while making json structure.', logging.ERROR)
self.logger_object.log_error_to_main_log()
if self.book_api_status:
self.book_api_status.set_error()

View File

@@ -14,21 +14,23 @@ from src.livecarta_config import LiveCartaConfig
cssutils.log.setLevel(CRITICAL)
sizes_pr = [-100, -1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56,
1.63, 1.69, 1.75, 1.81, 1.88, 1.94, 2.0, 2.06, 2.13, 2.19, 2.25, 2.31, 2.38, 2.44, 2.5, 2.56, 2.63, 2.69,
2.75, 2.81, 2.88, 2.94, 3.0, 4.0, 5.0]
sizes_pr = [-100, -1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0,
1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56, 1.63, 1.69,
1.75, 1.81, 1.88, 1.94, 2.0, 2.06, 2.13, 2.19, 2.25, 2.31, 2.38,
2.44, 2.5, 2.56, 2.63, 2.69, 2.75, 2.81, 2.88, 2.94, 3.0, 4.0, 5.0]
sizes_px = ['0px', '10px', '10px', '11px', '12px', '13px', '14px', '15px', '16px', '17px', '18px', '19px', '20px', '21px',
'22px', '23px', '24px', '25px', '26px', '27px', '28px', '29px', '30px', '31px', '32px', '33px', '34px',
'35px', '36px', '37px', '38px', '39px', '40px', '41px', '42px', '43px', '44px', '45px', '46px', '47px',
'48px', '49px', '50px', '64px', '72px']
sizes_px = ['0px', '10px', '10px', '11px', '12px', '13px', '14px', '15px', '16px',
'17px', '18px', '19px', '20px', '21px', '22px', '23px', '24px', '25px',
'26px', '27px', '28px', '29px', '30px', '31px', '32px', '33px', '34px',
'35px', '36px', '37px', '38px', '39px', '40px', '41px', '42px', '43px',
'44px', '45px', '46px', '47px', '48px', '49px', '50px', '64px', '72px']
list_types = ['circle', 'disc', 'armenian', 'decimal',
'decimal-leading-zero', 'georgian', 'lower-alpha', 'lower-latin',
'lower-roman', 'upper-alpha', 'upper-latin', 'upper-roman', 'none']
def convert_tag_values(value: str) -> str:
def convert_tag_style_values(value: str) -> str:
"""
Function
- converts values of tags from em/%/pt to px
@@ -42,8 +44,8 @@ def convert_tag_values(value: str) -> str:
value: str
"""
def find_closest_size(value):
possible_sizes = list(takewhile(lambda x: value > x, sizes_pr))
def find_closest_size(size_value):
possible_sizes = list(takewhile(lambda x: size_value > x, sizes_pr))
last_possible_size_index = sizes_pr.index(possible_sizes[-1])
return sizes_px[last_possible_size_index]
@@ -122,12 +124,13 @@ Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING shou
to suit livecarta style convention.
"""
LIVECARTA_STYLE_ATTRS_MAPPING = {
'text-indent': convert_tag_values,
'text-indent': convert_tag_style_values,
'font-variant': lambda x: x,
'text-align': lambda x: x,
'font': lambda x: '',
'font-family': lambda x: LiveCartaConfig.font_correspondence_table.get(x) or LiveCartaConfig.font_correspondence_table.get(x.capitalize()),
'font-size': convert_tag_values,
'font-family': lambda x: LiveCartaConfig.font_correspondence_table.get(x) or
LiveCartaConfig.font_correspondence_table.get(x.capitalize()),
'font-size': convert_tag_style_values,
'color': get_text_color,
'background-color': get_bg_color,
'background': get_bg_color,
@@ -140,9 +143,9 @@ LIVECARTA_STYLE_ATTRS_MAPPING = {
'border-bottom': lambda x: x if x != '0' else '',
'list-style-type': lambda x: x if x in list_types else 'disc',
'list-style-image': lambda x: 'disc',
'margin-left': convert_tag_values,
'margin-top': convert_tag_values,
'margin': convert_tag_values,
'margin-left': convert_tag_style_values,
'margin-top': convert_tag_style_values,
'margin': convert_tag_style_values,
}
"""
@@ -269,10 +272,10 @@ class TagStyleConverter:
item = item.split(':')
if item[0] in ['text-indent', 'margin-left', 'margin']:
if len(item[1].split(' ')) == 3:
item[1] = convert_tag_values(item[1].split(
item[1] = convert_tag_style_values(item[1].split(
' ')[-2]) # split returns middle value
else:
item[1] = convert_tag_values(item[1].split(
item[1] = convert_tag_style_values(item[1].split(
' ')[-1]) # split returns last value
clean_style += item[0] + ': ' + item[1] + '; '
@@ -343,7 +346,8 @@ class TagStyleConverter:
split_inline_style: dict = remove_extra_spaces(inline_style)
# repetition check - if the tag had already had inline style that isn't in the css styles, add this to style parsed from css
# repetition check - if the tag had already had inline style
# that isn't in the css styles, add this to style parsed from css
repeat_styles = list(set(split_ultimate_style.keys())
& set(split_inline_style.keys()))
@@ -409,7 +413,8 @@ class TagStyleConverter:
if has_p_style_attrs:
p_style += item + ';'
initial_style = initial_style.replace(item + ';', '')
# here check that this style i exactly the same. Not 'align' when we have 'text-align', or 'border' when we have 'border-top'
# here check that this style i exactly the same.
# Not 'align' when we have 'text-align', or 'border' when we have 'border-top'
styles_to_be_saved_in_span = [((attr + ':') in initial_style) & (
'-' + attr not in initial_style) for attr in styles_cant_be_in_p]
if any(styles_to_be_saved_in_span):
@@ -549,4 +554,4 @@ if __name__ == '__main__':
'pr01s05.xhtml').get_body_content().decode()
html_soup = BeautifulSoup(html_, features='lxml')
print(convert_html_soup_with_css_style(html_soup, css_cleaned))
print(convert_html_soup_with_css_style(html_soup, css_cleaned))

View File

@@ -1,6 +1,7 @@
from src.book_solver import BookSolver
from src.epub_converter.epub_converter import EpubConverter
class EpubBook(BookSolver):
"""Class of .epub type book - child of BookSolver"""
@@ -10,10 +11,19 @@ class EpubBook(BookSolver):
def get_converted_book(self):
"""
1. Convert epub to html
2. Parse from line structure to nested structure
Function
Steps
----------
1. Converts .epub to .html
2. Parses from line structure to nested structure
Returns
----------
content_dict
json for LiveCarta platform
"""
json_converter = EpubConverter(self.file_path, access=self.access, logger=self.logger_object)
json_converter = EpubConverter(
self.file_path, access=self.access, logger=self.logger_object)
content_dict = json_converter.convert_to_dict()
self.status_wrapper.set_generating()
return content_dict
return content_dict

View File

@@ -71,7 +71,7 @@ def update_images_src_links(body_tag: BeautifulSoup,
return path2aws_path
def preprocess_table(body_tag: BeautifulSoup):
def _preprocess_table(body_tag: BeautifulSoup):
"""Function to preprocess tables and tags(td|th|tr): style"""
tables = body_tag.find_all("table")
for table in tables:
@@ -99,7 +99,7 @@ def preprocess_table(body_tag: BeautifulSoup):
table.attrs['border'] = '1'
def process_lists(body_tag: BeautifulSoup):
def _process_lists(body_tag: BeautifulSoup):
"""
Function
- process tags <li>.
@@ -121,7 +121,7 @@ def process_lists(body_tag: BeautifulSoup):
li_tag.p.unwrap()
def insert_span_with_attrs_before_tag(main_tag, tag, id_, class_):
def _insert_span_with_attrs_before_tag(main_tag, tag, id_, class_):
"""Function inserts span before tag aren't supported by livecarta"""
new_tag = main_tag.new_tag("span")
new_tag.attrs['id'] = id_ or ''
@@ -130,21 +130,21 @@ def insert_span_with_attrs_before_tag(main_tag, tag, id_, class_):
tag.insert_before(new_tag)
def clean_headings_content(content: BeautifulSoup, title: str):
def _clean_headings_content(content: BeautifulSoup, title: str):
def add_span_to_save_ids_for_links(tag_to_be_removed: Tag, body_tag: BeautifulSoup):
if tag_to_be_removed.attrs.get('id'):
insert_span_with_attrs_before_tag(body_tag,
tag_to_be_removed,
id_=tag_to_be_removed.attrs.get(
'id'),
class_=tag_to_be_removed.attrs.get('class'))
_insert_span_with_attrs_before_tag(body_tag,
tag_to_be_removed,
id_=tag_to_be_removed.attrs.get(
'id'),
class_=tag_to_be_removed.attrs.get('class'))
for sub_tag in tag_to_be_removed.find_all():
if sub_tag.attrs.get('id'):
insert_span_with_attrs_before_tag(body_tag,
tag_to_be_removed,
id_=sub_tag.attrs['id'],
class_=sub_tag.attrs.get('class'))
_insert_span_with_attrs_before_tag(body_tag,
tag_to_be_removed,
id_=sub_tag.attrs['id'],
class_=sub_tag.attrs.get('class'))
title = title.lower()
for child in content.contents:
@@ -165,7 +165,7 @@ def clean_headings_content(content: BeautifulSoup, title: str):
break
def heading_tag_to_p_tag(body_tag):
def _heading_tag_to_p_tag(body_tag):
"""Function to convert all lower level headings to p tags"""
pattern = f'^h[{LiveCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
header_tags = body_tag.find_all(re.compile(pattern))
@@ -173,7 +173,7 @@ def heading_tag_to_p_tag(body_tag):
tag.name = 'p'
def clean_title_from_numbering(title: str):
def _clean_title_from_numbering(title: str):
"""Function removes numbering from titles"""
title = re.sub(r'^(\s+)+', '', title)
# title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title) # delete chapter numbering from the title
@@ -182,7 +182,7 @@ def clean_title_from_numbering(title: str):
return title
def replace_with_livecarta_anchor_tag(anchor, i):
def _replace_with_livecarta_anchor_tag(anchor, i):
"""Function replace noteref_tag(anchor) with new livecarta tag"""
new_tag = BeautifulSoup(features='lxml').new_tag('sup')
new_tag['class'] = 'footnote-element'
@@ -257,7 +257,7 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
if footnote_tag.parent.attrs.get('role') and footnote_tag.parent.attrs.get('role') == 'doc-endnote':
footnote_tag = footnote_tag.parent
new_noterefs_tags.append(
replace_with_livecarta_anchor_tag(noteref_tag, i))
_replace_with_livecarta_anchor_tag(noteref_tag, i))
content = footnote_tag.text
# footnote_tag.decompose()
footnotes.append(content)
@@ -292,7 +292,7 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
"""
def preserve_class_in_aside_tag(tag_):
def _preserve_class_in_aside_tag(tag_):
"""to save css style inherited from class, copy class to aside tag (which is parent to tag_)"""
# this is for Wiley books with boxes
tag_class = tag_.attrs['class'] if not isinstance(
@@ -301,7 +301,7 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
if not tag_.parent.attrs.get('class'):
tag_.parent.attrs['class'] = tag_class
def preserve_class_in_section_tag(tag_: BeautifulSoup) -> bool:
def _preserve_class_in_section_tag(tag_: BeautifulSoup) -> bool:
"""
Function saves css style inherited from class, copies class to child <p>
returns True, if <section> could be unwrapped
@@ -332,13 +332,13 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
else:
return True
def add_span_to_save_ids_for_links(tag_to_be_removed):
def _add_span_to_save_ids_for_links(tag_to_be_removed):
if tag_to_be_removed.attrs.get('id'):
insert_span_with_attrs_before_tag(main_tag=body_tag, tag=tag_to_be_removed,
id_=tag_to_be_removed.attrs['id'],
class_=tag_to_be_removed.attrs.get('class'))
_insert_span_with_attrs_before_tag(main_tag=body_tag, tag=tag_to_be_removed,
id_=tag_to_be_removed.attrs['id'],
class_=tag_to_be_removed.attrs.get('class'))
def replace_div_tag_with_table():
def _replace_div_tag_with_table():
"""
Function replace <div> with <table>:
1. Convert div with certain classes to tables
@@ -350,11 +350,11 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
div_class = div.attrs['class'] if not isinstance(
div.attrs['class'], list) else div.attrs['class'][0]
if div_class in ['C409', 'C409a']:
wrap_block_tag_with_table(
_wrap_block_tag_with_table(
body_tag, old_tag=div, width='100', border='solid 3px', bg_color='#e7e7e9')
elif div_class in ['C441', 'C816']:
wrap_block_tag_with_table(
_wrap_block_tag_with_table(
body_tag, old_tag=div, width='100', border='solid #6e6e70 1px', bg_color='#e7e7e8')
if div.attrs.get('style'):
@@ -363,7 +363,7 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
'background-color') + len('background-color')
start_index_of_color = end_index + 2
bg_color = div.attrs['style'][start_index_of_color:start_index_of_color + 7]
wrap_block_tag_with_table(
_wrap_block_tag_with_table(
body_tag, old_tag=div, width='100', border='', bg_color=bg_color)
elif div.attrs.get('style') == '':
del div.attrs['style']
@@ -379,7 +379,7 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
if all(is_not_struct_tag):
div.name = 'p'
continue
add_span_to_save_ids_for_links(div)
_add_span_to_save_ids_for_links(div)
div.unwrap()
# comments removal
@@ -387,18 +387,18 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
for element in tag(text=lambda text: isinstance(text, Comment)):
element.extract()
replace_div_tag_with_table()
_replace_div_tag_with_table()
for s in body_tag.find_all("section"):
could_be_unwrapped = True
if s.attrs.get('class'):
could_be_unwrapped = preserve_class_in_section_tag(s)
add_span_to_save_ids_for_links(s)
could_be_unwrapped = _preserve_class_in_section_tag(s)
_add_span_to_save_ids_for_links(s)
if could_be_unwrapped:
s.unwrap()
for s in body_tag.find_all("article"):
add_span_to_save_ids_for_links(s)
_add_span_to_save_ids_for_links(s)
s.unwrap()
for s in body_tag.find_all("figure"):
@@ -407,22 +407,22 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
s.attrs['style'] = "text-align: center;"
for s in body_tag.find_all("figcaption"):
add_span_to_save_ids_for_links(s)
_add_span_to_save_ids_for_links(s)
s.unwrap()
for s in body_tag.find_all("aside"):
s.name = 'blockquote'
for s in body_tag.find_all("main"):
add_span_to_save_ids_for_links(s)
_add_span_to_save_ids_for_links(s)
s.unwrap()
for s in body_tag.find_all("body"):
add_span_to_save_ids_for_links(s)
_add_span_to_save_ids_for_links(s)
s.unwrap()
for s in body_tag.find_all("html"):
add_span_to_save_ids_for_links(s)
_add_span_to_save_ids_for_links(s)
s.unwrap()
for s in body_tag.find_all("header"):
@@ -442,7 +442,7 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
assert all(
parents_marks_are_body), 'Anchor for chapter is deeper than 2 level. Chapters can not be parsed.'
heading_tag_to_p_tag(body_tag)
_heading_tag_to_p_tag(body_tag)
# wrap NavigableString with <p>
for node in body_tag:
@@ -500,7 +500,7 @@ def get_tags_between_chapter_marks(first_id: str, href: str, html_soup: Beautifu
return tags
def wrap_block_tag_with_table(main_tag, old_tag, width='95', border='1px', bg_color=None):
def _wrap_block_tag_with_table(main_tag, old_tag, width='95', border='1px', bg_color=None):
"""Function wraps <block> with <table>"""
table = main_tag.new_tag("table")
table.attrs['border'] = border
@@ -520,7 +520,7 @@ def wrap_block_tag_with_table(main_tag, old_tag, width='95', border='1px', bg_co
return table
def clean_wiley_block(block):
def _clean_wiley_block(block):
hrs = block.find_all("p", attrs={"class": re.compile(".+ hr")})
for hr in hrs:
hr.extract()
@@ -530,30 +530,30 @@ def clean_wiley_block(block):
h.insert_before(BeautifulSoup(features='lxml').new_tag("br"))
def preprocess_block_tags(chapter_tag):
def _preprocess_block_tags(chapter_tag):
"""Function preprocessing <block> tags"""
for block in chapter_tag.find_all("blockquote"):
if block.attrs.get('class') in ['feature1', 'feature2', 'feature3', 'feature4']:
clean_wiley_block(block)
_clean_wiley_block(block)
color = '#DDDDDD' if block.attrs.get(
'class') == 'feature1' else None
color = '#EEEEEE' if block.attrs.get(
'class') == 'feature2' else color
wrap_block_tag_with_table(chapter_tag, block, bg_color=color)
_wrap_block_tag_with_table(chapter_tag, block, bg_color=color)
block.insert_after(BeautifulSoup(features='lxml').new_tag("br"))
block.unwrap()
for future_block in chapter_tag.find_all("p", attrs={"class": re.compile("feature[1234]")}):
clean_wiley_block(future_block)
_clean_wiley_block(future_block)
color = '#DDDDDD' if future_block.attrs.get(
'class') == 'feature1' else None
color = '#EEEEEE' if future_block.attrs.get(
'class') == 'feature2' else color
wrap_block_tag_with_table(chapter_tag, future_block, bg_color=color)
_wrap_block_tag_with_table(chapter_tag, future_block, bg_color=color)
def prepare_formatted(text: str) -> str:
def _prepare_formatted(text: str) -> str:
"""Function replaces special symbols with their Unicode representation"""
text = text.replace("<", "\x3C")
text = text.replace(">", "\x3E")
@@ -563,7 +563,7 @@ def prepare_formatted(text: str) -> str:
return text
def wrap_preformatted_span_with_table(chapter_tag: Tag, span_tag: Tag) -> Tag:
def _wrap_preformatted_span_with_table(chapter_tag: Tag, span_tag: Tag) -> Tag:
"""Function wraps <span> with <table>"""
table, tbody, tr, td = chapter_tag.new_tag("table"), chapter_tag.new_tag(
"tbody"), chapter_tag.new_tag("tr"), chapter_tag.new_tag("td")
@@ -577,7 +577,7 @@ def wrap_preformatted_span_with_table(chapter_tag: Tag, span_tag: Tag) -> Tag:
return table
def preprocess_pre_tags(chapter_tag: BeautifulSoup):
def _preprocess_pre_tags(chapter_tag: BeautifulSoup):
"""
Function preprocessing <pre> tags
Parameters
@@ -601,7 +601,7 @@ def preprocess_pre_tags(chapter_tag: BeautifulSoup):
for child in copy_contents:
# Navigable String
if isinstance(child, NavigableString):
cleaned_text = prepare_formatted(str(child))
cleaned_text = _prepare_formatted(str(child))
sub_strings = re.split('\r\n|\n|\r', cleaned_text)
for string in sub_strings[:-1]:
new_tag.append(NavigableString(string))
@@ -612,24 +612,24 @@ def preprocess_pre_tags(chapter_tag: BeautifulSoup):
else:
for sub_child in child.children:
if isinstance(sub_child, NavigableString):
cleaned_text = prepare_formatted(str(sub_child))
cleaned_text = _prepare_formatted(str(sub_child))
sub_child.replace_with(NavigableString(cleaned_text))
else:
sub_child.string = prepare_formatted(sub_child.text)
sub_child.string = _prepare_formatted(sub_child.text)
cleaned_tag = child.extract()
new_tag.append(cleaned_tag)
if to_add_br:
new_tag.append(BeautifulSoup(
features='lxml').new_tag('br'))
pre.replace_with(new_tag)
table = wrap_preformatted_span_with_table(chapter_tag, new_tag)
table = _wrap_preformatted_span_with_table(chapter_tag, new_tag)
# add <p> to save brs
p_for_br = chapter_tag.new_tag("p")
p_for_br.string = "\xa0"
table.insert_after(p_for_br)
def preprocess_code_tags(chapter_tag: BeautifulSoup):
def _preprocess_code_tags(chapter_tag: BeautifulSoup):
"""
Function
- transform <code>, <kdb>, <var> tags into span
@@ -658,7 +658,7 @@ def prepare_title(title_of_chapter: str) -> str:
title_str = BeautifulSoup(title_of_chapter, features='lxml').string
title_str = re.sub(r'([\n\t\xa0])', ' ', title_str)
title_str = re.sub(r' +', ' ', title_str).rstrip()
title_str = clean_title_from_numbering(title_str)
title_str = _clean_title_from_numbering(title_str)
return title_str
@@ -696,18 +696,18 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
# 2. heading removal
if remove_title_from_chapter:
clean_headings_content(content_tag, title_str)
_clean_headings_content(content_tag, title_str)
# 3. processing tags (<li>, <table>, <code>, <pre>, <block>)
process_lists(content_tag)
preprocess_table(content_tag)
preprocess_code_tags(content_tag)
preprocess_pre_tags(content_tag)
preprocess_block_tags(content_tag)
_process_lists(content_tag)
_preprocess_table(content_tag)
_preprocess_code_tags(content_tag)
_preprocess_pre_tags(content_tag)
_preprocess_block_tags(content_tag)
# 4. class removal
for tag in content_tag.find_all(recursive=True):
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
'footnote-element']):
del tag.attrs['class']
return str(content_tag)
return str(content_tag)

View File

@@ -1,6 +1,7 @@
import os
import argparse
def parse_args():
parser = argparse.ArgumentParser(description="Utility for folders's clean up.")
parser.add_argument('-f', '--folders', type=str, nargs='*', help='Names of the folders to be cleaned.')

View File

@@ -3,6 +3,7 @@ import sys
import argparse
import subprocess
def parse_args():
parser = argparse.ArgumentParser(description="Utility for checking installed packages.")
parser.add_argument('-p', '--packages', type=str, nargs='*', help='Names of the packages.')

View File

@@ -4,7 +4,6 @@ from colorsys import hls_to_rgb
from webcolors import html4_hex_to_names, hex_to_rgb, rgb_to_name, rgb_percent_to_hex, rgb_to_hex, css3_names_to_hex
def closest_colour_rgb(requested_color):
""" Function finds closes colour rgb """
min_colours = {}

View File

@@ -20,15 +20,15 @@ class ColoredFormatter(logging.Formatter):
def format(self, record):
seq = self.MAPPING.get(record.levelname, 37) # default white
record.levelname = ('{0}{1}m{2}{3}') \
record.levelname = '{0}{1}m{2}{3}' \
.format(self.PREFIX, seq, record.levelname, self.SUFFIX)
return logging.Formatter.format(self, record)
class BookLogger:
def __init__(self, name, book_id, main_logger=None,
filemode='w+', logging_level=logging.INFO, logging_format=
'%(asctime)s - %(levelname)s - %(message)s [%(filename)s:%(lineno)d in %(funcName)s]'):
filemode='w+', logging_level=logging.INFO,
logging_format='%(asctime)s - %(levelname)s - %(message)s [%(filename)s:%(lineno)d in %(funcName)s]'):
"""
Method for Logger configuration. Logger will write to file.
:param name: name of the Logger.
@@ -107,4 +107,4 @@ class BookStatusWrapper:
self.set_status('[GENERATE]')
def set_error(self):
self.set_status('[ERROR]')
self.set_status('[ERROR]')

View File

@@ -82,7 +82,7 @@ def rgb2closest_html_color_name(color):
pass
if hue_diff in diff2base_color_dict:
dist_cur_color =(hue_request - hue_html) ** 2 + (s_request - s_html) ** 2 + (v_request - v_html) ** 2
dist_cur_color = (hue_request - hue_html) ** 2 + (s_request - s_html) ** 2 + (v_request - v_html) ** 2
hue_prev, s_prev, v_prev = HTML_COLORS_HSV[diff2base_color_dict[hue_diff]]
dist_prev_color = (hue_request - hue_prev) ** 2 + (s_request - s_prev) ** 2 + (v_request - v_prev) ** 2
if dist_cur_color < dist_prev_color:
@@ -95,7 +95,7 @@ def rgb2closest_html_color_name(color):
if __name__ == '__main__':
hex_colors = [
#'#945893',
# '#945893',
# '#96F',
# '#000', # black
# '#4C4C4C', # black
@@ -115,5 +115,5 @@ if __name__ == '__main__':
for c in hex_colors:
n = rgb2closest_html_color_name(c)
print(n) # "Actual colour:", c, ", closest colour name:",
print(n) # "Actual colour:", c, ", closest colour name:",
# print()