forked from LiveCarta/BookConverter
Change paths to books
This commit is contained in:
@@ -29,7 +29,7 @@ class LibreHTML2JSONConverter:
|
||||
cleaned text
|
||||
|
||||
"""
|
||||
new_text = re.sub(r'([\n\t])', ' ', html_text)
|
||||
new_text = re.sub(r"([\n\t])", " ", html_text)
|
||||
return new_text
|
||||
|
||||
# TODO: rethink the function structure without indexes.
|
||||
@@ -48,16 +48,16 @@ class LibreHTML2JSONConverter:
|
||||
"""
|
||||
if self.content[ind].name in LiveCartaConfig.SUPPORTED_HEADERS:
|
||||
title = str(self.content[ind])
|
||||
title = title.replace(f'<{self.content[ind].name}>', '')
|
||||
title = title.replace(f'</{self.content[ind].name}>', '')
|
||||
title = re.sub(r'^\n', '', title)
|
||||
title = title.replace(f"<{self.content[ind].name}>", "")
|
||||
title = title.replace(f"</{self.content[ind].name}>", "")
|
||||
title = re.sub(r"^\n", "", title)
|
||||
|
||||
# extract outline from tag
|
||||
curr_outline = int(re.sub(r"^h", "", self.content[ind].name))
|
||||
result = {
|
||||
'title': f'{title}',
|
||||
'contents': [],
|
||||
'sub_items': []
|
||||
"title": f"{title}",
|
||||
"contents": [],
|
||||
"sub_items": []
|
||||
}
|
||||
ch_content = []
|
||||
ind += 1
|
||||
@@ -71,9 +71,9 @@ class LibreHTML2JSONConverter:
|
||||
header_dict, ind = self.header_to_livecarta_chapter_item(
|
||||
ind)
|
||||
if ch_content:
|
||||
result['contents'].append("".join(ch_content))
|
||||
result["contents"].append("".join(ch_content))
|
||||
ch_content = []
|
||||
result['sub_items'].append(header_dict)
|
||||
result["sub_items"].append(header_dict)
|
||||
# - current h_i <= h_initial, end of recursion
|
||||
else:
|
||||
# return result, ind
|
||||
@@ -85,21 +85,21 @@ class LibreHTML2JSONConverter:
|
||||
ind += 1
|
||||
|
||||
if ch_content:
|
||||
result['contents'].append("".join(ch_content))
|
||||
result["contents"].append("".join(ch_content))
|
||||
return result, ind
|
||||
return ''
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _is_empty_p_tag(tag):
|
||||
if tag.name != 'p':
|
||||
if tag.name != "p":
|
||||
return False
|
||||
|
||||
temp_tag = copy(tag)
|
||||
brs = temp_tag.find_all('br')
|
||||
brs = temp_tag.find_all("br")
|
||||
for br in brs:
|
||||
br.decompose()
|
||||
|
||||
text = re.sub(r'\s+', '', temp_tag.text)
|
||||
text = re.sub(r"\s+", "", temp_tag.text)
|
||||
if text:
|
||||
return False
|
||||
|
||||
@@ -117,7 +117,7 @@ class LibreHTML2JSONConverter:
|
||||
res, ind = self.header_to_livecarta_chapter_item(ind)
|
||||
|
||||
else:
|
||||
chapter_title = f'Untitled chapter {ch_num}'
|
||||
chapter_title = f"Untitled chapter {ch_num}"
|
||||
chapter = []
|
||||
while ind < len(self.content) and self.content[ind].name not in LiveCartaConfig.SUPPORTED_HEADERS:
|
||||
if not self._is_empty_p_tag(self.content[ind]):
|
||||
@@ -126,9 +126,9 @@ class LibreHTML2JSONConverter:
|
||||
ind += 1
|
||||
if chapter:
|
||||
res = {
|
||||
'title': chapter_title,
|
||||
'contents': ["".join(chapter)],
|
||||
'sub_items': []
|
||||
"title": chapter_title,
|
||||
"contents": ["".join(chapter)],
|
||||
"sub_items": []
|
||||
}
|
||||
ch_num += 1
|
||||
|
||||
@@ -136,10 +136,10 @@ class LibreHTML2JSONConverter:
|
||||
json_strc.append(res)
|
||||
ch_amt += 1
|
||||
self.logger_object.log(
|
||||
f'Chapter {ch_amt} has been added to structure.')
|
||||
f"Chapter {ch_amt} has been added to structure.")
|
||||
except Exception as exc:
|
||||
self.logger_object.log(
|
||||
'Error has occurred while making json structure.', logging.ERROR)
|
||||
"Error has occurred while making json structure.", logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log()
|
||||
if self.book_api_status:
|
||||
self.book_api_status.set_error()
|
||||
@@ -148,10 +148,10 @@ class LibreHTML2JSONConverter:
|
||||
# Add is_introduction field to json structure
|
||||
# after deleting content before toc, some chapters can be deleted
|
||||
if self.top_level_headers:
|
||||
same_first_titles = self.top_level_headers[0]['title'] == json_strc[0]['title']
|
||||
is_first_header_introduction = not self.top_level_headers[0]['should_be_numbered']
|
||||
same_first_titles = self.top_level_headers[0]["title"] == json_strc[0]["title"]
|
||||
is_first_header_introduction = not self.top_level_headers[0]["should_be_numbered"]
|
||||
|
||||
json_strc[0]['is_introduction'] = is_first_header_introduction
|
||||
json_strc[0]["is_introduction"] = is_first_header_introduction
|
||||
|
||||
self.content_dict = {
|
||||
"content": json_strc,
|
||||
|
||||
Reference in New Issue
Block a user