forked from LiveCarta/BookConverter
update book conversion
- new resulting JSON structure
This commit is contained in:
29
src/book.py
29
src/book.py
@@ -823,7 +823,11 @@ class Book:
|
|||||||
if self.content[ind].name in self.SUPPORTED_HEADERS:
|
if self.content[ind].name in self.SUPPORTED_HEADERS:
|
||||||
title = self.content[ind].text
|
title = self.content[ind].text
|
||||||
curr_outline = int(re.sub(r"^h", "", self.content[ind].name)) # extract outline from tag
|
curr_outline = int(re.sub(r"^h", "", self.content[ind].name)) # extract outline from tag
|
||||||
result = {title: []}
|
result = {
|
||||||
|
'title': title,
|
||||||
|
'contents': [],
|
||||||
|
'sub_items': []
|
||||||
|
}
|
||||||
ch_content = []
|
ch_content = []
|
||||||
ind += 1
|
ind += 1
|
||||||
|
|
||||||
@@ -833,24 +837,23 @@ class Book:
|
|||||||
outline = int(re.sub(r"^h", "", self.content[ind].name))
|
outline = int(re.sub(r"^h", "", self.content[ind].name))
|
||||||
# - recursion step until h_i > h_initial
|
# - recursion step until h_i > h_initial
|
||||||
if outline > curr_outline:
|
if outline > curr_outline:
|
||||||
res, ind = self.header_to_json(ind)
|
header_dict, ind = self.header_to_json(ind)
|
||||||
if ch_content:
|
if ch_content:
|
||||||
result[title].append("".join(ch_content))
|
result['contents'].append("".join(ch_content))
|
||||||
ch_content = []
|
ch_content = []
|
||||||
result[title].append(res)
|
result['sub_items'].append(header_dict)
|
||||||
# - current h_i <= h_initial, end of recursion
|
# - current h_i <= h_initial, end of recursion
|
||||||
else:
|
else:
|
||||||
# return result, ind
|
# return result, ind
|
||||||
break
|
break
|
||||||
# 2. next tag is not a header. add new paragraphs
|
# 2. next tag is not a header. add new paragraphs
|
||||||
else:
|
else:
|
||||||
res = self.format_html(str(self.content[ind]))
|
html_str = self.format_html(str(self.content[ind]))
|
||||||
# result[title].append(res)
|
ch_content.append(html_str)
|
||||||
ch_content.append(res)
|
|
||||||
ind += 1
|
ind += 1
|
||||||
|
|
||||||
if ch_content:
|
if ch_content:
|
||||||
result[title].append("".join(ch_content))
|
result['contents'].append("".join(ch_content))
|
||||||
return result, ind
|
return result, ind
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
@@ -886,13 +889,6 @@ class Book:
|
|||||||
if self.content[ind].name in self.SUPPORTED_HEADERS:
|
if self.content[ind].name in self.SUPPORTED_HEADERS:
|
||||||
res, ind = self.header_to_json(ind)
|
res, ind = self.header_to_json(ind)
|
||||||
|
|
||||||
assert len(res.keys()) == 1, 'Something went wrong during header to json conversion.'
|
|
||||||
|
|
||||||
top_level_header = list(res.keys())[0]
|
|
||||||
res = {
|
|
||||||
'title': top_level_header,
|
|
||||||
'contents': res[top_level_header]
|
|
||||||
}
|
|
||||||
else:
|
else:
|
||||||
chapter_title = f'Untitled chapter {ch_num}'
|
chapter_title = f'Untitled chapter {ch_num}'
|
||||||
chapter = []
|
chapter = []
|
||||||
@@ -903,7 +899,8 @@ class Book:
|
|||||||
if chapter:
|
if chapter:
|
||||||
res = {
|
res = {
|
||||||
'title': chapter_title,
|
'title': chapter_title,
|
||||||
'contents': ["".join(chapter)]
|
'contents': ["".join(chapter)],
|
||||||
|
'sub_items': []
|
||||||
}
|
}
|
||||||
ch_num += 1
|
ch_num += 1
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user