update book conversion

- new resulting JSON structure
This commit is contained in:
shirshasa
2020-06-08 13:14:08 +03:00
parent bbe690bf80
commit 8a32aeb58d

View File

@@ -823,7 +823,11 @@ class Book:
if self.content[ind].name in self.SUPPORTED_HEADERS:
title = self.content[ind].text
curr_outline = int(re.sub(r"^h", "", self.content[ind].name)) # extract outline from tag
result = {title: []}
result = {
'title': title,
'contents': [],
'sub_items': []
}
ch_content = []
ind += 1
@@ -833,24 +837,23 @@ class Book:
outline = int(re.sub(r"^h", "", self.content[ind].name))
# - recursion step until h_i > h_initial
if outline > curr_outline:
res, ind = self.header_to_json(ind)
header_dict, ind = self.header_to_json(ind)
if ch_content:
result[title].append("".join(ch_content))
result['contents'].append("".join(ch_content))
ch_content = []
result[title].append(res)
result['sub_items'].append(header_dict)
# - current h_i <= h_initial, end of recursion
else:
# return result, ind
break
# 2. next tag is not a header. add new paragraphs
else:
res = self.format_html(str(self.content[ind]))
# result[title].append(res)
ch_content.append(res)
html_str = self.format_html(str(self.content[ind]))
ch_content.append(html_str)
ind += 1
if ch_content:
result[title].append("".join(ch_content))
result['contents'].append("".join(ch_content))
return result, ind
return ''
@@ -886,13 +889,6 @@ class Book:
if self.content[ind].name in self.SUPPORTED_HEADERS:
res, ind = self.header_to_json(ind)
assert len(res.keys()) == 1, 'Something went wrong during header to json conversion.'
top_level_header = list(res.keys())[0]
res = {
'title': top_level_header,
'contents': res[top_level_header]
}
else:
chapter_title = f'Untitled chapter {ch_num}'
chapter = []
@@ -903,7 +899,8 @@ class Book:
if chapter:
res = {
'title': chapter_title,
'contents': ["".join(chapter)]
'contents': ["".join(chapter)],
'sub_items': []
}
ch_num += 1