From 8a32aeb58d50a1789ab40b7c1eb5a5f800f37dae Mon Sep 17 00:00:00 2001 From: shirshasa Date: Mon, 8 Jun 2020 13:14:08 +0300 Subject: [PATCH] update book conversion - new resulted json structure --- src/book.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/book.py b/src/book.py index f457b8c..d570ee3 100644 --- a/src/book.py +++ b/src/book.py @@ -823,7 +823,11 @@ class Book: if self.content[ind].name in self.SUPPORTED_HEADERS: title = self.content[ind].text curr_outline = int(re.sub(r"^h", "", self.content[ind].name)) # extract outline from tag - result = {title: []} + result = { + 'title': title, + 'contents': [], + 'sub_items': [] + } ch_content = [] ind += 1 @@ -833,24 +837,23 @@ class Book: outline = int(re.sub(r"^h", "", self.content[ind].name)) # - recursion step until h_i > h_initial if outline > curr_outline: - res, ind = self.header_to_json(ind) + header_dict, ind = self.header_to_json(ind) if ch_content: - result[title].append("".join(ch_content)) + result['contents'].append("".join(ch_content)) ch_content = [] - result[title].append(res) + result['sub_items'].append(header_dict) # - current h_i <= h_initial, end of recursion else: # return result, ind break # 2. next tag is not a header. add new paragraphs else: - res = self.format_html(str(self.content[ind])) - # result[title].append(res) - ch_content.append(res) + html_str = self.format_html(str(self.content[ind])) + ch_content.append(html_str) ind += 1 if ch_content: - result[title].append("".join(ch_content)) + result['contents'].append("".join(ch_content)) return result, ind return '' @@ -886,13 +889,6 @@ class Book: if self.content[ind].name in self.SUPPORTED_HEADERS: res, ind = self.header_to_json(ind) - assert len(res.keys()) == 1, 'Something went wrong during header to json conversion.' - - top_level_header = list(res.keys())[0] - res = { - 'title': top_level_header, - 'contents': res[top_level_header] - } else: chapter_title = f'Untitled chapter {ch_num}' chapter = [] @@ -903,7 +899,8 @@ class Book: if chapter: res = { 'title': chapter_title, - 'contents': ["".join(chapter)] + 'contents': ["".join(chapter)], + 'sub_items': [] } ch_num += 1