diff --git a/src/docx_converter.py b/src/docx_converter.py index 38efb1c..ac317d7 100644 --- a/src/docx_converter.py +++ b/src/docx_converter.py @@ -11,7 +11,7 @@ from bs4 import BeautifulSoup, NavigableString from config import BookLogger, BookApiWrapper, LawCartaConfig from html_preprocessor import HTMLPreprocessor -from json_converter import JSONConverter +from json_postprocessor import JSONConverter class Book: @@ -202,7 +202,7 @@ class Book: parser = HTMLPreprocessor(html_soup, self.logger_object) content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id) json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper) - content_dict = json_converter.convert_to_json() + content_dict = json_converter.convert_to_dict() self.write_to_json(content_dict) self.write_html_from_list(parser.body_tag) @@ -223,7 +223,7 @@ class Book: content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id) json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper) - content_dict = json_converter.convert_to_json() + content_dict = json_converter.convert_to_dict() self.write_to_json(content_dict) self.write_html_from_list(parser.body_tag) @@ -246,7 +246,7 @@ class Book: self.book_api_wrapper.set_generate_status() json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper) - content_dict = json_converter.convert_to_json() + content_dict = json_converter.convert_to_dict() self.write_to_json(content_dict) self.send_json_content(content_dict) self.logger_object.log(f'End of the conversion to LawCarta format. Check {self.output_path}.') diff --git a/src/json_converter.py b/src/json_postprocessor.py similarity index 95% rename from src/json_converter.py rename to src/json_postprocessor.py index a956282..f613cf9 100644 --- a/src/json_converter.py +++ b/src/json_postprocessor.py @@ -26,7 +26,7 @@ class JSONConverter: return new_text # TODO: rethink the function structure without indexes. - def header_to_json(self, ind): + def header_to_livecarta_chapter_item(self, ind) -> (dict, int): """ Function process header and collects all content for it. @@ -53,7 +53,7 @@ class JSONConverter: outline = int(re.sub(r"^h", "", self.content[ind].name)) # - recursion step until h_i > h_initial if outline > curr_outline: - header_dict, ind = self.header_to_json(ind) + header_dict, ind = self.header_to_livecarta_chapter_item(ind) if ch_content: result['contents'].append("".join(ch_content)) ch_content = [] @@ -89,7 +89,7 @@ class JSONConverter: return True - def convert_to_json(self): + def convert_to_dict(self): """ Function which convert list of html nodes to appropriate json structure. """ @@ -103,7 +103,7 @@ class JSONConverter: res = {} if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS: - res, ind = self.header_to_json(ind) + res, ind = self.header_to_livecarta_chapter_item(ind) else: chapter_title = f'Untitled chapter {ch_num}'