From 81ccbf5af3f3b0f50a7867c30b4753fa2035ff0b Mon Sep 17 00:00:00 2001 From: shirshasa Date: Wed, 14 Apr 2021 13:53:12 +0300 Subject: [PATCH 1/2] converter: rename from book.py --- src/consumer.py | 2 +- src/{book.py => docx_converter.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/{book.py => docx_converter.py} (100%) diff --git a/src/consumer.py b/src/consumer.py index 3c164ae..9abf4b7 100644 --- a/src/consumer.py +++ b/src/consumer.py @@ -10,7 +10,7 @@ from threading import Event import pika from access import Access -from book import Book +from docx_converter import Book def configure_file_logger(name, filename='logs/converter_log.log', filemode='w+', diff --git a/src/book.py b/src/docx_converter.py similarity index 100% rename from src/book.py rename to src/docx_converter.py From a353f0346f0c8b096e0a21565f5d575250f69389 Mon Sep 17 00:00:00 2001 From: shirshasa Date: Wed, 14 Apr 2021 14:15:34 +0300 Subject: [PATCH 2/2] converter: just renaming file and functions --- src/docx_converter.py | 8 ++++---- src/{json_converter.py => json_postprocessor.py} | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) rename src/{json_converter.py => json_postprocessor.py} (95%) diff --git a/src/docx_converter.py b/src/docx_converter.py index 38efb1c..ac317d7 100644 --- a/src/docx_converter.py +++ b/src/docx_converter.py @@ -11,7 +11,7 @@ from bs4 import BeautifulSoup, NavigableString from config import BookLogger, BookApiWrapper, LawCartaConfig from html_preprocessor import HTMLPreprocessor -from json_converter import JSONConverter +from json_postprocessor import JSONConverter class Book: @@ -202,7 +202,7 @@ class Book: parser = HTMLPreprocessor(html_soup, self.logger_object) content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id) json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper) - content_dict = json_converter.convert_to_json() + content_dict = json_converter.convert_to_dict() self.write_to_json(content_dict) self.write_html_from_list(parser.body_tag) @@ -223,7 +223,7 @@ class Book: content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id) json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper) - content_dict = json_converter.convert_to_json() + content_dict = json_converter.convert_to_dict() self.write_to_json(content_dict) self.write_html_from_list(parser.body_tag) @@ -246,7 +246,7 @@ class Book: self.book_api_wrapper.set_generate_status() json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper) - content_dict = json_converter.convert_to_json() + content_dict = json_converter.convert_to_dict() self.write_to_json(content_dict) self.send_json_content(content_dict) self.logger_object.log(f'End of the conversion to LawCarta format. Check {self.output_path}.') diff --git a/src/json_converter.py b/src/json_postprocessor.py similarity index 95% rename from src/json_converter.py rename to src/json_postprocessor.py index a956282..f613cf9 100644 --- a/src/json_converter.py +++ b/src/json_postprocessor.py @@ -26,7 +26,7 @@ class JSONConverter: return new_text # TODO: rethink the function structure without indexes. - def header_to_json(self, ind): + def header_to_livecarta_chapter_item(self, ind) -> (dict, int): """ Function process header and collects all content for it. @@ -53,7 +53,7 @@ class JSONConverter: outline = int(re.sub(r"^h", "", self.content[ind].name)) # - recursion step until h_i > h_initial if outline > curr_outline: - header_dict, ind = self.header_to_json(ind) + header_dict, ind = self.header_to_livecarta_chapter_item(ind) if ch_content: result['contents'].append("".join(ch_content)) ch_content = [] @@ -89,7 +89,7 @@ class JSONConverter: return True - def convert_to_json(self): + def convert_to_dict(self): """ Function which convert list of html nodes to appropriate json structure. """ @@ -103,7 +103,7 @@ class JSONConverter: res = {} if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS: - res, ind = self.header_to_json(ind) + res, ind = self.header_to_livecarta_chapter_item(ind) else: chapter_title = f'Untitled chapter {ch_num}'