forked from LiveCarta/BookConverter
Merge pull request #139 from Teqniksoft/refactoring
Refactoring: converter
This commit is contained in:
@@ -10,7 +10,7 @@ from threading import Event
|
||||
import pika
|
||||
|
||||
from access import Access
|
||||
from book import Book
|
||||
from docx_converter import Book
|
||||
|
||||
|
||||
def configure_file_logger(name, filename='logs/converter_log.log', filemode='w+',
|
||||
|
||||
@@ -11,7 +11,7 @@ from bs4 import BeautifulSoup, NavigableString
|
||||
|
||||
from config import BookLogger, BookApiWrapper, LawCartaConfig
|
||||
from html_preprocessor import HTMLPreprocessor
|
||||
from json_converter import JSONConverter
|
||||
from json_postprocessor import JSONConverter
|
||||
|
||||
|
||||
class Book:
|
||||
@@ -202,7 +202,7 @@ class Book:
|
||||
parser = HTMLPreprocessor(html_soup, self.logger_object)
|
||||
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
||||
content_dict = json_converter.convert_to_json()
|
||||
content_dict = json_converter.convert_to_dict()
|
||||
self.write_to_json(content_dict)
|
||||
self.write_html_from_list(parser.body_tag)
|
||||
|
||||
@@ -223,7 +223,7 @@ class Book:
|
||||
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
||||
|
||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
||||
content_dict = json_converter.convert_to_json()
|
||||
content_dict = json_converter.convert_to_dict()
|
||||
|
||||
self.write_to_json(content_dict)
|
||||
self.write_html_from_list(parser.body_tag)
|
||||
@@ -246,7 +246,7 @@ class Book:
|
||||
self.book_api_wrapper.set_generate_status()
|
||||
|
||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
||||
content_dict = json_converter.convert_to_json()
|
||||
content_dict = json_converter.convert_to_dict()
|
||||
self.write_to_json(content_dict)
|
||||
self.send_json_content(content_dict)
|
||||
self.logger_object.log(f'End of the conversion to LawCarta format. Check {self.output_path}.')
|
||||
@@ -26,7 +26,7 @@ class JSONConverter:
|
||||
return new_text
|
||||
|
||||
# TODO: rethink the function structure without indexes.
|
||||
def header_to_json(self, ind):
|
||||
def header_to_livecarta_chapter_item(self, ind) -> (dict, int):
|
||||
"""
|
||||
Function process header and collects all content for it.
|
||||
|
||||
@@ -53,7 +53,7 @@ class JSONConverter:
|
||||
outline = int(re.sub(r"^h", "", self.content[ind].name))
|
||||
# - recursion step until h_i > h_initial
|
||||
if outline > curr_outline:
|
||||
header_dict, ind = self.header_to_json(ind)
|
||||
header_dict, ind = self.header_to_livecarta_chapter_item(ind)
|
||||
if ch_content:
|
||||
result['contents'].append("".join(ch_content))
|
||||
ch_content = []
|
||||
@@ -89,7 +89,7 @@ class JSONConverter:
|
||||
|
||||
return True
|
||||
|
||||
def convert_to_json(self):
|
||||
def convert_to_dict(self):
|
||||
"""
|
||||
Function which convert list of html nodes to appropriate json structure.
|
||||
"""
|
||||
@@ -103,7 +103,7 @@ class JSONConverter:
|
||||
res = {}
|
||||
|
||||
if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS:
|
||||
res, ind = self.header_to_json(ind)
|
||||
res, ind = self.header_to_livecarta_chapter_item(ind)
|
||||
|
||||
else:
|
||||
chapter_title = f'Untitled chapter {ch_num}'
|
||||
Reference in New Issue
Block a user