forked from LiveCarta/BookConverter
converter: just renaming file and functions
This commit is contained in:
@@ -11,7 +11,7 @@ from bs4 import BeautifulSoup, NavigableString
|
|||||||
|
|
||||||
from config import BookLogger, BookApiWrapper, LawCartaConfig
|
from config import BookLogger, BookApiWrapper, LawCartaConfig
|
||||||
from html_preprocessor import HTMLPreprocessor
|
from html_preprocessor import HTMLPreprocessor
|
||||||
from json_converter import JSONConverter
|
from json_postprocessor import JSONConverter
|
||||||
|
|
||||||
|
|
||||||
class Book:
|
class Book:
|
||||||
@@ -202,7 +202,7 @@ class Book:
|
|||||||
parser = HTMLPreprocessor(html_soup, self.logger_object)
|
parser = HTMLPreprocessor(html_soup, self.logger_object)
|
||||||
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
||||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
||||||
content_dict = json_converter.convert_to_json()
|
content_dict = json_converter.convert_to_dict()
|
||||||
self.write_to_json(content_dict)
|
self.write_to_json(content_dict)
|
||||||
self.write_html_from_list(parser.body_tag)
|
self.write_html_from_list(parser.body_tag)
|
||||||
|
|
||||||
@@ -223,7 +223,7 @@ class Book:
|
|||||||
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
||||||
|
|
||||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
||||||
content_dict = json_converter.convert_to_json()
|
content_dict = json_converter.convert_to_dict()
|
||||||
|
|
||||||
self.write_to_json(content_dict)
|
self.write_to_json(content_dict)
|
||||||
self.write_html_from_list(parser.body_tag)
|
self.write_html_from_list(parser.body_tag)
|
||||||
@@ -246,7 +246,7 @@ class Book:
|
|||||||
self.book_api_wrapper.set_generate_status()
|
self.book_api_wrapper.set_generate_status()
|
||||||
|
|
||||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
||||||
content_dict = json_converter.convert_to_json()
|
content_dict = json_converter.convert_to_dict()
|
||||||
self.write_to_json(content_dict)
|
self.write_to_json(content_dict)
|
||||||
self.send_json_content(content_dict)
|
self.send_json_content(content_dict)
|
||||||
self.logger_object.log(f'End of the conversion to LawCarta format. Check {self.output_path}.')
|
self.logger_object.log(f'End of the conversion to LawCarta format. Check {self.output_path}.')
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ class JSONConverter:
|
|||||||
return new_text
|
return new_text
|
||||||
|
|
||||||
# TODO: rethink the function structure without indexes.
|
# TODO: rethink the function structure without indexes.
|
||||||
def header_to_json(self, ind):
|
def header_to_livecarta_chapter_item(self, ind) -> (dict, int):
|
||||||
"""
|
"""
|
||||||
Process a header and collect all the content that belongs to it.
|
Process a header and collect all the content that belongs to it.
|
||||||
|
|
||||||
@@ -53,7 +53,7 @@ class JSONConverter:
|
|||||||
outline = int(re.sub(r"^h", "", self.content[ind].name))
|
outline = int(re.sub(r"^h", "", self.content[ind].name))
|
||||||
# - recursion step until h_i > h_initial
|
# - recursion step until h_i > h_initial
|
||||||
if outline > curr_outline:
|
if outline > curr_outline:
|
||||||
header_dict, ind = self.header_to_json(ind)
|
header_dict, ind = self.header_to_livecarta_chapter_item(ind)
|
||||||
if ch_content:
|
if ch_content:
|
||||||
result['contents'].append("".join(ch_content))
|
result['contents'].append("".join(ch_content))
|
||||||
ch_content = []
|
ch_content = []
|
||||||
@@ -89,7 +89,7 @@ class JSONConverter:
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def convert_to_json(self):
|
def convert_to_dict(self):
|
||||||
"""
|
"""
|
||||||
Convert the list of HTML nodes to the appropriate JSON structure.
|
Convert the list of HTML nodes to the appropriate JSON structure.
|
||||||
"""
|
"""
|
||||||
@@ -103,7 +103,7 @@ class JSONConverter:
|
|||||||
res = {}
|
res = {}
|
||||||
|
|
||||||
if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS:
|
if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS:
|
||||||
res, ind = self.header_to_json(ind)
|
res, ind = self.header_to_livecarta_chapter_item(ind)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
chapter_title = f'Untitled chapter {ch_num}'
|
chapter_title = f'Untitled chapter {ch_num}'
|
||||||
Reference in New Issue
Block a user