epub converter: rename files and classes

This commit is contained in:
shirshasa
2021-09-07 13:19:02 +03:00
parent 178896d510
commit 4c22438906
5 changed files with 13 additions and 13 deletions

View File

@@ -6,8 +6,8 @@ from subprocess import PIPE
from threading import Event
from bs4 import BeautifulSoup
from html_preprocessor import HTMLPreprocessor
from json_postprocessor import JSONConverter
from html_docx_preprocessor import HTMLDocxPreprocessor
from json_postprocessor import DocxHTML2JSONConverter
from src.solver import BookSolver
@@ -117,9 +117,9 @@ class DocxBook(BookSolver):
def convert_from_html(self):
html_soup = self.read_html()
parser = HTMLPreprocessor(html_soup, self.logger_object)
parser = HTMLDocxPreprocessor(html_soup, self.logger_object)
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
json_converter = DocxHTML2JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
content_dict = json_converter.convert_to_dict()
self.write_to_json(content_dict)
self.write_html_from_list(parser.body_tag)
@@ -137,13 +137,13 @@ class DocxBook(BookSolver):
html_soup = self.read_html()
self.logger_object.log('Beginning of processing .html file.')
parser = HTMLPreprocessor(html_soup, self.logger_object)
parser = HTMLDocxPreprocessor(html_soup, self.logger_object)
bs_tags, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
self.logger_object.log('Beginning of processing json output.')
self.status_wrapper.set_generating()
json_converter = JSONConverter(bs_tags, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
json_converter = DocxHTML2JSONConverter(bs_tags, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
content_dict = json_converter.convert_to_dict()
return content_dict

View File

@@ -21,7 +21,7 @@ from css_reader import clean_css, add_inline_style_to_html_soup
from livecarta_config import LawCartaConfig, BookLogger
class EpubPostprocessor:
class EpubConverter:
def __init__(self, file, access=None, logger=None):
self.file = file
self.access = access
@@ -411,8 +411,8 @@ if __name__ == "__main__":
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/9781119682387_pre_code2.epub',
logger=logger_object)
json_converter = EpubConverter('/home/katerina/PycharmProjects/Jenia/converter/epub/9781119682387_pre_code2.epub',
logger=logger_object)
tmp = json_converter.convert_to_dict()
with codecs.open('tmp.json', 'w', encoding='utf-8') as f:

View File

@@ -1,4 +1,4 @@
from epub_postprocessor import EpubPostprocessor
from epub_converter import EpubConverter
from src.solver import BookSolver
@@ -10,7 +10,7 @@ class EpubBook(BookSolver):
self.book_type = 'epub'
def get_converted_book(self):
json_converter = EpubPostprocessor(self.file_path, access=self.access, logger=self.logger_object)
json_converter = EpubConverter(self.file_path, access=self.access, logger=self.logger_object)
content_dict = json_converter.convert_to_dict()
self.status_wrapper.set_generating()
return content_dict

View File

@@ -10,7 +10,7 @@ from bs4 import BeautifulSoup, NavigableString, Tag
from livecarta_config import LawCartaConfig, BookLogger, BookStatusWrapper
class HTMLPreprocessor:
class HTMLDocxPreprocessor:
def __init__(self, html_soup, logger_object, status_wrapper=None):
self.body_tag = html_soup.body

View File

@@ -5,7 +5,7 @@ from copy import copy
from livecarta_config import LawCartaConfig
class JSONConverter:
class DocxHTML2JSONConverter:
def __init__(self, content, footnotes, top_level_headers, logger_object, book_api_status=None):
self.content_dict = None
self.content = content