forked from LiveCarta/BookConverter
epub converter: files and classes renaming
This commit is contained in:
@@ -6,8 +6,8 @@ from subprocess import PIPE
|
||||
from threading import Event
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from html_preprocessor import HTMLPreprocessor
|
||||
from json_postprocessor import JSONConverter
|
||||
from html_docx_preprocessor import HTMLDocxPreprocessor
|
||||
from json_postprocessor import DocxHTML2JSONConverter
|
||||
from src.solver import BookSolver
|
||||
|
||||
|
||||
@@ -117,9 +117,9 @@ class DocxBook(BookSolver):
|
||||
|
||||
def convert_from_html(self):
|
||||
html_soup = self.read_html()
|
||||
parser = HTMLPreprocessor(html_soup, self.logger_object)
|
||||
parser = HTMLDocxPreprocessor(html_soup, self.logger_object)
|
||||
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
|
||||
json_converter = DocxHTML2JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
|
||||
content_dict = json_converter.convert_to_dict()
|
||||
self.write_to_json(content_dict)
|
||||
self.write_html_from_list(parser.body_tag)
|
||||
@@ -137,13 +137,13 @@ class DocxBook(BookSolver):
|
||||
html_soup = self.read_html()
|
||||
self.logger_object.log('Beginning of processing .html file.')
|
||||
|
||||
parser = HTMLPreprocessor(html_soup, self.logger_object)
|
||||
parser = HTMLDocxPreprocessor(html_soup, self.logger_object)
|
||||
bs_tags, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
||||
|
||||
self.logger_object.log('Beginning of processing json output.')
|
||||
self.status_wrapper.set_generating()
|
||||
|
||||
json_converter = JSONConverter(bs_tags, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
|
||||
json_converter = DocxHTML2JSONConverter(bs_tags, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
|
||||
content_dict = json_converter.convert_to_dict()
|
||||
return content_dict
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ from css_reader import clean_css, add_inline_style_to_html_soup
|
||||
from livecarta_config import LawCartaConfig, BookLogger
|
||||
|
||||
|
||||
class EpubPostprocessor:
|
||||
class EpubConverter:
|
||||
def __init__(self, file, access=None, logger=None):
|
||||
self.file = file
|
||||
self.access = access
|
||||
@@ -411,8 +411,8 @@ if __name__ == "__main__":
|
||||
|
||||
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
|
||||
|
||||
json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/9781119682387_pre_code2.epub',
|
||||
logger=logger_object)
|
||||
json_converter = EpubConverter('/home/katerina/PycharmProjects/Jenia/converter/epub/9781119682387_pre_code2.epub',
|
||||
logger=logger_object)
|
||||
tmp = json_converter.convert_to_dict()
|
||||
|
||||
with codecs.open('tmp.json', 'w', encoding='utf-8') as f:
|
||||
@@ -1,4 +1,4 @@
|
||||
from epub_postprocessor import EpubPostprocessor
|
||||
from epub_converter import EpubConverter
|
||||
from src.solver import BookSolver
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ class EpubBook(BookSolver):
|
||||
self.book_type = 'epub'
|
||||
|
||||
def get_converted_book(self):
|
||||
json_converter = EpubPostprocessor(self.file_path, access=self.access, logger=self.logger_object)
|
||||
json_converter = EpubConverter(self.file_path, access=self.access, logger=self.logger_object)
|
||||
content_dict = json_converter.convert_to_dict()
|
||||
self.status_wrapper.set_generating()
|
||||
return content_dict
|
||||
|
||||
@@ -10,7 +10,7 @@ from bs4 import BeautifulSoup, NavigableString, Tag
|
||||
from livecarta_config import LawCartaConfig, BookLogger, BookStatusWrapper
|
||||
|
||||
|
||||
class HTMLPreprocessor:
|
||||
class HTMLDocxPreprocessor:
|
||||
|
||||
def __init__(self, html_soup, logger_object, status_wrapper=None):
|
||||
self.body_tag = html_soup.body
|
||||
@@ -5,7 +5,7 @@ from copy import copy
|
||||
from livecarta_config import LawCartaConfig
|
||||
|
||||
|
||||
class JSONConverter:
|
||||
class DocxHTML2JSONConverter:
|
||||
def __init__(self, content, footnotes, top_level_headers, logger_object, book_api_status=None):
|
||||
self.content_dict = None
|
||||
self.content = content
|
||||
|
||||
Reference in New Issue
Block a user