From 4c22438906b7f1415c5ee274d14cb4a0e0649e29 Mon Sep 17 00:00:00 2001 From: shirshasa Date: Tue, 7 Sep 2021 13:19:02 +0300 Subject: [PATCH] epub converter: files and classes renaming --- src/docx_solver.py | 12 ++++++------ src/{epub_postprocessor.py => epub_converter.py} | 6 +++--- src/epub_solver.py | 4 ++-- ...tml_preprocessor.py => html_docx_preprocessor.py} | 2 +- src/json_postprocessor.py | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) rename src/{epub_postprocessor.py => epub_converter.py} (98%) rename src/{html_preprocessor.py => html_docx_preprocessor.py} (99%) diff --git a/src/docx_solver.py b/src/docx_solver.py index 1bd9475..52df91c 100644 --- a/src/docx_solver.py +++ b/src/docx_solver.py @@ -6,8 +6,8 @@ from subprocess import PIPE from threading import Event from bs4 import BeautifulSoup -from html_preprocessor import HTMLPreprocessor -from json_postprocessor import JSONConverter +from html_docx_preprocessor import HTMLDocxPreprocessor +from json_postprocessor import DocxHTML2JSONConverter from src.solver import BookSolver @@ -117,9 +117,9 @@ class DocxBook(BookSolver): def convert_from_html(self): html_soup = self.read_html() - parser = HTMLPreprocessor(html_soup, self.logger_object) + parser = HTMLDocxPreprocessor(html_soup, self.logger_object) content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id) - json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper) + json_converter = DocxHTML2JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper) content_dict = json_converter.convert_to_dict() self.write_to_json(content_dict) self.write_html_from_list(parser.body_tag) @@ -137,13 +137,13 @@ class DocxBook(BookSolver): html_soup = self.read_html() self.logger_object.log('Beginning of processing .html file.') - parser = HTMLPreprocessor(html_soup, self.logger_object) + parser = HTMLDocxPreprocessor(html_soup, self.logger_object) bs_tags, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id) self.logger_object.log('Beginning of processing json output.') self.status_wrapper.set_generating() - json_converter = JSONConverter(bs_tags, footnotes, top_level_headers, self.logger_object, self.status_wrapper) + json_converter = DocxHTML2JSONConverter(bs_tags, footnotes, top_level_headers, self.logger_object, self.status_wrapper) content_dict = json_converter.convert_to_dict() return content_dict diff --git a/src/epub_postprocessor.py b/src/epub_converter.py similarity index 98% rename from src/epub_postprocessor.py rename to src/epub_converter.py index ef9b028..653649d 100644 --- a/src/epub_postprocessor.py +++ b/src/epub_converter.py @@ -21,7 +21,7 @@ from css_reader import clean_css, add_inline_style_to_html_soup from livecarta_config import LawCartaConfig, BookLogger -class EpubPostprocessor: +class EpubConverter: def __init__(self, file, access=None, logger=None): self.file = file self.access = access @@ -411,8 +411,8 @@ if __name__ == "__main__": logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0) - json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/9781119682387_pre_code2.epub', - logger=logger_object) + json_converter = EpubConverter('/home/katerina/PycharmProjects/Jenia/converter/epub/9781119682387_pre_code2.epub', + logger=logger_object) tmp = json_converter.convert_to_dict() with codecs.open('tmp.json', 'w', encoding='utf-8') as f: diff --git a/src/epub_solver.py b/src/epub_solver.py index 08ffbcc..7cece30 100644 --- a/src/epub_solver.py +++ b/src/epub_solver.py @@ -1,4 +1,4 @@ -from epub_postprocessor import EpubPostprocessor +from epub_converter import EpubConverter from src.solver import BookSolver @@ -10,7 +10,7 @@ class EpubBook(BookSolver): self.book_type = 'epub' def get_converted_book(self): - json_converter = EpubPostprocessor(self.file_path, access=self.access, logger=self.logger_object) + json_converter = EpubConverter(self.file_path, access=self.access, logger=self.logger_object) content_dict = json_converter.convert_to_dict() self.status_wrapper.set_generating() return content_dict diff --git a/src/html_preprocessor.py b/src/html_docx_preprocessor.py similarity index 99% rename from src/html_preprocessor.py rename to src/html_docx_preprocessor.py index 5af2226..679ad4c 100644 --- a/src/html_preprocessor.py +++ b/src/html_docx_preprocessor.py @@ -10,7 +10,7 @@ from bs4 import BeautifulSoup, NavigableString, Tag from livecarta_config import LawCartaConfig, BookLogger, BookStatusWrapper -class HTMLPreprocessor: +class HTMLDocxPreprocessor: def __init__(self, html_soup, logger_object, status_wrapper=None): self.body_tag = html_soup.body diff --git a/src/json_postprocessor.py b/src/json_postprocessor.py index 4f55852..98814be 100644 --- a/src/json_postprocessor.py +++ b/src/json_postprocessor.py @@ -5,7 +5,7 @@ from copy import copy from livecarta_config import LawCartaConfig -class JSONConverter: +class DocxHTML2JSONConverter: def __init__(self, content, footnotes, top_level_headers, logger_object, book_api_status=None): self.content_dict = None self.content = content