From a571cb7009c5df9fd91f37573c2c58f5d49c96f9 Mon Sep 17 00:00:00 2001 From: Kiryl Date: Thu, 8 Sep 2022 13:12:49 +0300 Subject: [PATCH] Change names --- src/docx_converter/docx_solver.py | 14 +++++++------- ...l_preprocessor.py => html_presets_processor.py} | 6 ++++-- ...tyle_processor.py => inline_style_processor.py} | 4 ++-- src/{style_preprocessor.py => style_reader.py} | 2 +- 4 files changed, 14 insertions(+), 12 deletions(-) rename src/{html_preprocessor.py => html_presets_processor.py} (97%) rename src/{tag_inline_style_processor.py => inline_style_processor.py} (99%) rename src/{style_preprocessor.py => style_reader.py} (99%) diff --git a/src/docx_converter/docx_solver.py b/src/docx_converter/docx_solver.py index 56cffd7..3cd324d 100644 --- a/src/docx_converter/docx_solver.py +++ b/src/docx_converter/docx_solver.py @@ -5,8 +5,8 @@ from threading import Event from src.book_solver import BookSolver from src.util.helpers import BookLogger -from src.html_preprocessor import HtmlPreprocessor -from src.style_preprocessor import StylePreprocessor +from src.html_presets_processor import HtmlPresetsProcessor +from src.style_reader import StyleReader from src.docx_converter.docx2libre_html import Docx2LibreHtml from src.docx_converter.html_docx_processor import HtmlDocxProcessor from src.docx_converter.libre_html2json_converter import LibreHtml2JsonConverter @@ -49,9 +49,9 @@ class DocxBook(BookSolver): # 2. Parses and cleans html, gets list of tags, gets footnotes try: - html_preprocessor = HtmlPreprocessor( + html_preprocessor = HtmlPresetsProcessor( logger=self.logger_object, preset_path="presets/docx_presets.json") - style_preprocessor = StylePreprocessor() + style_preprocessor = StyleReader() html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup, logger=self.logger_object, html_preprocessor=html_preprocessor, @@ -80,7 +80,7 @@ class DocxBook(BookSolver): if __name__ == "__main__": - docx_file_path = "../../books/docx/Bar_Exam_MPT_2e_prepared.docx" + docx_file_path = "../../books/docx/AmericanGovernment3e-WEB.docx" logger_object = BookLogger( name="docx", book_id=docx_file_path.split("/")[-1]) locker = Event() @@ -89,9 +89,9 @@ if __name__ == "__main__": html_converter = Docx2LibreHtml(file_path=docx_file_path, logger=logger_object, libre_locker=locker) - html_preprocessor = HtmlPreprocessor( + html_preprocessor = HtmlPresetsProcessor( logger=logger_object, preset_path="../../presets/docx_presets.json") - style_preprocessor = StylePreprocessor() + style_preprocessor = StyleReader() html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup, logger=logger_object, html_preprocessor=html_preprocessor, style_preprocessor=style_preprocessor) content, footnotes, top_level_headers = html_processor.process_html( diff --git a/src/html_preprocessor.py b/src/html_presets_processor.py similarity index 97% rename from src/html_preprocessor.py rename to src/html_presets_processor.py index 6c57016..5f5bae3 100644 --- a/src/html_preprocessor.py +++ b/src/html_presets_processor.py @@ -7,7 +7,7 @@ from typing import List, Dict, Union from src.util.helpers import BookLogger -class HtmlPreprocessor: +class HtmlPresetsProcessor: def __init__(self, logger: BookLogger, preset_path): self.preset = json.load(open(preset_path)) self.logger = logger @@ -107,6 +107,8 @@ class HtmlPreprocessor: del kwargs["tag"][attr] elif attr_value_to_replace: kwargs["tag"].attrs[attr] = attr_value_to_replace + elif attr: + del kwargs["tag"][attr] @staticmethod def _unwrap_tag(**kwargs): @@ -172,7 +174,7 @@ class HtmlPreprocessor: action(body_tag=body_tag, tag=tag, rule=rule) -def _preprocess_html(html_preprocessor: HtmlPreprocessor, html_soup: BeautifulSoup): +def _process_presets(html_preprocessor: HtmlPresetsProcessor, html_soup: BeautifulSoup): for rule in html_preprocessor.preset: # html_preprocessor.logger.log(rule["preset_name"].title() + " process.") action = html_preprocessor.name2action[rule["preset_name"]] diff --git a/src/tag_inline_style_processor.py b/src/inline_style_processor.py similarity index 99% rename from src/tag_inline_style_processor.py rename to src/inline_style_processor.py index 42ed0d4..d63122a 100644 --- a/src/tag_inline_style_processor.py +++ b/src/inline_style_processor.py @@ -10,7 +10,7 @@ from src.livecarta_config import LiveCartaConfig cssutils.log.setLevel(CRITICAL) -class TagInlineStyleProcessor: +class InlineStyleProcessor: def __init__(self, tag_inline_style: Tag): # tag with inline style + style parsed from css file self.tag_inline_style = tag_inline_style @@ -251,6 +251,6 @@ def modify_html_soup_with_css_styles(html_soup: BeautifulSoup, css_text: str = " # go through the tags with inline style + style parsed from css file for tag_inline_style in tags_with_inline_style: - style_converter = TagInlineStyleProcessor(tag_inline_style) + style_converter = InlineStyleProcessor(tag_inline_style) style_converter.convert_initial_tag() return inline_soup diff --git a/src/style_preprocessor.py b/src/style_reader.py similarity index 99% rename from src/style_preprocessor.py rename to src/style_reader.py index da05488..9810caf 100644 --- a/src/style_preprocessor.py +++ b/src/style_reader.py @@ -7,7 +7,7 @@ from src.util.color_reader import str2hex from src.livecarta_config import LiveCartaConfig -class StylePreprocessor: +class StyleReader: def __init__(self): """ Dictionary LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }