forked from LiveCarta/BookConverter
Change names
This commit is contained in:
@@ -5,8 +5,8 @@ from threading import Event
|
|||||||
|
|
||||||
from src.book_solver import BookSolver
|
from src.book_solver import BookSolver
|
||||||
from src.util.helpers import BookLogger
|
from src.util.helpers import BookLogger
|
||||||
from src.html_preprocessor import HtmlPreprocessor
|
from src.html_presets_processor import HtmlPresetsProcessor
|
||||||
from src.style_preprocessor import StylePreprocessor
|
from src.style_reader import StyleReader
|
||||||
from src.docx_converter.docx2libre_html import Docx2LibreHtml
|
from src.docx_converter.docx2libre_html import Docx2LibreHtml
|
||||||
from src.docx_converter.html_docx_processor import HtmlDocxProcessor
|
from src.docx_converter.html_docx_processor import HtmlDocxProcessor
|
||||||
from src.docx_converter.libre_html2json_converter import LibreHtml2JsonConverter
|
from src.docx_converter.libre_html2json_converter import LibreHtml2JsonConverter
|
||||||
@@ -49,9 +49,9 @@ class DocxBook(BookSolver):
|
|||||||
|
|
||||||
# 2. Parses and cleans html, gets list of tags, gets footnotes
|
# 2. Parses and cleans html, gets list of tags, gets footnotes
|
||||||
try:
|
try:
|
||||||
html_preprocessor = HtmlPreprocessor(
|
html_preprocessor = HtmlPresetsProcessor(
|
||||||
logger=self.logger_object, preset_path="presets/docx_presets.json")
|
logger=self.logger_object, preset_path="presets/docx_presets.json")
|
||||||
style_preprocessor = StylePreprocessor()
|
style_preprocessor = StyleReader()
|
||||||
html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup,
|
html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup,
|
||||||
logger=self.logger_object,
|
logger=self.logger_object,
|
||||||
html_preprocessor=html_preprocessor,
|
html_preprocessor=html_preprocessor,
|
||||||
@@ -80,7 +80,7 @@ class DocxBook(BookSolver):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
docx_file_path = "../../books/docx/Bar_Exam_MPT_2e_prepared.docx"
|
docx_file_path = "../../books/docx/AmericanGovernment3e-WEB.docx"
|
||||||
logger_object = BookLogger(
|
logger_object = BookLogger(
|
||||||
name="docx", book_id=docx_file_path.split("/")[-1])
|
name="docx", book_id=docx_file_path.split("/")[-1])
|
||||||
locker = Event()
|
locker = Event()
|
||||||
@@ -89,9 +89,9 @@ if __name__ == "__main__":
|
|||||||
html_converter = Docx2LibreHtml(file_path=docx_file_path,
|
html_converter = Docx2LibreHtml(file_path=docx_file_path,
|
||||||
logger=logger_object, libre_locker=locker)
|
logger=logger_object, libre_locker=locker)
|
||||||
|
|
||||||
html_preprocessor = HtmlPreprocessor(
|
html_preprocessor = HtmlPresetsProcessor(
|
||||||
logger=logger_object, preset_path="../../presets/docx_presets.json")
|
logger=logger_object, preset_path="../../presets/docx_presets.json")
|
||||||
style_preprocessor = StylePreprocessor()
|
style_preprocessor = StyleReader()
|
||||||
html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup, logger=logger_object,
|
html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup, logger=logger_object,
|
||||||
html_preprocessor=html_preprocessor, style_preprocessor=style_preprocessor)
|
html_preprocessor=html_preprocessor, style_preprocessor=style_preprocessor)
|
||||||
content, footnotes, top_level_headers = html_processor.process_html(
|
content, footnotes, top_level_headers = html_processor.process_html(
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from typing import List, Dict, Union
|
|||||||
from src.util.helpers import BookLogger
|
from src.util.helpers import BookLogger
|
||||||
|
|
||||||
|
|
||||||
class HtmlPreprocessor:
|
class HtmlPresetsProcessor:
|
||||||
def __init__(self, logger: BookLogger, preset_path):
|
def __init__(self, logger: BookLogger, preset_path):
|
||||||
self.preset = json.load(open(preset_path))
|
self.preset = json.load(open(preset_path))
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
@@ -107,6 +107,8 @@ class HtmlPreprocessor:
|
|||||||
del kwargs["tag"][attr]
|
del kwargs["tag"][attr]
|
||||||
elif attr_value_to_replace:
|
elif attr_value_to_replace:
|
||||||
kwargs["tag"].attrs[attr] = attr_value_to_replace
|
kwargs["tag"].attrs[attr] = attr_value_to_replace
|
||||||
|
elif attr:
|
||||||
|
del kwargs["tag"][attr]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _unwrap_tag(**kwargs):
|
def _unwrap_tag(**kwargs):
|
||||||
@@ -172,7 +174,7 @@ class HtmlPreprocessor:
|
|||||||
action(body_tag=body_tag, tag=tag, rule=rule)
|
action(body_tag=body_tag, tag=tag, rule=rule)
|
||||||
|
|
||||||
|
|
||||||
def _preprocess_html(html_preprocessor: HtmlPreprocessor, html_soup: BeautifulSoup):
|
def _process_presets(html_preprocessor: HtmlPresetsProcessor, html_soup: BeautifulSoup):
|
||||||
for rule in html_preprocessor.preset:
|
for rule in html_preprocessor.preset:
|
||||||
# html_preprocessor.logger.log(rule["preset_name"].title() + " process.")
|
# html_preprocessor.logger.log(rule["preset_name"].title() + " process.")
|
||||||
action = html_preprocessor.name2action[rule["preset_name"]]
|
action = html_preprocessor.name2action[rule["preset_name"]]
|
||||||
@@ -10,7 +10,7 @@ from src.livecarta_config import LiveCartaConfig
|
|||||||
cssutils.log.setLevel(CRITICAL)
|
cssutils.log.setLevel(CRITICAL)
|
||||||
|
|
||||||
|
|
||||||
class TagInlineStyleProcessor:
|
class InlineStyleProcessor:
|
||||||
def __init__(self, tag_inline_style: Tag):
|
def __init__(self, tag_inline_style: Tag):
|
||||||
# tag with inline style + style parsed from css file
|
# tag with inline style + style parsed from css file
|
||||||
self.tag_inline_style = tag_inline_style
|
self.tag_inline_style = tag_inline_style
|
||||||
@@ -251,6 +251,6 @@ def modify_html_soup_with_css_styles(html_soup: BeautifulSoup, css_text: str = "
|
|||||||
|
|
||||||
# go through the tags with inline style + style parsed from css file
|
# go through the tags with inline style + style parsed from css file
|
||||||
for tag_inline_style in tags_with_inline_style:
|
for tag_inline_style in tags_with_inline_style:
|
||||||
style_converter = TagInlineStyleProcessor(tag_inline_style)
|
style_converter = InlineStyleProcessor(tag_inline_style)
|
||||||
style_converter.convert_initial_tag()
|
style_converter.convert_initial_tag()
|
||||||
return inline_soup
|
return inline_soup
|
||||||
@@ -7,7 +7,7 @@ from src.util.color_reader import str2hex
|
|||||||
from src.livecarta_config import LiveCartaConfig
|
from src.livecarta_config import LiveCartaConfig
|
||||||
|
|
||||||
|
|
||||||
class StylePreprocessor:
|
class StyleReader:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
"""
|
"""
|
||||||
Dictionary LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }
|
Dictionary LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }
|
||||||
Reference in New Issue
Block a user