From da0e4ec15795a6f34665fbd2fe2f1e6869cc66e4 Mon Sep 17 00:00:00 2001 From: Kiryl Date: Mon, 5 Sep 2022 17:25:32 +0300 Subject: [PATCH] Add wrapper to presets --- presets/docx_presets.json | 19 +++++++++++++++++++ src/docx_converter/html_docx_processor.py | 6 +++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/presets/docx_presets.json b/presets/docx_presets.json index 16f55c6..631776f 100644 --- a/presets/docx_presets.json +++ b/presets/docx_presets.json @@ -1,4 +1,23 @@ [ + { + "preset_name": "wrapper", + "rules": [ + { + "tags": ["^div$"], + "condition": { + "parent_tags": null, + "child_tags": null, + "attrs": [ + { + "name": "id", + "value": "^Table of Contents\\d+" + } + ] + }, + "tag_to_wrap": "TOC" + } + ] + }, { "preset_name": "decomposer", "rules": [ diff --git a/src/docx_converter/html_docx_processor.py b/src/docx_converter/html_docx_processor.py index 7c7d2e7..7868f02 100644 --- a/src/docx_converter/html_docx_processor.py +++ b/src/docx_converter/html_docx_processor.py @@ -15,12 +15,13 @@ class HTMLDocxProcessor: def __init__(self, html_soup: BeautifulSoup, logger: BookLogger, style_processor, preset_path: str = "presets/docx_presets.json"): - self.body_tag = html_soup.body self.html_soup = html_soup + self.body_tag = html_soup.body self.logger = logger self.preset = json.load(open(preset_path)) self.style_processor = style_processor self.name2action = { + "wrapper": self._wrap_tag, "decomposer": self._decompose_tag, "replacer": self._replace_tag, "attr_replacer": self._replace_attr, @@ -58,6 +59,9 @@ class HTMLDocxProcessor: f"Check the structure of the file." f"Tag name: {tag.name}") + def _wrap_tag(self, **kwargs): + kwargs["tag"].wrap(self.html_soup.new_tag(kwargs["rule"]["tag_to_wrap"])) + @staticmethod def _decompose_tag(**kwargs): kwargs["tag"].decompose()