diff --git a/consumer.py b/consumer.py index 28d5e20..7a78525 100644 --- a/consumer.py +++ b/consumer.py @@ -14,7 +14,8 @@ from src.docx_converter.docx_solver import DocxBook from src.epub_converter.epub_solver import EpubBook -def configure_file_logger(name: str, filename: str = "logs/converter.log", filemode: str ="w+", logging_level: int = logging.INFO) -> logging.Logger: +def configure_file_logger(name: str, filename: str = "logs/converter.log", + filemode: str = "w+", logging_level: int = logging.INFO) -> logging.Logger: logger = logging.getLogger(name) folder_path = os.path.dirname(os.path.abspath(__file__)) diff --git a/presets/presets.json b/presets/presets.json index d877a3a..1ff62a8 100644 --- a/presets/presets.json +++ b/presets/presets.json @@ -81,11 +81,17 @@ "preset_name": "attr_replacer", "rules": [ { - "attr": "xlink:href", + "attr": { + "name": "xlink:href", + "value": ".*" + }, "condition": { "tags": ["^img$"] }, - "attr_to_replace": "src" + "attr_to_replace": { + "name": "src", + "value": null + } } ] }, diff --git a/src/epub_converter/html_epub_processor.py b/src/epub_converter/html_epub_processor.py index 2947e9d..7a8cd0a 100644 --- a/src/epub_converter/html_epub_processor.py +++ b/src/epub_converter/html_epub_processor.py @@ -147,10 +147,17 @@ class HtmlEpubProcessor: @staticmethod def _replace_attr(**kwargs): - attr = kwargs["rule"]["attr"] - attr_to_replace = kwargs["rule"]["attr_to_replace"] - kwargs["tag"][attr_to_replace] = kwargs["tag"][attr] - del kwargs["tag"][attr] + attr, attr_value =\ + kwargs["rule"]["attr"]["name"], kwargs["rule"]["attr"]["value"] + attr_to_replace, attr_value_to_replace =\ + kwargs["rule"]["attr_to_replace"]["name"], kwargs["rule"]["attr_to_replace"]["value"] + if attr_to_replace: + kwargs["tag"][attr_to_replace] = kwargs["tag"][attr] + if attr_value_to_replace: + kwargs["tag"].attrs[attr_to_replace] = attr_value_to_replace + del kwargs["tag"][attr] + elif attr_value_to_replace: + kwargs["tag"].attrs[attr] = attr_value_to_replace @staticmethod def _unwrap_tag(**kwargs): @@ -208,7 +215,7 @@ class HtmlEpubProcessor: elif condition_on_tag[0] == "tags": attr = rule["attr"] for tag in chapter_tag.find_all([re.compile(tag) for tag in tags], - {attr: re.compile(r".*")}): + {attr['name']: re.compile(fr"{attr['value']}")}): action(chapter_tag=chapter_tag, tag=tag, rule=rule) else: for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]): @@ -316,7 +323,7 @@ class HtmlEpubProcessor: ---------- title_str: str - chapter_tag: Tag, soup object + chapter_tag: BeautifulSoup, soup object remove_title_from_chapter: bool