diff --git a/presets/docx_presets.json b/presets/docx_presets.json index 96f861b..6d5613b 100644 --- a/presets/docx_presets.json +++ b/presets/docx_presets.json @@ -1,5 +1,5 @@ [ - { + { "preset_name": "wrapper", "rules": [ { @@ -34,7 +34,17 @@ { "name": "title", "value": "footer" - }, + } + ], + "text": null + } + }, + { + "tags": ["^div$"], + "condition": { + "parent_tags": null, + "child_tags": null, + "attrs": [ { "name": "id", "value": "^Table of Contents\\d+" @@ -104,15 +114,44 @@ "condition": { "parent_tags": ":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)", "child_tags": null, + "attrs": null, + "text": null + } + }, + { + "tags": ["^span$"], + "condition": { + "parent_tags": null, + "child_tags": null, "attrs": [ { "name": "style", "value": "(^background: #[\\da-fA-F]{6}$)|(^letter-spacing: -?[\\d.]+pt$)" - }, + } + ], + "text": null + } + }, + { + "tags": ["^span$"], + "condition": { + "parent_tags": null, + "child_tags": null, + "attrs": [ { "name": "lang", "value": "^ru-RU$" - }, + } + ], + "text": null + } + }, + { + "tags": ["^span$"], + "condition": { + "parent_tags": null, + "child_tags": null, + "attrs": [ { "name": "face", "value": "^Times New Roman[\\w, ]+$" @@ -148,6 +187,15 @@ "tags": ["^u$"], "condition": { "parent_tags": ":is(a)", + "child_tags": null, + "attrs": null, + "text": null + } + }, + { + "tags": ["^u$"], + "condition": { + "parent_tags": null, "child_tags": ":is(a)", "attrs": null, "text": null diff --git a/presets/epub_presets.json b/presets/epub_presets.json index 7c8f672..d30e619 100644 --- a/presets/epub_presets.json +++ b/presets/epub_presets.json @@ -11,15 +11,42 @@ { "name": "width", "value": ".*" - }, + } + ] + } + }, + { + "tags": ["^div$"], + "condition": { + "parent_tags": null, + "child_tags": null, + "attrs": [ { "name": "border", "value": ".*" - }, + } + ] + } + }, + { + "tags": ["^div$"], + "condition": { + "parent_tags": null, + "child_tags": null, + "attrs": [ { "name": "style", "value": "border.*" - }, + } + ] + } + }, + { + "tags": ["^div$"], + "condition": { + "parent_tags": null, + "child_tags": null, + "attrs": [ { "name": "bgcolor", "value": ".*" @@ -69,7 +96,7 @@ { "tags": ["^code$", "^kbd$", "^var$"], "condition": { - "parent_tags": ":not(pre)", + "parent_tags": ":not(pre, span)", "child_tags": null, "attrs": null }, @@ -99,6 +126,15 @@ } } ] + }, + { + "preset_name": "attrs_remover", + "rules": [ + { + "tags": ["^sup$"], + "condition": null + } + ] }, { "preset_name": "attr_replacer", @@ -171,4 +207,4 @@ } ] } -] \ No newline at end of file +]