diff --git a/presets/docx_presets.json b/presets/docx_presets.json new file mode 100644 index 0000000..2d6e141 --- /dev/null +++ b/presets/docx_presets.json @@ -0,0 +1,92 @@ +[ + { + "preset_name": "decomposer", + "rules": [ + { + "tags": ["^div$"], + "condition": { + "parent_tags": null, + "child_tags": null, + "attrs": [ + { + "name": "title", + "value": "footer" + } + ] + } + } + ] + }, + { + "preset_name": "unwrapper", + "rules": [ + { + "tags": ["^span$"], + "condition": { + "parent_tags": ":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)", + "child_tags": null, + "attrs": [ + { + "name": "style", + "value": "(^background: #[\\da-fA-F]{6}$)|(^letter-spacing: -?[\\d.]+pt$)" + }, + { + "name": "lang", + "value": "^ru-RU$" + } + ] + } + }, + { + "tags": ["^font$"], + "condition": { + "parent_tags": null, + "child_tags": null, + "attrs": [ + { + "name": "face", + "value": "^Times New Roman[\\w, ]+$" + } + ] + } + }, + { + "tags": ["^p$"], + "condition": { + "parent_tags": "li", + "child_tags": null, + "attrs": null + } + }, + { + "tags": ["^a$"], + "condition": { + "parent_tags": null, + "child_tags": null, + "attrs": [ + { + "name": "name", + "value": "_GoBack" + } + ] + } + }, + { + "tags": ["^u$"], + "condition": { + "parent_tags": "a", + "child_tags": "a", + "attrs": null + } + }, + { + "tags": ["^b$"], + "condition": { + "parent_tags": ":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)", + "child_tags": null, + "attrs": null + } + } + ] + } +] \ No newline at end of file