diff --git a/preset/default_preset.json b/preset/default_preset.json new file mode 100644 index 0000000..234fd32 --- /dev/null +++ b/preset/default_preset.json @@ -0,0 +1,606 @@ +[ + { + "preset_name":"table_wrapper", + "rule":{ + "tags":[ + "^section$", + "^blockquote$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"class", + "value":"feature[1234]" + } + ], + "text":null + } + } + }, + { + "preset_name":"wrapper", + "rule":{ + "tags":[ + "^div$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"id", + "value":"^Table of Contents\\d+" + } + ], + "text":null + }, + "tag_to_wrap":{ + "name":"TOC", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"decomposer", + "rule":{ + "tags":[ + "^div$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"title", + "value":"footer" + } + ], + "text":null + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^h[6-9]$", + "^figure$", + "^section$", + "^blockquote$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_replace":{ + "name":"p", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^aside$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_replace":{ + "name":"div", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^div$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"style", + "value":"column-count: 2" + } + ], + "text":null + }, + "tag_to_replace":{ + "name":"p", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^header$", + "^footer$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_replace":{ + "name":"span", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^kbd$", + "^var$" + ], + "condition":{ + "parent_tags":":not(pre, span)", + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_replace":{ + "name":"span", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^em$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_replace":{ + "name":"i", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^b$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_replace":{ + "name":"strong", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^image$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_replace":{ + "name":"img", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"attr_remover", + "rule":{ + "tags":[ + "^sup$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"attr_replacer", + "rule":{ + "tags":[ + "^img$" + ], + "condition":{ + "attrs":[ + { + "name":"xlink:href", + "value":".*" + } + ] + }, + "attr_to_replace":{ + "name":"src", + "value":null + } + } + }, + { + "preset_name":"attr_replacer", + "rule":{ + "tags":[ + "^p$" + ], + "condition":{ + "attrs":[ + { + "name":"style", + "value":"column-count: 2" + } + ] + }, + "attr_to_replace":{ + "name":"class", + "value":"columns2" + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^section$", + "^blockquote$", + "^article$", + "^figcaption$", + "^main$", + "^body$", + "^html$", + "^svg$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^p$" + ], + "condition":{ + "parent_tags":"li", + "child_tags":null, + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^span$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"style", + "value":"(^background: #[\\da-fA-F]{6}$)|(^letter-spacing: -?[\\d.]+pt$)" + } + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^span$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"lang", + "value":"^ru-RU$" + } + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^span$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"face", + "value":"^Times New Roman[\\w, ]+$" + } + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^p$" + ], + "condition":{ + "parent_tags":":is(li)", + "child_tags":null, + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^a$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"name", + "value":"_GoBack" + } + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^u$" + ], + "condition":{ + "parent_tags":":is(a)", + "child_tags":null, + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^u$" + ], + "condition":{ + "parent_tags":null, + "child_tags":":is(a)", + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^b$" + ], + "condition":{ + "parent_tags":":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)", + "child_tags":null, + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^div$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"inserter", + "rule":{ + "tags":[ + "^pre$" + ], + "condition":{ + "parent_tags":null, + "child_tags":":not(:has(code, kbd, var))", + "attrs":[ + + ], + "text":null + }, + "tag_to_insert":{ + "name":"code", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"inserter", + "rule":{ + "tags":[ + "^h[1-5]$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_insert":{ + "name":"strong", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"inserter", + "rule":{ + "tags":[ + "^p$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":"\\$\\$[\\s\\S]*?\\$\\$" + }, + "tag_to_insert":{ + "name":"span", + "attrs":[ + { + "name":"class", + "value":"math-tex" + } + ] + } + } + }, + { + "preset_name":"text_replacer", + "rule":{ + "tags":[ + "^p$" + ], + "condition":{ + "text":"(\\\\nonumber\\\\\\\\\\\\noalign{\\\\pagebreak}[\\\\s\\\\S]*?)\\\\" + }, + "text_to_replace":"\\\\" + } + }, + { + "preset_name":"text_replacer", + "rule":{ + "tags":[ + "^.*$" + ], + "condition":{ + "text":" " + }, + "text_to_replace":" " + } + }, + { + "preset_name":"text_replacer", + "rule":{ + "tags":[ + "^.*$" + ], + "condition":{ + "text":" " + }, + "text_to_replace":" " + } + } +] diff --git a/preset/old_docx_presets.json b/preset/old_docx_presets.json deleted file mode 100644 index 6d5613b..0000000 --- a/preset/old_docx_presets.json +++ /dev/null @@ -1,254 +0,0 @@ -[ - { - "preset_name": "wrapper", - "rules": [ - { - "tags": ["^div$"], - "condition": { - "parent_tags": null, - "child_tags": null, - "attrs": [ - { - "name": "id", - "value": "^Table of Contents\\d+" - } - ], - "text": null - }, - "tag_to_wrap": { - "name": "TOC", - "attrs": [] - } - } - ] - }, - { - "preset_name": "decomposer", - "rules": [ - { - "tags": ["^div$"], - "condition": { - "parent_tags": null, - "child_tags": null, - "attrs": [ - { - "name": "title", - "value": "footer" - } - ], - "text": null - } - }, - { - "tags": ["^div$"], - "condition": { - "parent_tags": null, - "child_tags": null, - "attrs": [ - { - "name": "id", - "value": "^Table of Contents\\d+" - } - ], - "text": null - } - } - ] - }, - { - "preset_name": "replacer", - "rules": [ - { - "tags": ["^h[6-9]$"], - "condition": null, - "tag_to_replace": { - "name": "p", - "attrs": null - } - }, - { - "tags": ["^div$"], - "condition": { - "parent_tags": null, - "child_tags": null, - "attrs": [ - { - "name": "style", - "value": "column-count: 2" - } - ], - "text": null - }, - "tag_to_replace": { - "name": "p", - "attrs": null - } - } - ] - }, - { - "preset_name": "attr_replacer", - "rules": [ - { - "tags": ["^p$"], - "condition": { - "attrs": [ - { - "name": "style", - "value": "column-count: 2" - } - ] - }, - "attr_to_replace": { - "name": "class", - "value": "columns2" - } - } - ] - }, - { - "preset_name": "unwrapper", - "rules": [ - { - "tags": ["^span$"], - "condition": { - "parent_tags": ":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)", - "child_tags": null, - "attrs": null, - "text": null - } - }, - { - "tags": ["^span$"], - "condition": { - "parent_tags": null, - "child_tags": null, - "attrs": [ - { - "name": "style", - "value": "(^background: #[\\da-fA-F]{6}$)|(^letter-spacing: -?[\\d.]+pt$)" - } - ], - "text": null - } - }, - { - "tags": ["^span$"], - "condition": { - "parent_tags": null, - "child_tags": null, - "attrs": [ - { - "name": "lang", - "value": "^ru-RU$" - } - ], - "text": null - } - }, - { - "tags": ["^span$"], - "condition": { - "parent_tags": null, - "child_tags": null, - "attrs": [ - { - "name": "face", - "value": "^Times New Roman[\\w, ]+$" - } - ], - "text": null - } - }, - { - "tags": ["^p$"], - "condition": { - "parent_tags": ":is(li)", - "child_tags": null, - "attrs": null, - "text": null - } - }, - { - "tags": ["^a$"], - "condition": { - "parent_tags": null, - "child_tags": null, - "attrs": [ - { - "name": "name", - "value": "_GoBack" - } - ], - "text": null - } - }, - { - "tags": ["^u$"], - "condition": { - "parent_tags": ":is(a)", - "child_tags": null, - "attrs": null, - "text": null - } - }, - { - "tags": ["^u$"], - "condition": { - "parent_tags": null, - "child_tags": ":is(a)", - "attrs": null, - "text": null - } - }, - { - "tags": ["^b$"], - "condition": { - "parent_tags": ":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)", - "child_tags": null, - "attrs": null, - "text": null - } - }, - { - "tags": ["^div$"], - "condition": null - } - ] - }, - { - "preset_name": "inserter", - "rules": [ - { - "tags": ["^p$"], - "condition": { - "parent_tags": null, - "child_tags": null, - "attrs": null, - "text": "\\$\\$[\\s\\S]*?\\$\\$" - }, - "tag_to_insert": { - "name": "span", - "attrs": [ - { - "name": "class", - "value": "math-tex" - } - ] - } - } - ] - }, - { - "preset_name": "text_replacer", - "rules": [ - { - "tags": ["^p$"], - "condition": { - "text": "(\\\\nonumber\\\\\\\\\\\\noalign{\\\\pagebreak}[\\s\\S]*?)\\\\" - }, - "text_to_replace": "\\\\" - } - ] - } -] diff --git a/preset/old_epub_presets.json b/preset/old_epub_presets.json deleted file mode 100644 index 81d5498..0000000 --- a/preset/old_epub_presets.json +++ /dev/null @@ -1,158 +0,0 @@ -[ - { - "preset_name": "table_wrapper", - "rules": [ - { - "tags": ["^section$", "^blockquote$"], - "condition": { - "parent_tags": null, - "child_tags": null, - "attrs": [ - { - "name": "class", - "value": "feature[1234]" - } - ] - } - } - ] - }, - { - "preset_name": "replacer", - "rules": [ - { - "tags": ["^h[6-9]$", "^figure$", "^section$", "^blockquote$"], - "condition": null, - "tag_to_replace": { - "name": "p" - } - }, - { - "tags": ["^aside$"], - "condition": null, - "tag_to_replace": { - "name": "div" - } - }, - { - "tags": ["^header$", "^footer$"], - "condition": null, - "tag_to_replace": { - "name": "span" - } - }, - { - "tags": ["^kbd$", "^var$"], - "condition": { - "parent_tags": ":not(pre, span)", - "child_tags": null, - "attrs": null - }, - "tag_to_replace": { - "name": "span" - } - }, - { - "tags": ["^em$"], - "condition": null, - "tag_to_replace": { - "name": "i" - } - }, - { - "tags": ["^b$"], - "condition": null, - "tag_to_replace": { - "name": "strong" - } - }, - { - "tags": ["^image$"], - "condition": null, - "tag_to_replace": { - "name": "img" - } - } - ] - }, - { - "preset_name": "attr_remover", - "rules": [ - { - "tags": ["^sup$"], - "condition": null - } - ] - }, - { - "preset_name": "attr_replacer", - "rules": [ - { - "tags": ["^img$"], - "condition": { - "attrs": [ - { - "name": "xlink:href", - "value": ".*" - } - ] - }, - "attr_to_replace": { - "name": "src", - "value": null - } - } - ] - }, - { - "preset_name": "unwrapper", - "rules": [ - { - "tags": [ - "^section$", - "^blockquote$", - "^article$", - "^figcaption$", - "^main$", - "^body$", - "^html$", - "^svg$" - ], - "condition": null - }, - { - "tags": ["^p$"], - "condition": { - "parent_tags": "li", - "child_tags": null, - "attrs": null - } - } - ] - }, - { - "preset_name": "inserter", - "rules": [ - { - "tags": ["^pre$"], - "condition": { - "parent_tags": null, - "child_tags": ":not(:has(code, kbd, var))", - "attrs": null - }, - "tag_to_insert": { - "name": "code", - "attrs": [] - } - }, - { - "tags": ["^h[1-5]$"], - "condition": null, - "tag_to_insert": { - "name":"strong", - "attrs": [] - } - } - ] - } -]