diff --git a/preset/default_preset.json b/preset/default_preset.json new file mode 100644 index 0000000..234fd32 --- /dev/null +++ b/preset/default_preset.json @@ -0,0 +1,606 @@ +[ + { + "preset_name":"table_wrapper", + "rule":{ + "tags":[ + "^section$", + "^blockquote$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"class", + "value":"feature[1234]" + } + ], + "text":null + } + } + }, + { + "preset_name":"wrapper", + "rule":{ + "tags":[ + "^div$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"id", + "value":"^Table of Contents\\d+" + } + ], + "text":null + }, + "tag_to_wrap":{ + "name":"TOC", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"decomposer", + "rule":{ + "tags":[ + "^div$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"title", + "value":"footer" + } + ], + "text":null + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^h[6-9]$", + "^figure$", + "^section$", + "^blockquote$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_replace":{ + "name":"p", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^aside$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_replace":{ + "name":"div", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^div$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"style", + "value":"column-count: 2" + } + ], + "text":null + }, + "tag_to_replace":{ + "name":"p", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^header$", + "^footer$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_replace":{ + "name":"span", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^kbd$", + "^var$" + ], + "condition":{ + "parent_tags":":not(pre, span)", + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_replace":{ + "name":"span", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^em$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_replace":{ + "name":"i", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^b$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_replace":{ + "name":"strong", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"replacer", + "rule":{ + "tags":[ + "^image$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_replace":{ + "name":"img", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"attr_remover", + "rule":{ + "tags":[ + "^sup$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"attr_replacer", + "rule":{ + "tags":[ + "^img$" + ], + "condition":{ + "attrs":[ + { + "name":"xlink:href", + "value":".*" + } + ] + }, + "attr_to_replace":{ + "name":"src", + "value":null + } + } + }, + { + "preset_name":"attr_replacer", + "rule":{ + "tags":[ + "^p$" + ], + "condition":{ + "attrs":[ + { + "name":"style", + "value":"column-count: 2" + } + ] + }, + "attr_to_replace":{ + "name":"class", + "value":"columns2" + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^section$", + "^blockquote$", + "^article$", + "^figcaption$", + "^main$", + "^body$", + "^html$", + "^svg$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^p$" + ], + "condition":{ + "parent_tags":"li", + "child_tags":null, + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^span$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"style", + "value":"(^background: #[\\da-fA-F]{6}$)|(^letter-spacing: -?[\\d.]+pt$)" + } + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^span$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"lang", + "value":"^ru-RU$" + } + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^span$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"face", + "value":"^Times New Roman[\\w, ]+$" + } + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^p$" + ], + "condition":{ + "parent_tags":":is(li)", + "child_tags":null, + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^a$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + { + "name":"name", + "value":"_GoBack" + } + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^u$" + ], + "condition":{ + "parent_tags":":is(a)", + "child_tags":null, + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^u$" + ], + "condition":{ + "parent_tags":null, + "child_tags":":is(a)", + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^b$" + ], + "condition":{ + "parent_tags":":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)", + "child_tags":null, + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"unwrapper", + "rule":{ + "tags":[ + "^div$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + } + } + }, + { + "preset_name":"inserter", + "rule":{ + "tags":[ + "^pre$" + ], + "condition":{ + "parent_tags":null, + "child_tags":":not(:has(code, kbd, var))", + "attrs":[ + + ], + "text":null + }, + "tag_to_insert":{ + "name":"code", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"inserter", + "rule":{ + "tags":[ + "^h[1-5]$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":null + }, + "tag_to_insert":{ + "name":"strong", + "attrs":[ + + ] + } + } + }, + { + "preset_name":"inserter", + "rule":{ + "tags":[ + "^p$" + ], + "condition":{ + "parent_tags":null, + "child_tags":null, + "attrs":[ + + ], + "text":"\\$\\$[\\s\\S]*?\\$\\$" + }, + "tag_to_insert":{ + "name":"span", + "attrs":[ + { + "name":"class", + "value":"math-tex" + } + ] + } + } + }, + { + "preset_name":"text_replacer", + "rule":{ + "tags":[ + "^p$" + ], + "condition":{ + "text":"(\\\\nonumber\\\\\\\\\\\\noalign{\\\\pagebreak}[\\\\s\\\\S]*?)\\\\" + }, + "text_to_replace":"\\\\" + } + }, + { + "preset_name":"text_replacer", + "rule":{ + "tags":[ + "^.*$" + ], + "condition":{ + "text":" " + }, + "text_to_replace":" " + } + }, + { + "preset_name":"text_replacer", + "rule":{ + "tags":[ + "^.*$" + ], + "condition":{ + "text":" " + }, + "text_to_replace":" " + } + } +]