Merge pull request #381 from Teqniksoft/kiryl/converter_fix

Kiryl/converter fix
This commit is contained in:
Kiryl
2023-06-09 14:26:26 +03:00
committed by GitHub
3 changed files with 606 additions and 412 deletions

606
preset/default_preset.json Normal file
View File

@@ -0,0 +1,606 @@
[
{
"preset_name":"table_wrapper",
"rule":{
"tags":[
"^section$",
"^blockquote$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
{
"name":"class",
"value":"feature[1234]"
}
],
"text":null
}
}
},
{
"preset_name":"wrapper",
"rule":{
"tags":[
"^div$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
{
"name":"id",
"value":"^Table of Contents\\d+"
}
],
"text":null
},
"tag_to_wrap":{
"name":"TOC",
"attrs":[
]
}
}
},
{
"preset_name":"decomposer",
"rule":{
"tags":[
"^div$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
{
"name":"title",
"value":"footer"
}
],
"text":null
}
}
},
{
"preset_name":"replacer",
"rule":{
"tags":[
"^h[6-9]$",
"^figure$",
"^section$",
"^blockquote$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
],
"text":null
},
"tag_to_replace":{
"name":"p",
"attrs":[
]
}
}
},
{
"preset_name":"replacer",
"rule":{
"tags":[
"^aside$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
],
"text":null
},
"tag_to_replace":{
"name":"div",
"attrs":[
]
}
}
},
{
"preset_name":"replacer",
"rule":{
"tags":[
"^div$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
{
"name":"style",
"value":"column-count: 2"
}
],
"text":null
},
"tag_to_replace":{
"name":"p",
"attrs":[
]
}
}
},
{
"preset_name":"replacer",
"rule":{
"tags":[
"^header$",
"^footer$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
],
"text":null
},
"tag_to_replace":{
"name":"span",
"attrs":[
]
}
}
},
{
"preset_name":"replacer",
"rule":{
"tags":[
"^kbd$",
"^var$"
],
"condition":{
"parent_tags":":not(pre, span)",
"child_tags":null,
"attrs":[
],
"text":null
},
"tag_to_replace":{
"name":"span",
"attrs":[
]
}
}
},
{
"preset_name":"replacer",
"rule":{
"tags":[
"^em$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
],
"text":null
},
"tag_to_replace":{
"name":"i",
"attrs":[
]
}
}
},
{
"preset_name":"replacer",
"rule":{
"tags":[
"^b$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
],
"text":null
},
"tag_to_replace":{
"name":"strong",
"attrs":[
]
}
}
},
{
"preset_name":"replacer",
"rule":{
"tags":[
"^image$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
],
"text":null
},
"tag_to_replace":{
"name":"img",
"attrs":[
]
}
}
},
{
"preset_name":"attr_remover",
"rule":{
"tags":[
"^sup$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
],
"text":null
}
}
},
{
"preset_name":"attr_replacer",
"rule":{
"tags":[
"^img$"
],
"condition":{
"attrs":[
{
"name":"xlink:href",
"value":".*"
}
]
},
"attr_to_replace":{
"name":"src",
"value":null
}
}
},
{
"preset_name":"attr_replacer",
"rule":{
"tags":[
"^p$"
],
"condition":{
"attrs":[
{
"name":"style",
"value":"column-count: 2"
}
]
},
"attr_to_replace":{
"name":"class",
"value":"columns2"
}
}
},
{
"preset_name":"unwrapper",
"rule":{
"tags":[
"^section$",
"^blockquote$",
"^article$",
"^figcaption$",
"^main$",
"^body$",
"^html$",
"^svg$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
],
"text":null
}
}
},
{
"preset_name":"unwrapper",
"rule":{
"tags":[
"^p$"
],
"condition":{
"parent_tags":"li",
"child_tags":null,
"attrs":[
],
"text":null
}
}
},
{
"preset_name":"unwrapper",
"rule":{
"tags":[
"^span$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
{
"name":"style",
"value":"(^background: #[\\da-fA-F]{6}$)|(^letter-spacing: -?[\\d.]+pt$)"
}
],
"text":null
}
}
},
{
"preset_name":"unwrapper",
"rule":{
"tags":[
"^span$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
{
"name":"lang",
"value":"^ru-RU$"
}
],
"text":null
}
}
},
{
"preset_name":"unwrapper",
"rule":{
"tags":[
"^span$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
{
"name":"face",
"value":"^Times New Roman[\\w, ]+$"
}
],
"text":null
}
}
},
{
"preset_name":"unwrapper",
"rule":{
"tags":[
"^p$"
],
"condition":{
"parent_tags":":is(li)",
"child_tags":null,
"attrs":[
],
"text":null
}
}
},
{
"preset_name":"unwrapper",
"rule":{
"tags":[
"^a$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
{
"name":"name",
"value":"_GoBack"
}
],
"text":null
}
}
},
{
"preset_name":"unwrapper",
"rule":{
"tags":[
"^u$"
],
"condition":{
"parent_tags":":is(a)",
"child_tags":null,
"attrs":[
],
"text":null
}
}
},
{
"preset_name":"unwrapper",
"rule":{
"tags":[
"^u$"
],
"condition":{
"parent_tags":null,
"child_tags":":is(a)",
"attrs":[
],
"text":null
}
}
},
{
"preset_name":"unwrapper",
"rule":{
"tags":[
"^b$"
],
"condition":{
"parent_tags":":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)",
"child_tags":null,
"attrs":[
],
"text":null
}
}
},
{
"preset_name":"unwrapper",
"rule":{
"tags":[
"^div$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
],
"text":null
}
}
},
{
"preset_name":"inserter",
"rule":{
"tags":[
"^pre$"
],
"condition":{
"parent_tags":null,
"child_tags":":not(:has(code, kbd, var))",
"attrs":[
],
"text":null
},
"tag_to_insert":{
"name":"code",
"attrs":[
]
}
}
},
{
"preset_name":"inserter",
"rule":{
"tags":[
"^h[1-5]$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
],
"text":null
},
"tag_to_insert":{
"name":"strong",
"attrs":[
]
}
}
},
{
"preset_name":"inserter",
"rule":{
"tags":[
"^p$"
],
"condition":{
"parent_tags":null,
"child_tags":null,
"attrs":[
],
"text":"\\$\\$[\\s\\S]*?\\$\\$"
},
"tag_to_insert":{
"name":"span",
"attrs":[
{
"name":"class",
"value":"math-tex"
}
]
}
}
},
{
"preset_name":"text_replacer",
"rule":{
"tags":[
"^p$"
],
"condition":{
"text":"(\\\\nonumber\\\\\\\\\\\\noalign{\\\\pagebreak}[\\s\\S]*?)\\\\"
},
"text_to_replace":"\\\\"
}
},
{
"preset_name":"text_replacer",
"rule":{
"tags":[
"^.*$"
],
"condition":{
"text":" "
},
"text_to_replace":" "
}
},
{
"preset_name":"text_replacer",
"rule":{
"tags":[
"^.*$"
],
"condition":{
"text":" "
},
"text_to_replace":" "
}
}
]

View File

@@ -1,254 +0,0 @@
[
{
"preset_name": "wrapper",
"rules": [
{
"tags": ["^div$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "id",
"value": "^Table of Contents\\d+"
}
],
"text": null
},
"tag_to_wrap": {
"name": "TOC",
"attrs": []
}
}
]
},
{
"preset_name": "decomposer",
"rules": [
{
"tags": ["^div$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "title",
"value": "footer"
}
],
"text": null
}
},
{
"tags": ["^div$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "id",
"value": "^Table of Contents\\d+"
}
],
"text": null
}
}
]
},
{
"preset_name": "replacer",
"rules": [
{
"tags": ["^h[6-9]$"],
"condition": null,
"tag_to_replace": {
"name": "p",
"attrs": null
}
},
{
"tags": ["^div$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "style",
"value": "column-count: 2"
}
],
"text": null
},
"tag_to_replace": {
"name": "p",
"attrs": null
}
}
]
},
{
"preset_name": "attr_replacer",
"rules": [
{
"tags": ["^p$"],
"condition": {
"attrs": [
{
"name": "style",
"value": "column-count: 2"
}
]
},
"attr_to_replace": {
"name": "class",
"value": "columns2"
}
}
]
},
{
"preset_name": "unwrapper",
"rules": [
{
"tags": ["^span$"],
"condition": {
"parent_tags": ":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)",
"child_tags": null,
"attrs": null,
"text": null
}
},
{
"tags": ["^span$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "style",
"value": "(^background: #[\\da-fA-F]{6}$)|(^letter-spacing: -?[\\d.]+pt$)"
}
],
"text": null
}
},
{
"tags": ["^span$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "lang",
"value": "^ru-RU$"
}
],
"text": null
}
},
{
"tags": ["^span$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "face",
"value": "^Times New Roman[\\w, ]+$"
}
],
"text": null
}
},
{
"tags": ["^p$"],
"condition": {
"parent_tags": ":is(li)",
"child_tags": null,
"attrs": null,
"text": null
}
},
{
"tags": ["^a$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "name",
"value": "_GoBack"
}
],
"text": null
}
},
{
"tags": ["^u$"],
"condition": {
"parent_tags": ":is(a)",
"child_tags": null,
"attrs": null,
"text": null
}
},
{
"tags": ["^u$"],
"condition": {
"parent_tags": null,
"child_tags": ":is(a)",
"attrs": null,
"text": null
}
},
{
"tags": ["^b$"],
"condition": {
"parent_tags": ":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)",
"child_tags": null,
"attrs": null,
"text": null
}
},
{
"tags": ["^div$"],
"condition": null
}
]
},
{
"preset_name": "inserter",
"rules": [
{
"tags": ["^p$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": null,
"text": "\\$\\$[\\s\\S]*?\\$\\$"
},
"tag_to_insert": {
"name": "span",
"attrs": [
{
"name": "class",
"value": "math-tex"
}
]
}
}
]
},
{
"preset_name": "text_replacer",
"rules": [
{
"tags": ["^p$"],
"condition": {
"text": "(\\\\nonumber\\\\\\\\\\\\noalign{\\\\pagebreak}[\\s\\S]*?)\\\\"
},
"text_to_replace": "\\\\"
}
]
}
]

View File

@@ -1,158 +0,0 @@
[
{
"preset_name": "table_wrapper",
"rules": [
{
"tags": ["^section$", "^blockquote$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "class",
"value": "feature[1234]"
}
]
}
}
]
},
{
"preset_name": "replacer",
"rules": [
{
"tags": ["^h[6-9]$", "^figure$", "^section$", "^blockquote$"],
"condition": null,
"tag_to_replace": {
"name": "p"
}
},
{
"tags": ["^aside$"],
"condition": null,
"tag_to_replace": {
"name": "div"
}
},
{
"tags": ["^header$", "^footer$"],
"condition": null,
"tag_to_replace": {
"name": "span"
}
},
{
"tags": ["^kbd$", "^var$"],
"condition": {
"parent_tags": ":not(pre, span)",
"child_tags": null,
"attrs": null
},
"tag_to_replace": {
"name": "span"
}
},
{
"tags": ["^em$"],
"condition": null,
"tag_to_replace": {
"name": "i"
}
},
{
"tags": ["^b$"],
"condition": null,
"tag_to_replace": {
"name": "strong"
}
},
{
"tags": ["^image$"],
"condition": null,
"tag_to_replace": {
"name": "img"
}
}
]
},
{
"preset_name": "attr_remover",
"rules": [
{
"tags": ["^sup$"],
"condition": null
}
]
},
{
"preset_name": "attr_replacer",
"rules": [
{
"tags": ["^img$"],
"condition": {
"attrs": [
{
"name": "xlink:href",
"value": ".*"
}
]
},
"attr_to_replace": {
"name": "src",
"value": null
}
}
]
},
{
"preset_name": "unwrapper",
"rules": [
{
"tags": [
"^section$",
"^blockquote$",
"^article$",
"^figcaption$",
"^main$",
"^body$",
"^html$",
"^svg$"
],
"condition": null
},
{
"tags": ["^p$"],
"condition": {
"parent_tags": "li",
"child_tags": null,
"attrs": null
}
}
]
},
{
"preset_name": "inserter",
"rules": [
{
"tags": ["^pre$"],
"condition": {
"parent_tags": null,
"child_tags": ":not(:has(code, kbd, var))",
"attrs": null
},
"tag_to_insert": {
"name": "code",
"attrs": []
}
},
{
"tags": ["^h[1-5]$"],
"condition": null,
"tag_to_insert": {
"name":"strong",
"attrs": []
}
}
]
}
]