This commit is contained in:
Kiryl
2022-11-15 16:39:27 +03:00
parent d38455fca1
commit 8488ceb146
5 changed files with 550 additions and 0 deletions

3
preset/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
*
!.gitignore
!*.json

254
preset/docx_presets.json Normal file
View File

@@ -0,0 +1,254 @@
[
{
"preset_name": "wrapper",
"rules": [
{
"tags": ["^div$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "id",
"value": "^Table of Contents\\d+"
}
],
"text": null
},
"tag_to_wrap": {
"name": "TOC",
"attrs": []
}
}
]
},
{
"preset_name": "decomposer",
"rules": [
{
"tags": ["^div$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "title",
"value": "footer"
}
],
"text": null
}
},
{
"tags": ["^div$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "id",
"value": "^Table of Contents\\d+"
}
],
"text": null
}
}
]
},
{
"preset_name": "replacer",
"rules": [
{
"tags": ["^h[6-9]$"],
"condition": null,
"tag_to_replace": {
"name": "p",
"attrs": null
}
},
{
"tags": ["^div$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "style",
"value": "column-count: 2"
}
],
"text": null
},
"tag_to_replace": {
"name": "p",
"attrs": null
}
}
]
},
{
"preset_name": "attr_replacer",
"rules": [
{
"tags": ["^p$"],
"condition": {
"attrs": [
{
"name": "style",
"value": "column-count: 2"
}
]
},
"attr_to_replace": {
"name": "class",
"value": "columns2"
}
}
]
},
{
"preset_name": "unwrapper",
"rules": [
{
"tags": ["^span$"],
"condition": {
"parent_tags": ":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)",
"child_tags": null,
"attrs": null,
"text": null
}
},
{
"tags": ["^span$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "style",
"value": "(^background: #[\\da-fA-F]{6}$)|(^letter-spacing: -?[\\d.]+pt$)"
}
],
"text": null
}
},
{
"tags": ["^span$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "lang",
"value": "^ru-RU$"
}
],
"text": null
}
},
{
"tags": ["^span$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "face",
"value": "^Times New Roman[\\w, ]+$"
}
],
"text": null
}
},
{
"tags": ["^p$"],
"condition": {
"parent_tags": ":is(li)",
"child_tags": null,
"attrs": null,
"text": null
}
},
{
"tags": ["^a$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "name",
"value": "_GoBack"
}
],
"text": null
}
},
{
"tags": ["^u$"],
"condition": {
"parent_tags": ":is(a)",
"child_tags": null,
"attrs": null,
"text": null
}
},
{
"tags": ["^u$"],
"condition": {
"parent_tags": null,
"child_tags": ":is(a)",
"attrs": null,
"text": null
}
},
{
"tags": ["^b$"],
"condition": {
"parent_tags": ":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)",
"child_tags": null,
"attrs": null,
"text": null
}
},
{
"tags": ["^div$"],
"condition": null
}
]
},
{
"preset_name": "inserter",
"rules": [
{
"tags": ["^p$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": null,
"text": "\\$\\$[\\s\\S]*?\\$\\$"
},
"tag_to_insert": {
"name": "span",
"attrs": [
{
"name": "class",
"value": "math-tex"
}
]
}
}
]
},
{
"preset_name": "text_replacer",
"rules": [
{
"tags": ["^p$"],
"condition": {
"text": "(\\\\nonumber\\\\\\\\\\\\noalign{\\\\pagebreak}[\\s\\S]*?)\\\\"
},
"text_to_replace": "\\\\"
}
]
}
]

210
preset/epub_presets.json Normal file
View File

@@ -0,0 +1,210 @@
[
{
"preset_name": "table_wrapper",
"rules": [
{
"tags": ["^div$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "width",
"value": ".*"
}
]
}
},
{
"tags": ["^div$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "border",
"value": ".*"
}
]
}
},
{
"tags": ["^div$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "style",
"value": "border.*"
}
]
}
},
{
"tags": ["^div$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "bgcolor",
"value": ".*"
}
]
}
},
{
"tags": ["^section$", "^blockquote$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": [
{
"name": "class",
"value": "feature[1234]"
}
]
}
}
]
},
{
"preset_name": "replacer",
"rules": [
{
"tags": ["^h[6-9]$", "^figure$", "^section$", "^div$", "blockquote"],
"condition": null,
"tag_to_replace": {
"name": "p"
}
},
{
"tags": ["^aside$"],
"condition": null,
"tag_to_replace": {
"name": "div"
}
},
{
"tags": ["^header$", "^footer$"],
"condition": null,
"tag_to_replace": {
"name": "span"
}
},
{
"tags": ["^code$", "^kbd$", "^var$"],
"condition": {
"parent_tags": ":not(pre, span)",
"child_tags": null,
"attrs": null
},
"tag_to_replace": {
"name": "span"
}
},
{
"tags": ["^em$"],
"condition": null,
"tag_to_replace": {
"name": "i"
}
},
{
"tags": ["^b$"],
"condition": null,
"tag_to_replace": {
"name": "strong"
}
},
{
"tags": ["^image$"],
"condition": null,
"tag_to_replace": {
"name": "img"
}
}
]
},
{
"preset_name": "attrs_remover",
"rules": [
{
"tags": ["^sup$"],
"condition": null
}
]
},
{
"preset_name": "attr_replacer",
"rules": [
{
"tags": ["^img$"],
"condition": {
"attrs": [
{
"name": "xlink:href",
"value": ".*"
}
]
},
"attr_to_replace": {
"name": "src",
"value": null
}
}
]
},
{
"preset_name": "unwrapper",
"rules": [
{
"tags": [
"^section$",
"^blockquote$",
"^article$",
"^figcaption$",
"^main$",
"^body$",
"^html$",
"^svg$"
],
"condition": null
},
{
"tags": ["^p$"],
"condition": {
"parent_tags": "li",
"child_tags": null,
"attrs": null
}
}
]
},
{
"preset_name": "inserter",
"rules": [
{
"tags": ["^pre$"],
"condition": {
"parent_tags": null,
"child_tags": ":not(:has(code, kbd, var))",
"attrs": null
},
"tag_to_insert": {
"name": "code",
"attrs": []
}
},
{
"tags": ["^h[1-5]$"],
"condition": null,
"tag_to_insert": {
"name":"strong",
"attrs": []
}
}
]
}
]