From 8488ceb146d5eef8a29eb294a49ada762114938a Mon Sep 17 00:00:00 2001
From: Kiryl
Date: Tue, 15 Nov 2022 16:39:27 +0300
Subject: [PATCH] folders

---
Review notes (this area is ignored by git am / git apply):
 * preset/epub_presets.json, "replacer": the "blockquote" tag pattern is
   anchored as "^blockquote$" like every other tag pattern in the presets.
 * preset/epub_presets.json, "unwrapper": "parent_tags" uses ":is(li)",
   the same selector form as the matching rule in preset/docx_presets.json.
 * Indentation was re-created with two spaces; the blob hashes on the
   "index" lines below were not recomputed.

 configs/.gitignore         |   2 +
 documentation/style_config |  81 ++++++++++++
 preset/.gitignore          |   3 +
 preset/docx_presets.json   | 254 +++++++++++++++++++++++++++++++++++++
 preset/epub_presets.json   | 210 ++++++++++++++++++++++++++++++
 5 files changed, 550 insertions(+)
 create mode 100644 configs/.gitignore
 create mode 100644 documentation/style_config
 create mode 100644 preset/.gitignore
 create mode 100644 preset/docx_presets.json
 create mode 100644 preset/epub_presets.json

diff --git a/configs/.gitignore b/configs/.gitignore
new file mode 100644
index 0000000..d6b7ef3
--- /dev/null
+++ b/configs/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/documentation/style_config b/documentation/style_config
new file mode 100644
index 0000000..10fcb42
--- /dev/null
+++ b/documentation/style_config
@@ -0,0 +1,81 @@
+config.allowedContent = {
+  sup: {
+    attributes: ['*'],
+    classes: ['*']
+  },
+  table: {
+    attributes: ['*'],
+    styles: ['*']
+  },
+  tr: {
+    attributes: ['*'],
+    styles: ['*']
+  },
+  th: {
+    attributes: ['*'],
+    classes: ['p-indent'],
+    styles: ['*']
+  },
+  td: {
+    attributes: ['*'],
+    classes: ['p-indent'],
+    styles: ['*']
+  },
+  tbody: {
+    attributes: ['*'],
+    styles: ['*']
+  },
+  thead: {
+    attributes: ['*'],
+    styles: ['*']
+  },
+  caption : {},
+  img : {
+    attributes: ['*'],
+    classes: ['*'],
+    styles: ['*']
+  },
+  code : {
+    attributes: ['*'],
+    classes: ['*'],
+    styles: ['*']
+  },
+  pre : {
+    attributes: ['*'],
+    classes: ['*'],
+    styles: ['*']
+  },
+  p : {
+    styles: ['text-align', 'text-indent', 'border-bottom', 'border-top'],
+    classes: ['*']
+  },
+  strong : {},
+  i : {},
+  s : {},
+  u : {},
+  ul : {},
+  ol : {},
+  li : {
+    styles: ['text-align']
+  },
+  blockquote : {},
+  span : {
+    attributes: ['*'],
+    classes: ['*'],
+    styles: ['*']
+  },
+  a : {
+    attributes: ['href', 'data-anchor-id', 'data-chapter-id', 'placeholder'],
+    classes: ['link-to-anchor'],
+  },
+  iframe : {
+    attributes: ['*'],
+    classes: ['*'],
+    styles: ['*']
+  },
+  div : {
+    attributes: ['*'],
+    classes: ['youtube-embed-wrapper'],
+    styles: ['*']
+  }
+};
\ No newline at end of file
diff --git a/preset/.gitignore b/preset/.gitignore
new file mode 100644
index 0000000..c3bf4b1
--- /dev/null
+++ b/preset/.gitignore
@@ -0,0 +1,3 @@
+*
+!.gitignore
+!*.json
\ No newline at end of file
diff --git a/preset/docx_presets.json b/preset/docx_presets.json
new file mode 100644
index 0000000..6d5613b
--- /dev/null
+++ b/preset/docx_presets.json
@@ -0,0 +1,254 @@
+[
+  {
+    "preset_name": "wrapper",
+    "rules": [
+      {
+        "tags": ["^div$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": null,
+          "attrs": [
+            {
+              "name": "id",
+              "value": "^Table of Contents\\d+"
+            }
+          ],
+          "text": null
+        },
+        "tag_to_wrap": {
+          "name": "TOC",
+          "attrs": []
+        }
+      }
+    ]
+  },
+  {
+    "preset_name": "decomposer",
+    "rules": [
+      {
+        "tags": ["^div$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": null,
+          "attrs": [
+            {
+              "name": "title",
+              "value": "footer"
+            }
+          ],
+          "text": null
+        }
+      },
+      {
+        "tags": ["^div$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": null,
+          "attrs": [
+            {
+              "name": "id",
+              "value": "^Table of Contents\\d+"
+            }
+          ],
+          "text": null
+        }
+      }
+    ]
+  },
+  {
+    "preset_name": "replacer",
+    "rules": [
+      {
+        "tags": ["^h[6-9]$"],
+        "condition": null,
+        "tag_to_replace": {
+          "name": "p",
+          "attrs": null
+        }
+      },
+      {
+        "tags": ["^div$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": null,
+          "attrs": [
+            {
+              "name": "style",
+              "value": "column-count: 2"
+            }
+          ],
+          "text": null
+        },
+        "tag_to_replace": {
+          "name": "p",
+          "attrs": null
+        }
+      }
+    ]
+  },
+  {
+    "preset_name": "attr_replacer",
+    "rules": [
+      {
+        "tags": ["^p$"],
+        "condition": {
+          "attrs": [
+            {
+              "name": "style",
+              "value": "column-count: 2"
+            }
+          ]
+        },
+        "attr_to_replace": {
+          "name": "class",
+          "value": "columns2"
+        }
+      }
+    ]
+  },
+  {
+    "preset_name": "unwrapper",
+    "rules": [
+      {
+        "tags": ["^span$"],
+        "condition": {
+          "parent_tags": ":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)",
+          "child_tags": null,
+          "attrs": null,
+          "text": null
+        }
+      },
+      {
+        "tags": ["^span$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": null,
+          "attrs": [
+            {
+              "name": "style",
+              "value": "(^background: #[\\da-fA-F]{6}$)|(^letter-spacing: -?[\\d.]+pt$)"
+            }
+          ],
+          "text": null
+        }
+      },
+      {
+        "tags": ["^span$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": null,
+          "attrs": [
+            {
+              "name": "lang",
+              "value": "^ru-RU$"
+            }
+          ],
+          "text": null
+        }
+      },
+      {
+        "tags": ["^span$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": null,
+          "attrs": [
+            {
+              "name": "face",
+              "value": "^Times New Roman[\\w, ]+$"
+            }
+          ],
+          "text": null
+        }
+      },
+      {
+        "tags": ["^p$"],
+        "condition": {
+          "parent_tags": ":is(li)",
+          "child_tags": null,
+          "attrs": null,
+          "text": null
+        }
+      },
+      {
+        "tags": ["^a$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": null,
+          "attrs": [
+            {
+              "name": "name",
+              "value": "_GoBack"
+            }
+          ],
+          "text": null
+        }
+      },
+      {
+        "tags": ["^u$"],
+        "condition": {
+          "parent_tags": ":is(a)",
+          "child_tags": null,
+          "attrs": null,
+          "text": null
+        }
+      },
+      {
+        "tags": ["^u$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": ":is(a)",
+          "attrs": null,
+          "text": null
+        }
+      },
+      {
+        "tags": ["^b$"],
+        "condition": {
+          "parent_tags": ":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)",
+          "child_tags": null,
+          "attrs": null,
+          "text": null
+        }
+      },
+      {
+        "tags": ["^div$"],
+        "condition": null
+      }
+    ]
+  },
+  {
+    "preset_name": "inserter",
+    "rules": [
+      {
+        "tags": ["^p$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": null,
+          "attrs": null,
+          "text": "\\$\\$[\\s\\S]*?\\$\\$"
+        },
+        "tag_to_insert": {
+          "name": "span",
+          "attrs": [
+            {
+              "name": "class",
+              "value": "math-tex"
+            }
+          ]
+        }
+      }
+    ]
+  },
+  {
+    "preset_name": "text_replacer",
+    "rules": [
+      {
+        "tags": ["^p$"],
+        "condition": {
+          "text": "(\\\\nonumber\\\\\\\\\\\\noalign{\\\\pagebreak}[\\s\\S]*?)\\\\"
+        },
+        "text_to_replace": "\\\\"
+      }
+    ]
+  }
+]
diff --git a/preset/epub_presets.json b/preset/epub_presets.json
new file mode 100644
index 0000000..d30e619
--- /dev/null
+++ b/preset/epub_presets.json
@@ -0,0 +1,210 @@
+[
+  {
+    "preset_name": "table_wrapper",
+    "rules": [
+      {
+        "tags": ["^div$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": null,
+          "attrs": [
+            {
+              "name": "width",
+              "value": ".*"
+            }
+          ]
+        }
+      },
+      {
+        "tags": ["^div$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": null,
+          "attrs": [
+            {
+              "name": "border",
+              "value": ".*"
+            }
+          ]
+        }
+      },
+      {
+        "tags": ["^div$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": null,
+          "attrs": [
+            {
+              "name": "style",
+              "value": "border.*"
+            }
+          ]
+        }
+      },
+      {
+        "tags": ["^div$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": null,
+          "attrs": [
+            {
+              "name": "bgcolor",
+              "value": ".*"
+            }
+          ]
+        }
+      },
+      {
+        "tags": ["^section$", "^blockquote$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": null,
+          "attrs": [
+            {
+              "name": "class",
+              "value": "feature[1234]"
+            }
+          ]
+        }
+      }
+    ]
+  },
+  {
+    "preset_name": "replacer",
+    "rules": [
+      {
+        "tags": ["^h[6-9]$", "^figure$", "^section$", "^div$", "^blockquote$"],
+        "condition": null,
+        "tag_to_replace": {
+          "name": "p"
+        }
+      },
+      {
+        "tags": ["^aside$"],
+        "condition": null,
+        "tag_to_replace": {
+          "name": "div"
+        }
+      },
+      {
+        "tags": ["^header$", "^footer$"],
+        "condition": null,
+        "tag_to_replace": {
+          "name": "span"
+        }
+      },
+      {
+        "tags": ["^code$", "^kbd$", "^var$"],
+        "condition": {
+          "parent_tags": ":not(pre, span)",
+          "child_tags": null,
+          "attrs": null
+        },
+        "tag_to_replace": {
+          "name": "span"
+        }
+      },
+      {
+        "tags": ["^em$"],
+        "condition": null,
+        "tag_to_replace": {
+          "name": "i"
+        }
+      },
+      {
+        "tags": ["^b$"],
+        "condition": null,
+        "tag_to_replace": {
+          "name": "strong"
+        }
+      },
+      {
+        "tags": ["^image$"],
+        "condition": null,
+        "tag_to_replace": {
+          "name": "img"
+        }
+      }
+    ]
+  },
+  {
+    "preset_name": "attrs_remover",
+    "rules": [
+      {
+        "tags": ["^sup$"],
+        "condition": null
+      }
+    ]
+  },
+  {
+    "preset_name": "attr_replacer",
+    "rules": [
+      {
+        "tags": ["^img$"],
+        "condition": {
+          "attrs": [
+            {
+              "name": "xlink:href",
+              "value": ".*"
+            }
+          ]
+        },
+        "attr_to_replace": {
+          "name": "src",
+          "value": null
+        }
+      }
+    ]
+  },
+  {
+    "preset_name": "unwrapper",
+    "rules": [
+      {
+        "tags": [
+          "^section$",
+          "^blockquote$",
+          "^article$",
+          "^figcaption$",
+          "^main$",
+          "^body$",
+          "^html$",
+          "^svg$"
+        ],
+        "condition": null
+      },
+      {
+        "tags": ["^p$"],
+        "condition": {
+          "parent_tags": ":is(li)",
+          "child_tags": null,
+          "attrs": null
+        }
+      }
+    ]
+  },
+  {
+    "preset_name": "inserter",
+    "rules": [
+      {
+        "tags": ["^pre$"],
+        "condition": {
+          "parent_tags": null,
+          "child_tags": ":not(:has(code, kbd, var))",
+          "attrs": null
+        },
+        "tag_to_insert": {
+          "name": "code",
+          "attrs": []
+        }
+      },
+      {
+        "tags": ["^h[1-5]$"],
+        "condition": null,
+        "tag_to_insert": {
+          "name": "strong",
+          "attrs": []
+        }
+      }
+    ]
+  }
+]