Improve docx presets

This commit is contained in:
Kiryl
2022-10-13 11:05:32 +03:00
parent a08f210d4d
commit 5a57bf4d32
2 changed files with 72 additions and 18 deletions

View File

@@ -12,9 +12,13 @@
"name": "id", "name": "id",
"value": "^Table of Contents\\d+" "value": "^Table of Contents\\d+"
} }
] ],
"text": null
}, },
"tag_to_wrap": "TOC" "tag_to_wrap": {
"name": "TOC",
"attrs": []
}
} }
] ]
}, },
@@ -35,7 +39,8 @@
"name": "id", "name": "id",
"value": "^Table of Contents\\d+" "value": "^Table of Contents\\d+"
} }
] ],
"text": null
} }
} }
] ]
@@ -46,7 +51,10 @@
{ {
"tags": ["^h[6-9]$"], "tags": ["^h[6-9]$"],
"condition": null, "condition": null,
"tag_to_replace": "p" "tag_to_replace": {
"name": "p",
"attrs": null
}
}, },
{ {
"tags": ["^div$"], "tags": ["^div$"],
@@ -58,9 +66,13 @@
"name": "style", "name": "style",
"value": "column-count: 2" "value": "column-count: 2"
} }
] ],
"text": null
}, },
"tag_to_replace": "p" "tag_to_replace": {
"name": "p",
"attrs": null
}
} }
] ]
}, },
@@ -68,12 +80,14 @@
"preset_name": "attr_replacer", "preset_name": "attr_replacer",
"rules": [ "rules": [
{ {
"attr": { "tags": ["^p$"],
"name": "style",
"value": "column-count: 2"
},
"condition": { "condition": {
"tags": ["^p$"] "attrs": [
{
"name": "style",
"value": "column-count: 2"
}
]
}, },
"attr_to_replace": { "attr_to_replace": {
"name": "class", "name": "class",
@@ -103,7 +117,8 @@
"name": "face", "name": "face",
"value": "^Times New Roman[\\w, ]+$" "value": "^Times New Roman[\\w, ]+$"
} }
] ],
"text": null
} }
}, },
{ {
@@ -111,7 +126,8 @@
"condition": { "condition": {
"parent_tags": ":is(li)", "parent_tags": ":is(li)",
"child_tags": null, "child_tags": null,
"attrs": null "attrs": null,
"text": null
} }
}, },
{ {
@@ -124,7 +140,8 @@
"name": "name", "name": "name",
"value": "_GoBack" "value": "_GoBack"
} }
] ],
"text": null
} }
}, },
{ {
@@ -132,7 +149,8 @@
"condition": { "condition": {
"parent_tags": ":is(a)", "parent_tags": ":is(a)",
"child_tags": ":is(a)", "child_tags": ":is(a)",
"attrs": null "attrs": null,
"text": null
} }
}, },
{ {
@@ -140,7 +158,8 @@
"condition": { "condition": {
"parent_tags": ":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)", "parent_tags": ":is(h1, h2, h3, h4, h5, h6, h7, h8, h9)",
"child_tags": null, "child_tags": null,
"attrs": null "attrs": null,
"text": null
} }
}, },
{ {
@@ -148,5 +167,40 @@
"condition": null "condition": null
} }
] ]
},
{
"preset_name": "inserter",
"rules": [
{
"tags": ["^p$"],
"condition": {
"parent_tags": null,
"child_tags": null,
"attrs": null,
"text": "\\$\\$[\\s\\S]*?\\$\\$"
},
"tag_to_insert": {
"name": "span",
"attrs": [
{
"name": "class",
"value": "math-tex"
}
]
}
}
]
},
{
"preset_name": "text_replacer",
"rules": [
{
"tags": ["^p$"],
"condition": {
"text": "(\\\\nonumber\\\\\\\\\\\\noalign{\\\\pagebreak}[\\s\\S]*?)\\\\"
},
"text_to_replace": "\\\\"
}
]
} }
] ]

View File

@@ -80,7 +80,8 @@ class DocxBook(BookSolver):
if __name__ == "__main__": if __name__ == "__main__":
docx_file_path = "../../books/docx/AmericanGovernment3e-WEB.docx"
docx_file_path = "../../books/docx/output.docx"
logger_object = BookLogger( logger_object = BookLogger(
name="docx", book_id=docx_file_path.split("/")[-1]) name="docx", book_id=docx_file_path.split("/")[-1])
locker = Event() locker = Event()
@@ -88,7 +89,6 @@ if __name__ == "__main__":
html_converter = Docx2LibreHtml(file_path=docx_file_path, html_converter = Docx2LibreHtml(file_path=docx_file_path,
logger=logger_object, libre_locker=locker) logger=logger_object, libre_locker=locker)
html_preprocessor = HtmlPresetsProcessor( html_preprocessor = HtmlPresetsProcessor(
logger=logger_object, preset_path="../../presets/docx_presets.json") logger=logger_object, preset_path="../../presets/docx_presets.json")
style_preprocessor = StyleReader() style_preprocessor = StyleReader()