Formatting: documentation + optimization

This commit is contained in:
Kiryl
2022-06-06 16:37:42 +03:00
parent 002316f086
commit acb2ce48c2
3 changed files with 33 additions and 43 deletions

View File

@@ -116,8 +116,8 @@ class HTMLDocxPreprocessor:
if face is not None:
face = re.sub(r",[\w,\- ]*$", "", face)
if face != LiveCartaConfig.DEFAULT_FONT_NAME and LiveCartaConfig.font_correspondence_table.get(face):
font.attrs["face"] = LiveCartaConfig.font_correspondence_table[face]
if face != LiveCartaConfig.DEFAULT_FONT_NAME and LiveCartaConfig.FONT_CORRESPONDANCE_TABLE.get(face):
font.attrs["face"] = LiveCartaConfig.FONT_CORRESPONDANCE_TABLE[face]
else:
font.attrs["face"] = LiveCartaConfig.DEFAULT_FONT_NAME
@@ -137,11 +137,11 @@ class HTMLDocxPreprocessor:
def clean_trash(self):
"""Function to remove all styles and tags we don't need."""
self._clean_tag('span', 'style', re.compile(
r'^background: #[0-9a-fA-F]{6}$'))
r'^background: #[\da-fA-F]{6}$'))
# todo: check for other languages
self._clean_tag('span', 'lang', re.compile(r'^ru-RU$'))
self._clean_tag('span', 'style', re.compile(
'^letter-spacing: -?[\d\.]+pt$'))
'^letter-spacing: -?[\d.]+pt$'))
self._clean_tag('font', 'face', re.compile(
r'^Times New Roman[\w, ]+$'))
@@ -179,13 +179,13 @@ class HTMLDocxPreprocessor:
style = p.get('style')
if style:
indent = re.search(r'text-indent: ([\d\.]{1,4})in', style)
margin_left = re.search(r'margin-left: ([\d\.]{1,4})in', style)
indent = re.search(r'text-indent: ([\d.]{1,4})in', style)
margin_left = re.search(r'margin-left: ([\d.]{1,4})in', style)
margin_right = re.search(
r'margin-right: ([\d\.]{1,4})in', style)
margin_top = re.search(r'margin-top: ([\d\.]{1,4})in', style)
r'margin-right: ([\d.]{1,4})in', style)
margin_top = re.search(r'margin-top: ([\d.]{1,4})in', style)
margin_bottom = re.search(
r'margin-bottom: ([\d\.]{1,4})in', style)
r'margin-bottom: ([\d.]{1,4})in', style)
else:
indent = None
margin_left = None
@@ -517,7 +517,7 @@ class HTMLDocxPreprocessor:
Function for gathering info about top-level chapters.
Assume:
- Headers with smallest outline(or digit in <h>) are top level chapters.
- Headers with the smallest outline (or digit in <h>) are top-level chapters.
[ It is consistent with a recursive algorithm
for saving content to a resulted json structure,
which happens in header_to_json()]
@@ -560,7 +560,7 @@ class HTMLDocxPreprocessor:
Assume header(s) to be introduction if:
1. one header not numbered, before 1 numbered header
2. it is first header from the top level list and it equals to 'introductio
2. it is the first header in the top-level list, and it equals 'introduction'
Returns
-------
None
@@ -665,10 +665,6 @@ class HTMLDocxPreprocessor:
Function
- process tags <li>.
- unwrap <p> tags.
Parameters
----------
body_tag: Tag, soup object
Returns
-------
None