forked from LiveCarta/BookConverter
Formatting: documentation + optimization
This commit is contained in:
@@ -116,8 +116,8 @@ class HTMLDocxPreprocessor:
|
||||
|
||||
if face is not None:
|
||||
face = re.sub(r",[\w,\- ]*$", "", face)
|
||||
if face != LiveCartaConfig.DEFAULT_FONT_NAME and LiveCartaConfig.font_correspondence_table.get(face):
|
||||
font.attrs["face"] = LiveCartaConfig.font_correspondence_table[face]
|
||||
if face != LiveCartaConfig.DEFAULT_FONT_NAME and LiveCartaConfig.FONT_CORRESPONDANCE_TABLE.get(face):
|
||||
font.attrs["face"] = LiveCartaConfig.FONT_CORRESPONDANCE_TABLE[face]
|
||||
else:
|
||||
font.attrs["face"] = LiveCartaConfig.DEFAULT_FONT_NAME
|
||||
|
||||
@@ -137,11 +137,11 @@ class HTMLDocxPreprocessor:
|
||||
def clean_trash(self):
|
||||
"""Function to remove all styles and tags we don't need."""
|
||||
self._clean_tag('span', 'style', re.compile(
|
||||
r'^background: #[0-9a-fA-F]{6}$'))
|
||||
r'^background: #[\da-fA-F]{6}$'))
|
||||
# todo: check for other languages
|
||||
self._clean_tag('span', 'lang', re.compile(r'^ru-RU$'))
|
||||
self._clean_tag('span', 'style', re.compile(
|
||||
'^letter-spacing: -?[\d\.]+pt$'))
|
||||
'^letter-spacing: -?[\d.]+pt$'))
|
||||
|
||||
self._clean_tag('font', 'face', re.compile(
|
||||
r'^Times New Roman[\w, ]+$'))
|
||||
@@ -179,13 +179,13 @@ class HTMLDocxPreprocessor:
|
||||
style = p.get('style')
|
||||
|
||||
if style:
|
||||
indent = re.search(r'text-indent: ([\d\.]{1,4})in', style)
|
||||
margin_left = re.search(r'margin-left: ([\d\.]{1,4})in', style)
|
||||
indent = re.search(r'text-indent: ([\d.]{1,4})in', style)
|
||||
margin_left = re.search(r'margin-left: ([\d.]{1,4})in', style)
|
||||
margin_right = re.search(
|
||||
r'margin-right: ([\d\.]{1,4})in', style)
|
||||
margin_top = re.search(r'margin-top: ([\d\.]{1,4})in', style)
|
||||
r'margin-right: ([\d.]{1,4})in', style)
|
||||
margin_top = re.search(r'margin-top: ([\d.]{1,4})in', style)
|
||||
margin_bottom = re.search(
|
||||
r'margin-bottom: ([\d\.]{1,4})in', style)
|
||||
r'margin-bottom: ([\d.]{1,4})in', style)
|
||||
else:
|
||||
indent = None
|
||||
margin_left = None
|
||||
@@ -517,7 +517,7 @@ class HTMLDocxPreprocessor:
|
||||
Function for gathering info about top-level chapters.
|
||||
|
||||
Assume:
|
||||
- Headers with smallest outline(or digit in <h>) are top level chapters.
|
||||
- Headers with the smallest outline (or digit in <h>) are top-level chapters.
|
||||
[ It is consistent with a recursive algorithm
|
||||
for saving content to a resulted json structure,
|
||||
which happens in header_to_json()]
|
||||
@@ -560,7 +560,7 @@ class HTMLDocxPreprocessor:
|
||||
|
||||
Assume header(s) to be introduction if:
|
||||
1. one header not numbered, before 1 numbered header
|
||||
2. it is first header from the top level list and it equals to 'introductio
|
||||
2. it is the first header from the top-level list, and it equals 'introduction'
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
@@ -665,10 +665,6 @@ class HTMLDocxPreprocessor:
|
||||
Function
|
||||
- process tags <li>.
|
||||
- unwrap <p> tags.
|
||||
Parameters
|
||||
----------
|
||||
body_tag: Tag, soup object
|
||||
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
|
||||
Reference in New Issue
Block a user