converter fix

This commit is contained in:
shirshasa
2020-10-23 12:54:54 +03:00
parent cadb5a6e56
commit a1d7ab0a8d

View File

@@ -429,6 +429,7 @@ class HTMLPreprocessor:
"""
Function to remove digits from headers.
"""
title = re.sub(r'^(\s+)+', '', title)
title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title)
# title = re.sub(r'^(?:\.?[MDCLXVIclxvi]+\.? ?)+ ', '', title) # delete chapter numbering from the title
title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title)
@@ -458,7 +459,8 @@ class HTMLPreprocessor:
func(tag)
else:
children = list(tag.children)
self.apply_func_to_last_child(children[0], func)
if children:
self.apply_func_to_last_child(children[0], func)
def _preprocessing_headings(self):
"""
@@ -573,6 +575,11 @@ class HTMLPreprocessor:
content = list(tag.children)
# Ignore junk empty tags such as <a> when processing the content, but do not remove them.
content = [item for item in content if
(type(item) is not NavigableString and item.text != '')
or (type(item) is NavigableString)]
for i, item in enumerate(content):
if type(content[i]) is NavigableString:
cleaned = re.sub(r'(\s+)+', ' ', content[i])