forked from LiveCarta/BookConverter
converter fix
This commit is contained in:
@@ -429,6 +429,7 @@ class HTMLPreprocessor:
|
||||
"""
|
||||
Function to remove leading whitespace and numbering (digits such as "1.2." and single-letter markers such as "A.") from headers.
|
||||
"""
|
||||
title = re.sub(r'^(\s+)+', '', title)
|
||||
title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title)
|
||||
# title = re.sub(r'^(?:\.?[MDCLXVIclxvi]+\.? ?)+ ', '', title) # delete chapter numbering from the title
|
||||
title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title)
|
||||
@@ -458,7 +459,8 @@ class HTMLPreprocessor:
|
||||
func(tag)
|
||||
else:
|
||||
children = list(tag.children)
|
||||
self.apply_func_to_last_child(children[0], func)
|
||||
if children:
|
||||
self.apply_func_to_last_child(children[0], func)
|
||||
|
||||
def _preprocessing_headings(self):
|
||||
"""
|
||||
@@ -573,6 +575,11 @@ class HTMLPreprocessor:
|
||||
|
||||
content = list(tag.children)
|
||||
|
||||
# ignore junk empty tags such as <a> when processing the content, but do not remove them from the tag
|
||||
content = [item for item in content if
|
||||
(type(item) is not NavigableString and item.text != '')
|
||||
or (type(item) is NavigableString)]
|
||||
|
||||
for i, item in enumerate(content):
|
||||
if type(content[i]) is NavigableString:
|
||||
cleaned = re.sub(r'(\s+)+', ' ', content[i])
|
||||
|
||||
Reference in New Issue
Block a user