forked from LiveCarta/BookConverter
converter fix
This commit is contained in:
@@ -429,6 +429,7 @@ class HTMLPreprocessor:
|
|||||||
"""
|
"""
|
||||||
Function to remove digits from headers.
|
Function to remove digits from headers.
|
||||||
"""
|
"""
|
||||||
|
title = re.sub(r'^(\s+)+', '', title)
|
||||||
title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title)
|
title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title)
|
||||||
# title = re.sub(r'^(?:\.?[MDCLXVIclxvi]+\.? ?)+ ', '', title) # delete chapter numbering from the title
|
# title = re.sub(r'^(?:\.?[MDCLXVIclxvi]+\.? ?)+ ', '', title) # delete chapter numbering from the title
|
||||||
title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title)
|
title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title)
|
||||||
@@ -458,6 +459,7 @@ class HTMLPreprocessor:
|
|||||||
func(tag)
|
func(tag)
|
||||||
else:
|
else:
|
||||||
children = list(tag.children)
|
children = list(tag.children)
|
||||||
|
if children:
|
||||||
self.apply_func_to_last_child(children[0], func)
|
self.apply_func_to_last_child(children[0], func)
|
||||||
|
|
||||||
def _preprocessing_headings(self):
|
def _preprocessing_headings(self):
|
||||||
@@ -573,6 +575,11 @@ class HTMLPreprocessor:
|
|||||||
|
|
||||||
content = list(tag.children)
|
content = list(tag.children)
|
||||||
|
|
||||||
|
# ignore empty junk tags such as <a> when processing the content, but leave them in the document
|
||||||
|
content = [item for item in content if
|
||||||
|
(type(item) is not NavigableString and item.text != '')
|
||||||
|
or (type(item) is NavigableString)]
|
||||||
|
|
||||||
for i, item in enumerate(content):
|
for i, item in enumerate(content):
|
||||||
if type(content[i]) is NavigableString:
|
if type(content[i]) is NavigableString:
|
||||||
cleaned = re.sub(r'(\s+)+', ' ', content[i])
|
cleaned = re.sub(r'(\s+)+', ' ', content[i])
|
||||||
|
|||||||
Reference in New Issue
Block a user