From a1d7ab0a8d70cc231a1e30f8b3dfdb7804573370 Mon Sep 17 00:00:00 2001
From: shirshasa <katerinagorbac@gmail.com>
Date: Fri, 23 Oct 2020 12:54:54 +0300
Subject: [PATCH] converter fix

---
 src/html_preprocessor.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/src/html_preprocessor.py b/src/html_preprocessor.py
index 48a5baf..3298004 100644
--- a/src/html_preprocessor.py
+++ b/src/html_preprocessor.py
@@ -429,6 +429,7 @@ class HTMLPreprocessor:
         """
         Function to remove digits  from headers.
         """
+        title = re.sub(r'^(\s+)+', '', title)
         title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title)
         # title = re.sub(r'^(?:\.?[MDCLXVIclxvi]+\.? ?)+ ', '', title)  # delete chapter numbering from the title
         title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title)
@@ -458,7 +459,8 @@ class HTMLPreprocessor:
             func(tag)
         else:
             children = list(tag.children)
-            self.apply_func_to_last_child(children[0], func)
+            if children:
+                self.apply_func_to_last_child(children[0], func)
 
     def _preprocessing_headings(self):
         """
@@ -573,6 +575,11 @@ class HTMLPreprocessor:
 
                 content = list(tag.children)
 
+                # Ignore tags whose text is empty (e.g. <a>) in the loop below; they are not removed from the document.
+                content = [item for item in content if
+                           (type(item) is not NavigableString and item.text != '')
+                           or (type(item) is NavigableString)]
+
                 for i, item in enumerate(content):
                     if type(content[i]) is NavigableString:
                         cleaned = re.sub(r'(\s+)+', ' ', content[i])