diff --git a/src/book.py b/src/book.py index ccc7b55..4e706b3 100644 --- a/src/book.py +++ b/src/book.py @@ -562,7 +562,11 @@ class Book: 'Some ting went wrong with footnotes after libra conversion' footnotes = [] + for i, (anc_tag, cont_tag) in enumerate(zip(footnote_anchors, footnote_content)): + if cont_tag.find('a').attrs.get('href') is None: + cont_tag.a.decompose() + continue assert anc_tag['name'] == cont_tag.find('a')['href'][1:], \ 'Something went wrong with footnotes after libra conversion' @@ -575,9 +579,10 @@ class Book: # extra digits in footnotes from documents downloaded from livecarta a_text = cont_tag.a.text - sup = cont_tag.find_all('p')[0].find('sup') - if sup and sup.text == a_text: - sup.decompose() + if len(cont_tag.find_all('p')): + sup = cont_tag.find_all('p')[0].find('sup') + if sup and sup.text == a_text: + sup.decompose() cont_tag.a.decompose() unicode_string = '' @@ -612,7 +617,10 @@ class Book: for img in img_tags: img_name = img.attrs.get('src') - img_path = pathlib.Path(f'{self.html_path.parent}/{img_name}') + if (len(img_name) >= 3 ) and img_name[:3] == '../': + img_name = img_name[3:] + + img_path = pathlib.Path(f'{self.html_path.parent}', f'{img_name}') if self.access is not None: link = self.access.send_image(img_path, self.book_id) @@ -1050,8 +1058,8 @@ class Book: if __name__ == "__main__": folder = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - file = pathlib.Path(os.path.join(folder, 'html/82/82.html')) - out_path = pathlib.Path(os.path.join(folder, 'json/82.json')) + file = pathlib.Path(os.path.join(folder, 'html/ch13/Ch_13_edit.html')) + out_path = pathlib.Path(os.path.join(folder, 'json/ch13.json')) book = Book(html_path=file, output_path=out_path) book.convert_from_html(logging_format='%(asctime)s - %(levelname)s - %(message)s')