Merge pull request #102 from Teqniksoft/converter-fix

Converter fix
This commit is contained in:
gregory-p
2020-08-12 16:09:17 +03:00
committed by GitHub

View File

@@ -562,7 +562,11 @@ class Book:
'Some ting went wrong with footnotes after libra conversion'
footnotes = []
for i, (anc_tag, cont_tag) in enumerate(zip(footnote_anchors, footnote_content)):
if cont_tag.find('a').attrs.get('href') is None:
cont_tag.a.decompose()
continue
assert anc_tag['name'] == cont_tag.find('a')['href'][1:], \
'Something went wrong with footnotes after libra conversion'
@@ -575,9 +579,10 @@ class Book:
# extra digits in footnotes from documents downloaded from livecarta
a_text = cont_tag.a.text
sup = cont_tag.find_all('p')[0].find('sup')
if sup and sup.text == a_text:
sup.decompose()
if len(cont_tag.find_all('p')):
sup = cont_tag.find_all('p')[0].find('sup')
if sup and sup.text == a_text:
sup.decompose()
cont_tag.a.decompose()
unicode_string = ''
@@ -611,7 +616,10 @@ class Book:
for img in img_tags:
img_name = img.attrs.get('src')
img_path = pathlib.Path(f'{self.html_path.parent}/{img_name}')
if (len(img_name) >= 3 ) and img_name[:3] == '../':
img_name = img_name[3:]
img_path = pathlib.Path(f'{self.html_path.parent}', f'{img_name}')
if self.access is not None:
link = self.access.send_image(img_path, self.book_id)
@@ -1051,8 +1059,8 @@ class Book:
if __name__ == "__main__":
folder = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
file = pathlib.Path(os.path.join(folder, 'html/82/82.html'))
out_path = pathlib.Path(os.path.join(folder, 'json/82.json'))
file = pathlib.Path(os.path.join(folder, 'html/ch13/Ch_13_edit.html'))
out_path = pathlib.Path(os.path.join(folder, 'json/ch13.json'))
book = Book(html_path=file, output_path=out_path)
book.convert_from_html(logging_format='%(asctime)s - %(levelname)s - %(message)s')