From 2c014ae36eafcf7728c9e2ae8b1d4a8c9dc027ce Mon Sep 17 00:00:00 2001 From: shirshasa Date: Thu, 16 Jul 2020 15:31:02 +0300 Subject: [PATCH] LAW-3262 - extra digits in footnotes from documents downloaded from livecarta --- src/book.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/book.py b/src/book.py index e72f97c..ccc7b55 100644 --- a/src/book.py +++ b/src/book.py @@ -564,7 +564,7 @@ class Book: footnotes = [] for i, (anc_tag, cont_tag) in enumerate(zip(footnote_anchors, footnote_content)): assert anc_tag['name'] == cont_tag.find('a')['href'][1:], \ - 'Some ting went wrong with footnotes after libra conversion' + 'Something went wrong with footnotes after libra conversion' new_tag = BeautifulSoup(features='lxml').new_tag('sup') new_tag['class'] = 'footnote-element' @@ -573,7 +573,13 @@ class Book: new_tag.string = '*' anc_tag.replace_with(new_tag) + # extra digits in footnotes from documents downloaded from livecarta + a_text = cont_tag.a.text + sup = cont_tag.find_all('p')[0].find('sup') + if sup and sup.text == a_text: + sup.decompose() cont_tag.a.decompose() + unicode_string = '' for child in cont_tag.children: if type(child) is NavigableString: @@ -586,15 +592,8 @@ class Book: content = self._clean_footnote_content(unicode_string) cont_tag.decompose() - # new_tag = BeautifulSoup(features="lxml").new_tag('div') - # new_tag['class'] = 'footnote-element' - # new_tag['data-id'] = f'"{i}"' - # new_tag['id'] = f'footnote-{i}' - # new_tag.string = content - # footnotes.append(str(new_tag)) footnotes.append(content) - # i += 1 self.footnotes = footnotes