forked from LiveCarta/BookConverter
add new fixture for creating new user
This commit is contained in:
@@ -111,6 +111,7 @@ class HTMLPreprocessor:
|
||||
table.decompose()
|
||||
|
||||
def _change_table_of_contents(self):
|
||||
self._change_table_of_contents()
|
||||
tables = self.body_tag.find_all("div", id=re.compile(r'^Table of Contents\d+'))
|
||||
for table in tables:
|
||||
table.wrap(self.html_soup.new_tag("TOC"))
|
||||
@@ -138,7 +139,6 @@ class HTMLPreprocessor:
|
||||
|
||||
self._font_to_span()
|
||||
# self._remove_table_of_contents()
|
||||
self._change_table_of_contents()
|
||||
|
||||
def _process_paragraph(self):
|
||||
"""
|
||||
@@ -303,10 +303,12 @@ class HTMLPreprocessor:
|
||||
footnotes = []
|
||||
|
||||
for i, (anc_tag, cont_tag) in enumerate(zip(footnote_anchors, footnote_content)):
|
||||
if cont_tag.find('a').attrs.get('href') is None:
|
||||
true_a_tag = cont_tag.find('a', {'class': 'sdfootnotesym-western'})
|
||||
if true_a_tag.attrs.get('href') is None:
|
||||
cont_tag.a.decompose()
|
||||
continue
|
||||
assert anc_tag['name'] == cont_tag.find('a')['href'][1:], \
|
||||
|
||||
assert anc_tag['name'] == true_a_tag['href'][1:], \
|
||||
'Something went wrong with footnotes after libra conversion'
|
||||
|
||||
new_tag = BeautifulSoup(features='lxml').new_tag('sup')
|
||||
@@ -317,12 +319,13 @@ class HTMLPreprocessor:
|
||||
anc_tag.replace_with(new_tag)
|
||||
|
||||
# extra digits in footnotes from documents downloaded from livecarta
|
||||
a_text = cont_tag.a.text
|
||||
a_text = true_a_tag.text
|
||||
if len(cont_tag.find_all('p')):
|
||||
sup = cont_tag.find_all('p')[0].find('sup')
|
||||
if sup and sup.text == a_text:
|
||||
sup.decompose()
|
||||
cont_tag.a.decompose()
|
||||
for tag_a in cont_tag.find_all('a'):
|
||||
tag_a.decompose()
|
||||
|
||||
unicode_string = ''
|
||||
for child in cont_tag.children:
|
||||
@@ -405,6 +408,9 @@ class HTMLPreprocessor:
|
||||
# outline_level = tag.name[-1] # TODO: add prediction of the outline level
|
||||
# TODO: escape from recounting paragraphs every time
|
||||
elif tag.name == "p":
|
||||
link_name = tag.a.attrs['name']
|
||||
toc_item = self.body_tag.find("a", {'href': '#' + link_name})
|
||||
# TODO: if it is needed, check existence of the link in toc
|
||||
if tag in self.body_tag.find_all("p"):
|
||||
new_tag = BeautifulSoup(features="lxml").new_tag("h" + outline_level)
|
||||
text = tag.text
|
||||
|
||||
Reference in New Issue
Block a user