forked from LiveCarta/BookConverter
add new fixture for creating new user
This commit is contained in:
@@ -111,6 +111,7 @@ class HTMLPreprocessor:
|
|||||||
table.decompose()
|
table.decompose()
|
||||||
|
|
||||||
def _change_table_of_contents(self):
|
def _change_table_of_contents(self):
|
||||||
|
self._change_table_of_contents()
|
||||||
tables = self.body_tag.find_all("div", id=re.compile(r'^Table of Contents\d+'))
|
tables = self.body_tag.find_all("div", id=re.compile(r'^Table of Contents\d+'))
|
||||||
for table in tables:
|
for table in tables:
|
||||||
table.wrap(self.html_soup.new_tag("TOC"))
|
table.wrap(self.html_soup.new_tag("TOC"))
|
||||||
@@ -138,7 +139,6 @@ class HTMLPreprocessor:
|
|||||||
|
|
||||||
self._font_to_span()
|
self._font_to_span()
|
||||||
# self._remove_table_of_contents()
|
# self._remove_table_of_contents()
|
||||||
self._change_table_of_contents()
|
|
||||||
|
|
||||||
def _process_paragraph(self):
|
def _process_paragraph(self):
|
||||||
"""
|
"""
|
||||||
@@ -303,10 +303,12 @@ class HTMLPreprocessor:
|
|||||||
footnotes = []
|
footnotes = []
|
||||||
|
|
||||||
for i, (anc_tag, cont_tag) in enumerate(zip(footnote_anchors, footnote_content)):
|
for i, (anc_tag, cont_tag) in enumerate(zip(footnote_anchors, footnote_content)):
|
||||||
if cont_tag.find('a').attrs.get('href') is None:
|
true_a_tag = cont_tag.find('a', {'class': 'sdfootnotesym-western'})
|
||||||
|
if true_a_tag.attrs.get('href') is None:
|
||||||
cont_tag.a.decompose()
|
cont_tag.a.decompose()
|
||||||
continue
|
continue
|
||||||
assert anc_tag['name'] == cont_tag.find('a')['href'][1:], \
|
|
||||||
|
assert anc_tag['name'] == true_a_tag['href'][1:], \
|
||||||
'Something went wrong with footnotes after libra conversion'
|
'Something went wrong with footnotes after libra conversion'
|
||||||
|
|
||||||
new_tag = BeautifulSoup(features='lxml').new_tag('sup')
|
new_tag = BeautifulSoup(features='lxml').new_tag('sup')
|
||||||
@@ -317,12 +319,13 @@ class HTMLPreprocessor:
|
|||||||
anc_tag.replace_with(new_tag)
|
anc_tag.replace_with(new_tag)
|
||||||
|
|
||||||
# extra digits in footnotes from documents downloaded from livecarta
|
# extra digits in footnotes from documents downloaded from livecarta
|
||||||
a_text = cont_tag.a.text
|
a_text = true_a_tag.text
|
||||||
if len(cont_tag.find_all('p')):
|
if len(cont_tag.find_all('p')):
|
||||||
sup = cont_tag.find_all('p')[0].find('sup')
|
sup = cont_tag.find_all('p')[0].find('sup')
|
||||||
if sup and sup.text == a_text:
|
if sup and sup.text == a_text:
|
||||||
sup.decompose()
|
sup.decompose()
|
||||||
cont_tag.a.decompose()
|
for tag_a in cont_tag.find_all('a'):
|
||||||
|
tag_a.decompose()
|
||||||
|
|
||||||
unicode_string = ''
|
unicode_string = ''
|
||||||
for child in cont_tag.children:
|
for child in cont_tag.children:
|
||||||
@@ -405,6 +408,9 @@ class HTMLPreprocessor:
|
|||||||
# outline_level = tag.name[-1] # TODO: add prediction of the outline level
|
# outline_level = tag.name[-1] # TODO: add prediction of the outline level
|
||||||
# TODO: escape from recounting paragraphs every time
|
# TODO: escape from recounting paragraphs every time
|
||||||
elif tag.name == "p":
|
elif tag.name == "p":
|
||||||
|
link_name = tag.a.attrs['name']
|
||||||
|
toc_item = self.body_tag.find("a", {'href': '#' + link_name})
|
||||||
|
# TODO: if it is needed, check existence of the link in toc
|
||||||
if tag in self.body_tag.find_all("p"):
|
if tag in self.body_tag.find_all("p"):
|
||||||
new_tag = BeautifulSoup(features="lxml").new_tag("h" + outline_level)
|
new_tag = BeautifulSoup(features="lxml").new_tag("h" + outline_level)
|
||||||
text = tag.text
|
text = tag.text
|
||||||
|
|||||||
Reference in New Issue
Block a user