add converting block quotes

- from 1-cell table (due to first step libra conversion) to blockquote tag
This commit is contained in:
shirshasa
2020-05-25 15:49:14 +03:00
parent f543758fdd
commit c8fbe3c520

View File

@@ -444,6 +444,42 @@ class Book:
self.tables_amount = len(tables)
def _process_quotes(self):
    """
    Replace single-cell "quote" tables with <blockquote> tags.

    After docx to html conversion block quotes are stored inside a table
    with one row and one cell, and the quoted text is wrapped in an <i>
    tag, for example:

        <table cellpadding="7" cellspacing="0" width="614">
            <col width="600"/>
            <tr>
                <td width="600">
                    <p style="text-align: justify;"><i>aaaaa</i></p>
                    <p style="text-align: justify;"><br/></p>
                </td>
            </tr>
        </table>

    A table in ``self.body_tag`` is converted only when all of the
    following hold:

    * it contains exactly one <tr> and exactly one <td>;
    * the <td> has a ``width`` attribute equal to ``'600'``;
    * the <td> contains at least one <p>, and every <p> contains an <i>
      or a <br> tag.

    The paragraphs are moved into a new <blockquote> tag, which then
    replaces the table in the document. Tables that do not match are left
    untouched.
    """
    for table in self.body_tag.find_all("table"):
        rows = table.find_all("tr")
        cells = table.find_all("td")
        if len(rows) != 1 or len(cells) != 1:
            continue

        cell = cells[0]
        # Width '600' is the cell width shown in the converter output
        # sample in the docstring above.
        if cell.get('width') != '600':
            continue

        paragraphs = cell.find_all("p")
        # all() is vacuously true for an empty sequence; without this guard
        # a cell holding no paragraphs would be swapped for an empty
        # <blockquote> and whatever the cell contained would be lost.
        if not paragraphs:
            continue
        if not all(p.i is not None or p.br is not None for p in paragraphs):
            continue

        blockquote = BeautifulSoup(features='lxml').new_tag('blockquote')
        # Appending a tag that already lives in the tree moves it, so the
        # paragraphs leave the table before the table itself is replaced.
        for paragraph in paragraphs:
            blockquote.append(paragraph)
        table.replace_with(blockquote)
# def _process_quotes(self):
# """
# Function to process <dl> tags. All tags will be replaced with <blockquote> tags.
@@ -715,6 +751,9 @@ class Book:
self._process_tables()
self.log(f'{self.tables_amount} tables have been processed.')
self.log('Block quotes processing.')
self._process_quotes()
self.log('Footnotes processing.')
self._process_footnotes()
self.log(f'{len(self.footnotes)} footnotes have been processed.')