From c8fbe3c5202dd5e8a4f44ca4f24418c900ea655e Mon Sep 17 00:00:00 2001 From: shirshasa Date: Mon, 25 May 2020 15:49:14 +0300 Subject: [PATCH] add converting block quotes - from 1 cell table (due to first step libra conversion) to blockquote tag --- src/book.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/book.py b/src/book.py index 7c19a59..b751220 100644 --- a/src/book.py +++ b/src/book.py @@ -444,6 +444,42 @@ class Book: self.tables_amount = len(tables) + def _process_quotes(self): + """ + Function to process block quotes. + After docx to html conversion block quotes are stored inside table with 1 cell. + All text is wrapped in an <i> tag. + Such tables will be replaced with <blockquote>
tags. + + <table> + <tr> + <td width="600"> + <p><i>aaaaa</i></p> + <p><br/></p> + </td> + </tr> + </table>
+ + """ + tables = self.body_tag.find_all("table") + for table in tables: + trs = table.find_all("tr") + tds = table.find_all("td") + if len(trs) == 1 and len(tds) == 1 and tds[0].get('width') == '600': + td = tds[0] + paragraphs = td.find_all("p") + has_i_tag_or_br = [(p.i, p.br) for p in paragraphs] + has_i_tag_or_br = [x[0] is not None or x[1] is not None + for x in has_i_tag_or_br] + + if all(has_i_tag_or_br): + new_div = BeautifulSoup(features='lxml').new_tag('blockquote') + for p in paragraphs: + new_div.append(p) + + table.replaceWith(new_div) + # def _process_quotes(self): # """ # Function to process
tags. All tags will be replaced with
tags. @@ -715,6 +751,9 @@ class Book: self._process_tables() self.log(f'{self.tables_amount} tables have been processed.') + self.log('Block quotes processing.') + self._process_quotes() + self.log('Footnotes processing.') self._process_footnotes() self.log(f'{len(self.footnotes)} footnotes have been processed.')