Add conversion of block quotes

- from a 1-cell table (due to the first-step libra conversion) to a blockquote tag
2020-05-25 15:49:14 +03:00
parent f543758fdd
commit c8fbe3c520
1 changed files with 39 additions and 0 deletions
--- a/src/book.py
+++ b/src/book.py
@@ -444,6 +444,42 @@ class Book:

        self.tables_amount = len(tables)

+    def _process_quotes(self):
+        """
+            Replace one-cell tables holding block quotes with <blockquote> tags.
+
+            After docx to html conversion a block quote is stored inside a table
+            with a single cell of width 600; its text is wrapped in <i> tags:
+
+            <table cellpadding="7" cellspacing="0" width="614">
+                <col width="600"/>
+                <tr>
+                    <td width="600">
+                        <p style="text-align: justify;"><i>aaaaa</i></p>
+                        <p style="text-align: justify;"><br/></p>
+                    </td>
+                </tr>
+            </table>
+
+            Only the cell's <p> tags are moved into the new <blockquote>.
+        """
+        for table in self.body_tag.find_all("table"):
+            rows = table.find_all("tr")
+            cells = table.find_all("td")
+            if len(rows) != 1 or len(cells) != 1 or cells[0].get('width') != '600':
+                continue
+
+            paragraphs = cells[0].find_all("p")
+            # all() of an empty list is True, so a cell without <p> tags must be
+            # skipped explicitly or its table would become an empty blockquote.
+            if not paragraphs or any(p.i is None and p.br is None for p in paragraphs):
+                continue
+
+            blockquote = BeautifulSoup(features='lxml').new_tag('blockquote')
+            for p in paragraphs:
+                blockquote.append(p)
+            table.replace_with(blockquote)
+
    # def _process_quotes(self):
    #     """
    #     Function to process <dl> tags. All tags will be replaced with <blockquote> tags.
@@ -715,6 +751,9 @@ class Book:
            self._process_tables()
            self.log(f'{self.tables_amount} tables have been processed.')

+            self.log('Block quotes processing.')
+            self._process_quotes()
+
            self.log('Footnotes processing.')
            self._process_footnotes()
            self.log(f'{len(self.footnotes)} footnotes have been processed.')