fix quotes processing

2020-04-10 17:05:05 +03:00
parent 5c56c8c2fa
commit 9dea72ca31
1 changed files with 40 additions and 17 deletions
--- a/src/book.py
+++ b/src/book.py
@@ -362,13 +362,31 @@ class Book:

            if style:
                indent = re.search(r'text-indent: ([\d\.]{1,4})in', style)
+                margin_left = re.search(r'margin-left: ([\d\.]{1,4})in', style)
+                margin_right= re.search(r'margin-right: ([\d\.]{1,4})in', style)
+                margin_top = re.search(r'margin-top: ([\d\.]{1,4})in', style)
+                margin_bottom = re.search(r'margin-bottom: ([\d\.]{1,4})in', style)
            else:
                indent = None
+                margin_left = None
+                margin_right = None
+                margin_top = None
+                margin_bottom = None
+
+            if margin_left and margin_right and margin_top and margin_bottom and \
+                    margin_left.group(1) == '0.6' and margin_right.group(1) == '0.6' and \
+                    margin_top.group(1) == '0.14' and margin_bottom.group(1) == '0.11':
+                blockquote = BeautifulSoup(features='lxml').new_tag('blockquote')
+                blockquote.append(BeautifulSoup(features='lxml').new_tag('p'))
+            else:
+                blockquote = None

            p.attrs = {}
            style = ''
+
            if align is not None and align != self.DEFAULT_ALIGN_STYLE:
                style += f'text-align: {align};'
+
            if indent is not None:
                indent = indent.group(1)
                style += f'text-indent: {indent}in;'
@@ -376,6 +394,11 @@ class Book:
            if style:
                p.attrs['style'] = style

+            if blockquote:
+                blockquote.p.attrs = p.attrs
+                blockquote.p.string = p.text
+                p.replace_with(blockquote)
+
    def _process_two_columns(self):
        """
        Function to process paragraphs which has two columns layout.
@@ -387,20 +410,20 @@ class Book:
                    child["class"] = "columns2"
            div.unwrap()

-    def _process_quotes(self):
-        """
-        Function to process <dl> tags. All tags will be replaced with <blockquote> tags.
-        """
-        dls = self.body_tag.find_all('dl')
-
-        for dl in dls:
-            pars = dl.find_all('p')
-            for p in pars:
-                p.wrap(BeautifulSoup(features='lxml').new_tag('blockquote'))
-            new_div = BeautifulSoup(features='lxml').new_tag('div')
-            for p in pars:
-                new_div.append(p.parent)
-            dl.replaceWith(new_div)
+    # def _process_quotes(self):
+    #     """
+    #     Function to process <dl> tags. All tags will be replaced with <blockquote> tags.
+    #     """
+    #     dls = self.body_tag.find_all('dl')
+    #
+    #     for dl in dls:
+    #         pars = dl.find_all('p')
+    #         for p in pars:
+    #             p.wrap(BeautifulSoup(features='lxml').new_tag('blockquote'))
+    #         new_div = BeautifulSoup(features='lxml').new_tag('div')
+    #         for p in pars:
+    #             new_div.append(p.parent)
+    #         dl.replaceWith(new_div)

    @staticmethod
    def _clean_footnote_content(content):
@@ -575,7 +598,7 @@ class Book:
            self._preprocessing_headings()
            self._process_paragraph()
            self._process_two_columns()
-            self._process_quotes()
+            # self._process_quotes()

            self.log('Footnotes processing.')
            self._process_footnotes()
@@ -766,8 +789,8 @@ class Book:

 if __name__ == "__main__":
    folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    file_path = pathlib.Path(os.path.join(folder_path, 'html/11/11.html'))
-    out_path = pathlib.Path(os.path.join(folder_path, 'json/11.json'))
+    file_path = pathlib.Path(os.path.join(folder_path, 'html/0/quote.html'))
+    out_path = pathlib.Path(os.path.join(folder_path, 'json/quote.json'))

    logging_format = '%(asctime)s - %(levelname)s - %(message)s'