quick fix: rename BookConfig references to LawCartaConfig and remove leftover debug print loop

2020-08-28 13:44:43 +03:00
parent f27eefb96b
commit b66ef6296f
3 changed files with 14 additions and 18 deletions
--- a/src/book.py
+++ b/src/book.py
@@ -32,7 +32,7 @@ class Book:
                                        main_logger=main_logger)
        self.book_api_wrapper = BookApiWrapper(access, self.logger_object, book_id)

-        assert BookConfig.SUPPORTED_LEVELS == len(BookConfig.SUPPORTED_HEADERS), \
+        assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \
            "Length of headers doesn't match allowed levels."

    def save_docx(self, content):
--- a/src/html_preprocessor.py
+++ b/src/html_preprocessor.py
@@ -5,7 +5,7 @@ import re
 from shutil import copyfile

 from bs4 import BeautifulSoup, NavigableString
-from config import BookConfig, BookLogger, BookApiWrapper
+from config import LawCartaConfig, BookLogger, BookApiWrapper


 class HTMLPreprocessor:
@@ -49,8 +49,8 @@ class HTMLPreprocessor:
    @classmethod
    def convert_pt_to_px(cls, value):
        value = int(value)
-        if value == BookConfig.WORD_DEFAULT_FONT_SIZE:
-            return BookConfig.LAWCARTA_DEFAULT_FONT_SIZE
+        if value == LawCartaConfig.WORD_DEFAULT_FONT_SIZE:
+            return LawCartaConfig.LAWCARTA_DEFAULT_FONT_SIZE
        else:
            return value

@@ -70,7 +70,7 @@ class HTMLPreprocessor:
        size = size.group(1)
        new_size = cls.convert_pt_to_px(size)

-        if new_size == BookConfig.LAWCARTA_DEFAULT_FONT_SIZE:
+        if new_size == LawCartaConfig.LAWCARTA_DEFAULT_FONT_SIZE:
            return ""

        return re.sub(size + "pt", str(new_size) + "px", style)
@@ -178,7 +178,7 @@ class HTMLPreprocessor:
            p.attrs = {}
            style = ''

-            if align is not None and align != BookConfig.DEFAULT_ALIGN_STYLE:
+            if align is not None and align != LawCartaConfig.DEFAULT_ALIGN_STYLE:
                style += f'text-align: {align};'

            if indent is not None:
@@ -280,10 +280,6 @@ class HTMLPreprocessor:
            tag.string = tag.text.replace('\u200c', '')
            tag['href'] = tag.attrs.get('href').replace('%E2%80%8C', '')

-        # %E2%80%8C
-        for tag in a_tags_with_href:
-            print(tag)
-
    @staticmethod
    def _clean_footnote_content(content):
        content = content.strip()
@@ -433,7 +429,7 @@ class HTMLPreprocessor:
        """
        Function to convert all lower level headings to p tags
        """
-        pattern = f'^h[{BookConfig.SUPPORTED_LEVELS + 1}-9]$'
+        pattern = f'^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
        header_tags = self.body_tag.find_all(re.compile(pattern))
        for tag in header_tags:
            tag.name = 'p'
@@ -521,8 +517,8 @@ class HTMLPreprocessor:
            if title == "":
                tag.unwrap()
            else:
-                assert tag.name in BookConfig.SUPPORTED_HEADERS, \
-                    f'Preprocessing went wrong, there is still h{BookConfig.SUPPORTED_LEVELS + 1}-h9 headings.'
+                assert tag.name in LawCartaConfig.SUPPORTED_HEADERS, \
+                    f'Preprocessing went wrong, there is still h{LawCartaConfig.SUPPORTED_LEVELS + 1}-h9 headings.'
                # if tag.name in ["h4", "h5", "h6"]:
                #     tag.name = "h3" # All the lower level headings will be transformed to h3 headings

--- a/src/json_converter.py
+++ b/src/json_converter.py
@@ -4,7 +4,7 @@ import codecs
 import json

 from copy import copy
-from config import BookConfig
+from config import LawCartaConfig


 class JSONConverter:
@@ -34,7 +34,7 @@ class JSONConverter:

        :param ind: Index of header in content list.
        """
-        if self.content[ind].name in BookConfig.SUPPORTED_HEADERS:
+        if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS:
            title = self.content[ind].text
            curr_outline = int(re.sub(r"^h", "", self.content[ind].name))  # extract outline from tag
            result = {
@@ -47,7 +47,7 @@ class JSONConverter:

            while ind < len(self.content):
                # 1. next tag is a header
-                if self.content[ind].name in BookConfig.SUPPORTED_HEADERS:
+                if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS:
                    outline = int(re.sub(r"^h", "", self.content[ind].name))
                    # - recursion step until h_i > h_initial
                    if outline > curr_outline:
@@ -100,13 +100,13 @@ class JSONConverter:
            while ind < len(self.content):
                res = {}

-                if self.content[ind].name in BookConfig.SUPPORTED_HEADERS:
+                if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS:
                    res, ind = self.header_to_json(ind)

                else:
                    chapter_title = f'Untitled chapter {ch_num}'
                    chapter = []
-                    while ind < len(self.content) and self.content[ind].name not in BookConfig.SUPPORTED_HEADERS:
+                    while ind < len(self.content) and self.content[ind].name not in LawCartaConfig.SUPPORTED_HEADERS:
                        if not self._is_empty_p_tag(self.content[ind]):
                            chapter.append(self.format_html(str(self.content[ind])))
                        ind += 1