From b66ef6296fe8fdfbabaffacfde36e5a4c6ab3f0f Mon Sep 17 00:00:00 2001 From: shirshasa Date: Fri, 28 Aug 2020 13:44:43 +0300 Subject: [PATCH] quick fix --- src/book.py | 2 +- src/html_preprocessor.py | 20 ++++++++------------ src/json_converter.py | 10 +++++----- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/src/book.py b/src/book.py index 6587901..38efb1c 100644 --- a/src/book.py +++ b/src/book.py @@ -32,7 +32,7 @@ class Book: main_logger=main_logger) self.book_api_wrapper = BookApiWrapper(access, self.logger_object, book_id) - assert BookConfig.SUPPORTED_LEVELS == len(BookConfig.SUPPORTED_HEADERS), \ + assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \ "Length of headers doesn't match allowed levels." def save_docx(self, content): diff --git a/src/html_preprocessor.py b/src/html_preprocessor.py index d3d1cd9..3c0d783 100644 --- a/src/html_preprocessor.py +++ b/src/html_preprocessor.py @@ -5,7 +5,7 @@ import re from shutil import copyfile from bs4 import BeautifulSoup, NavigableString -from config import BookConfig, BookLogger, BookApiWrapper +from config import LawCartaConfig, BookLogger, BookApiWrapper class HTMLPreprocessor: @@ -49,8 +49,8 @@ class HTMLPreprocessor: @classmethod def convert_pt_to_px(cls, value): value = int(value) - if value == BookConfig.WORD_DEFAULT_FONT_SIZE: - return BookConfig.LAWCARTA_DEFAULT_FONT_SIZE + if value == LawCartaConfig.WORD_DEFAULT_FONT_SIZE: + return LawCartaConfig.LAWCARTA_DEFAULT_FONT_SIZE else: return value @@ -70,7 +70,7 @@ class HTMLPreprocessor: size = size.group(1) new_size = cls.convert_pt_to_px(size) - if new_size == BookConfig.LAWCARTA_DEFAULT_FONT_SIZE: + if new_size == LawCartaConfig.LAWCARTA_DEFAULT_FONT_SIZE: return "" return re.sub(size + "pt", str(new_size) + "px", style) @@ -178,7 +178,7 @@ class HTMLPreprocessor: p.attrs = {} style = '' - if align is not None and align != BookConfig.DEFAULT_ALIGN_STYLE: + if align is not None and align != LawCartaConfig.DEFAULT_ALIGN_STYLE: style += f'text-align: {align};' if indent is not None: @@ -280,10 +280,6 @@ class HTMLPreprocessor: tag.string = tag.text.replace('\u200c', '') tag['href'] = tag.attrs.get('href').replace('%E2%80%8C', '') - # %E2%80%8C - for tag in a_tags_with_href: - print(tag) - @staticmethod def _clean_footnote_content(content): content = content.strip() @@ -433,7 +429,7 @@ class HTMLPreprocessor: """ Function to convert all lower level headings to p tags """ - pattern = f'^h[{BookConfig.SUPPORTED_LEVELS + 1}-9]$' + pattern = f'^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$' header_tags = self.body_tag.find_all(re.compile(pattern)) for tag in header_tags: tag.name = 'p' @@ -521,8 +517,8 @@ class HTMLPreprocessor: if title == "": tag.unwrap() else: - assert tag.name in BookConfig.SUPPORTED_HEADERS, \ - f'Preprocessing went wrong, there is still h{BookConfig.SUPPORTED_LEVELS + 1}-h9 headings.' + assert tag.name in LawCartaConfig.SUPPORTED_HEADERS, \ + f'Preprocessing went wrong, there is still h{LawCartaConfig.SUPPORTED_LEVELS + 1}-h9 headings.' # if tag.name in ["h4", "h5", "h6"]: # tag.name = "h3" # All the lower level headings will be transformed to h3 headings diff --git a/src/json_converter.py b/src/json_converter.py index 0e88ad7..714184e 100644 --- a/src/json_converter.py +++ b/src/json_converter.py @@ -4,7 +4,7 @@ import codecs import json from copy import copy -from config import BookConfig +from config import LawCartaConfig class JSONConverter: @@ -34,7 +34,7 @@ class JSONConverter: :param ind: Index of header in content list. """ - if self.content[ind].name in BookConfig.SUPPORTED_HEADERS: + if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS: title = self.content[ind].text curr_outline = int(re.sub(r"^h", "", self.content[ind].name)) # extract outline from tag result = { @@ -47,7 +47,7 @@ class JSONConverter: while ind < len(self.content): # 1. next tag is a header - if self.content[ind].name in BookConfig.SUPPORTED_HEADERS: + if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS: outline = int(re.sub(r"^h", "", self.content[ind].name)) # - recursion step until h_i > h_initial if outline > curr_outline: @@ -100,13 +100,13 @@ class JSONConverter: while ind < len(self.content): res = {} - if self.content[ind].name in BookConfig.SUPPORTED_HEADERS: + if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS: res, ind = self.header_to_json(ind) else: chapter_title = f'Untitled chapter {ch_num}' chapter = [] - while ind < len(self.content) and self.content[ind].name not in BookConfig.SUPPORTED_HEADERS: + while ind < len(self.content) and self.content[ind].name not in LawCartaConfig.SUPPORTED_HEADERS: if not self._is_empty_p_tag(self.content[ind]): chapter.append(self.format_html(str(self.content[ind]))) ind += 1