diff --git a/src/css_reader.py b/src/css_reader.py index e19935a..bd2dc17 100644 --- a/src/css_reader.py +++ b/src/css_reader.py @@ -23,9 +23,9 @@ sizes_px = ['10px', '10px', '11px', '12px', '13px', '14px', '15px', '16px', '17p '35px', '36px', '37px', '38px', '39px', '40px', '41px', '42px', '43px', '44px', '45px', '46px', '47px', '48px', '49px', '50px', '64px', '72px'] -list_types = ['circle', 'disc', 'armenian','decimal', - 'decimal-leading-zero', 'georgian', 'lower-alpha','lower-latin', - 'lower-roman', 'upper-alpha', 'upper-latin', 'upper-roman', 'none' ] +list_types = ['circle', 'disc', 'armenian', 'decimal', + 'decimal-leading-zero', 'georgian', 'lower-alpha', 'lower-latin', + 'lower-roman', 'upper-alpha', 'upper-latin', 'upper-roman', 'none'] def convert_font_size(value): @@ -132,6 +132,8 @@ LIVECARTA_STYLE_ATTRS_MAPPING = { """ LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = { (property, value): tag } + +

, , , etc to_remove = check_style_to_be_tag(self.style) new_tags = [] - for i, (p, v) in enumerate(to_remove): - s = f'{p}:{v};' + for i, (attr, value) in enumerate(to_remove): + s = f'{attr}:{value};' self.style = self.style.replace(s, '') self.style = self.style.strip() if i == 0: - self.tag.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)] + self.tag.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(attr, value)] new_tags.append(self.tag) else: - name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)] + name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(attr, value)] new_tag = BeautifulSoup(features='lxml').new_tag(name) new_tags[-1].wrap(new_tag) new_tags.append(new_tag) @@ -267,34 +269,34 @@ class TagStyleConverter: return top_tag @staticmethod - def wrap_p_to_save_style_attrs(t): + def wrap_span_in_p_to_save_style_attrs(tag): styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS if attr not in ['text-align', 'text-indent']] - if t.name == 'p' and t.attrs.get('style'): - check = [attr in t.attrs.get('style') for attr in styles_cant_be_in_p] - if any(check): - t.name = 'span' + if tag.name == 'p' and tag.attrs.get('style'): + styles_to_be_saved = [attr in tag.attrs.get('style') for attr in styles_cant_be_in_p] + if any(styles_to_be_saved): + tag.name = 'span' p_tag = BeautifulSoup(features='lxml').new_tag('p') - old_style = t.attrs['style'] - new_style = '' + span_style = tag.attrs['style'] + p_style = '' possible_p_attrs_regexp = re.compile(r'(text-align:(\w+);)|(text-indent:(\w+);)') - has_p_style_attrs = re.search(possible_p_attrs_regexp, old_style) + has_p_style_attrs = re.search(possible_p_attrs_regexp, span_style) if has_p_style_attrs: if has_p_style_attrs.group(1): - new_style += has_p_style_attrs.group(1) - old_style = old_style.replace(has_p_style_attrs.group(1), '') + p_style += has_p_style_attrs.group(1) + span_style = span_style.replace(has_p_style_attrs.group(1), '') if has_p_style_attrs.group(3): - new_style += has_p_style_attrs.group(3) - old_style = old_style.replace(has_p_style_attrs.group(3), '') + p_style += has_p_style_attrs.group(3) + span_style = span_style.replace(has_p_style_attrs.group(3), '') - p_tag.attrs['style'] = new_style + p_tag.attrs['style'] = p_style li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)') - has_li_style_attr = re.search(li_attrs_regexp, old_style) - old_style = old_style if not has_li_style_attr else old_style.replace(has_li_style_attr.group(1), '') - t.attrs['style'] = old_style - t.wrap(p_tag) + has_li_style_attr = re.search(li_attrs_regexp, span_style) + span_style = span_style if not has_li_style_attr else span_style.replace(has_li_style_attr.group(1), '') + tag.attrs['style'] = span_style + tag.wrap(p_tag) @staticmethod def add_span_to_save_style_attrs_in_li(t): @@ -354,39 +356,38 @@ class TagStyleConverter: t.attrs['style'] = style if not has_li_style_attr else style.replace(has_li_style_attr.group(1), '') def convert_initial_tag(self): - del self.tag.attrs['livecarta_id'] self.tag = self.change_attrs_with_corresponding_tags() - self.wrap_p_to_save_style_attrs(self.tag) + self.wrap_span_in_p_to_save_style_attrs(self.tag) self.add_span_to_save_style_attrs_in_li(self.tag) self.add_span_to_save_style_attrs_in_ul_ol(self.tag) self.add_span_to_save_style_attrs(self.tag) return self.tag -def add_inline_style_to_html_soup(soup1, css_text): +def add_inline_style_to_html_soup(soup1: BeautifulSoup, css_text: str): css_text = css_text.replace('@namespace epub "http://www.idpf.org/2007/ops";', '') livecarta_tmp_ids = [] h_regex = f'(^h[1-9]$)' could_have_style_in_livecarta_regexp = re.compile('(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|' + h_regex) - elements_with_possible_style_attr = soup1.find_all(could_have_style_in_livecarta_regexp) - for i, x in enumerate(elements_with_possible_style_attr): + tags_with_possible_style_attr = soup1.find_all(could_have_style_in_livecarta_regexp) + for i, x in enumerate(tags_with_possible_style_attr): x.attrs['livecarta_id'] = i livecarta_tmp_ids.append(i) - html_with_inline_style = transform(str(soup1), css_text=css_text, - remove_classes=False, - external_styles=False, - allow_network=False, - disable_validation=True) + html_with_inline_style: str = transform(str(soup1), css_text=css_text, + remove_classes=False, + external_styles=False, + allow_network=False, + disable_validation=True) soup2 = BeautifulSoup(html_with_inline_style, features='lxml') for i in livecarta_tmp_ids: tag = soup1.find(attrs={'livecarta_id': i}) tag_with_style = soup2.find(attrs={'livecarta_id': i}) + del tag.attrs['livecarta_id'] if tag_with_style.attrs.get('style'): style_converter = TagStyleConverter(tag, tag_with_style) style_converter.convert_initial_tag() - else: - del tag.attrs['livecarta_id'] + return soup1 diff --git a/src/docx_solver.py b/src/docx_solver.py index b7c5be6..e001ef9 100644 --- a/src/docx_solver.py +++ b/src/docx_solver.py @@ -8,7 +8,7 @@ from threading import Event from bs4 import BeautifulSoup from html_docx_preprocessor import HTMLDocxPreprocessor from libra_html2json_converter import LibraHTML2JSONConverter -from src.book_solver import BookSolver +from solver import BookSolver class DocxBook(BookSolver): diff --git a/src/epub_converter.py b/src/epub_converter.py index ba4ca0e..375020e 100644 --- a/src/epub_converter.py +++ b/src/epub_converter.py @@ -28,10 +28,28 @@ class EpubConverter: self.access = access self.logger: BookLogger = logger self.ebooklib_book = epub.read_epub(file) + + self.href2soup_html: Dict[str, BeautifulSoup] = {} # main container for all epub .xhtml files + self.href2subchapter_ids = defaultdict(list) # enumerate all subchapter id for each file + self.added_to_toc_hrefs = set() # enumerate all file paths that where added to TOC + + # toc tree structure stored as adj.list (NavPoint to list of NavPoints) + # key = -1 for top level NavPoints + self.adjacency_list: Dict[Union[NavPoint, -1], Union[list, None]] = {} + + # container for all chapters soup objects + # here soup object is only part of the .xhtml file + self.href_chapter_id2soup_html: Dict[tuple, BeautifulSoup] = {} + self.internal_anchors = set() + self.id_anchor_exist_in_nav_points = False # flag to be updated while ebooklib.toc is parsed + self.href2img_bytes = {} # file path to bytes + self.old_image_path2_aws_path = {} # file path from to generated aws path + self.footnotes_contents: List[str] = [] # to be sent on server as is + self.noterefs: List[Tag] = [] # start of the footnote + self.footnotes: List[Tag] = [] # end of the footnote + self.logger.log('Image processing.') - self.href2img_bytes = {} - self.old_image_path2_aws_path = {} for x in chain(self.ebooklib_book.get_items_of_type(ebooklib.ITEM_IMAGE), self.ebooklib_book.get_items_of_type(ebooklib.ITEM_COVER)): file_name = x.file_name @@ -39,8 +57,7 @@ class EpubConverter: self.href2img_bytes[file_name] = content self.logger.log('HTML files reading.') - self.id_anchor_exist_in_nav_points = False - self.href2soup_html: Dict[str, BeautifulSoup] = self.build_href2soup_content() + self.href2soup_html = self.build_href2soup_content() self.logger.log('CSS files processing.') self.css_href2content, self.html_href2css_href = self.build_css_content() @@ -48,9 +65,6 @@ class EpubConverter: self.add_css_styles2soup() self.logger.log('Footnotes processing.') - self.footnotes_contents: List[str] = [] - self.noterefs = [] - self.footnotes: List[Tag] = [] for href in self.href2soup_html: content, noterefs, footnotes_tags = preprocess_footnotes(self.href2soup_html[href], self.href2soup_html) @@ -65,19 +79,18 @@ class EpubConverter: self.logger.log(f'Added {len(self.footnotes_contents)} footnotes.') self.logger.log('TOC processing.') - self.href2subchapter_ids = defaultdict(list) - self.added_to_toc_hrefs = set() - self.adjacency_list: Dict[Union[NavPoint, -1], Union[list, None]] = {} # nav_point2nav_points self.build_adjacency_list_from_toc(self.ebooklib_book.toc) # build simple toc from spine if needed - if not self.is_toc_valid(): + if self.is_toc_empty(): self.build_adjacency_list_from_spine() not_added = [x for x in self.href2soup_html if x not in self.added_to_toc_hrefs] self.logger.log(f'Html documents not added to TOC: {not_added}.') self.add_not_added_files_to_adjacency_list(not_added) + self.logger.log(f'Html internal links and structure processing.') + self.label_chapters_ids_with_tmp_id() self.process_html_soup_structure_to_line() # used only after parsed toc, ids from toc needed self.process_internal_links() - self.href_chapter_id2soup_html: Dict[tuple, BeautifulSoup] = {} + self.logger.log(f'Building chapters content.') self.define_chapters_content() def build_href2soup_content(self) -> Dict[str, BeautifulSoup]: @@ -129,7 +142,7 @@ class EpubConverter: for href in self.href2soup_html: if self.html_href2css_href.get(href): css: str = self.css_href2content[self.html_href2css_href[href]] - content = self.href2soup_html[href] + content: BeautifulSoup = self.href2soup_html[href] content = add_inline_style_to_html_soup(content, css) self.href2soup_html[href] = content @@ -142,7 +155,7 @@ class EpubConverter: def build_adjacency_list_from_toc(self, element, lvl=0): """ - self.adjacency_list builds based on TOC nested structure + self.adjacency_list builds based on TOC nested structure, got from self.ebooklib_book.toc key = -1 if root, value = None if leaf @@ -152,29 +165,29 @@ class EpubConverter: if isinstance(element, Link): # todo: check if link exists - node = NavPoint(element) - if node.id: + nav_point = NavPoint(element) + if nav_point.id: self.id_anchor_exist_in_nav_points = True - self.href2subchapter_ids[node.href].append(node.id) - self.adjacency_list[node] = None - self.added_to_toc_hrefs.add(node.href) - return node + self.href2subchapter_ids[nav_point.href].append(nav_point.id) + self.adjacency_list[nav_point] = None + self.added_to_toc_hrefs.add(nav_point.href) + return nav_point elif isinstance(element, tuple): first, second = element assert isinstance(first, Section) - node = NavPoint(first) - if node.id: + nav_point = NavPoint(first) + if nav_point.id: self.id_anchor_exist_in_nav_points = True - self.href2subchapter_ids[node.href].append(node.id) + self.href2subchapter_ids[nav_point.href].append(nav_point.id) sub_nodes = [] for i in second: sub_nodes.append(self.build_adjacency_list_from_toc(i, lvl + 1)) - self.adjacency_list[node] = sub_nodes - self.added_to_toc_hrefs.add(node.href) - return node + self.adjacency_list[nav_point] = sub_nodes + self.added_to_toc_hrefs.add(nav_point.href) + return nav_point elif isinstance(element, list) and (lvl == 0): sub_nodes = [] @@ -186,10 +199,10 @@ class EpubConverter: else: assert 0, f'Error. Element is not tuple/Link instance: {type(element)}' - def is_toc_valid(self): + def is_toc_empty(self): if (self.ebooklib_book.toc is None) or (self.adjacency_list.get(-1) is None): - return False - return True + return True + return False def build_adjacency_list_from_spine(self): manifest_id2href = self.build_manifest_id2href() @@ -197,18 +210,17 @@ class EpubConverter: -1: [] } for id_, _ in self.ebooklib_book.spine: - node = NavPoint(Section(manifest_id2href[id_], manifest_id2href[id_])) - self.adjacency_list[-1].append(node) - self.added_to_toc_hrefs.add(node.href) + nav_point = NavPoint(Section(manifest_id2href[id_], manifest_id2href[id_])) + self.adjacency_list[-1].append(nav_point) + self.added_to_toc_hrefs.add(nav_point.href) def add_not_added_files_to_adjacency_list(self, not_added): for i, file in enumerate(not_added): - node = NavPoint(Section(f'To check #{i}, filename: {file}', file)) - self.adjacency_list[-1].append(node) + nav_point = NavPoint(Section(f'To check #{i}, filename: {file}', file)) + self.adjacency_list[-1].append(nav_point) self.added_to_toc_hrefs.add(file) - def process_html_soup_structure_to_line(self): - # mark + def label_chapters_ids_with_tmp_id(self): for href in self.href2soup_html: ids = self.href2subchapter_ids[href] for i in ids: @@ -219,6 +231,7 @@ class EpubConverter: new_h.attrs['id'] = i tag.insert_before(new_h) + def process_html_soup_structure_to_line(self): # go to line structure for href in self.href2soup_html: soup = self.href2soup_html[href] @@ -236,18 +249,31 @@ class EpubConverter: new_anchor_span.string = "\xa0" return new_anchor_span - def match_href_to_path_from_toc(self, href, href_in_link, internal_link_tag): - dir_name = os.path.dirname(href) + def _match_href_to_path_from_toc(self, cur_file_path, href_in_link, internal_link_tag): + """ + TOC: a/b/c.xhtml + + b/c.xhtml -> a/b/c.xhtml + c.xhtml -> a/b/c.xhtml + + Used to find full path to file that is parsed from tag link + + :param cur_file_path: path to current file with tag link + :param href_in_link: filename got from tag link, like file1.xhtml + :param internal_link_tag: tag object that is parsed now + :return: + """ + dir_name = os.path.dirname(cur_file_path) normed_path = os.path.normpath(os.path.join(dir_name, href_in_link)).replace('\\', '/') full_path = [path for path in self.added_to_toc_hrefs if normed_path in path] if not full_path: - self.logger.log(f'Error in {href} file. No {normed_path} file found in added to TOC documents. ' + self.logger.log(f'Error in {cur_file_path} file. No {normed_path} file found in added to TOC documents. ' f'While processing href in {internal_link_tag}.') internal_link_tag.attrs['converter-mark'] = 'bad-link' return None if len(full_path) > 1: - self.logger.log(f'Warning in {href}. Multiple paths found {full_path} for file {href_in_link}' + self.logger.log(f'Warning in {cur_file_path}. Multiple paths found {full_path} for file {href_in_link}' f' while {internal_link_tag} processing. The first one will be chosen.') return full_path[0] @@ -272,7 +298,7 @@ class EpubConverter: for internal_link_tag in soup.find_all('a', {'href': internal_link_reg1}): a_tag_href = internal_link_tag.attrs['href'] # find full path - a_tag_href_matched_to_toc = self.match_href_to_path_from_toc(toc_href, a_tag_href, internal_link_tag) + a_tag_href_matched_to_toc = self._match_href_to_path_from_toc(toc_href, a_tag_href, internal_link_tag) if not a_tag_href_matched_to_toc: continue new_id = self._create_unique_id(a_tag_href_matched_to_toc, '') @@ -291,9 +317,12 @@ class EpubConverter: soup = self.href2soup_html[toc_href] for internal_link_tag in soup.find_all('a', {'href': internal_link_reg2}): a_tag_href, a_tag_id = internal_link_tag.attrs['href'].split('#') - a_tag_href = a_tag_href or toc_href # find full path - a_tag_href_matched_to_toc = self.match_href_to_path_from_toc(toc_href, a_tag_href, internal_link_tag) + if a_tag_href: + a_tag_href_matched_to_toc = self._match_href_to_path_from_toc(toc_href, a_tag_href, + internal_link_tag) + else: + a_tag_href_matched_to_toc = os.path.normpath(toc_href).replace('\\', '/') if not a_tag_href_matched_to_toc: continue new_id = self._create_unique_id(a_tag_href_matched_to_toc, a_tag_id) @@ -326,7 +355,7 @@ class EpubConverter: f' Should be anchor with new id={new_id} in {a_tag_href_matched_to_toc} file.' f' Old id={a_tag_id}') - def build_one_chapter(self, node): + def build_one_chapter(self, nav_point): """ Updates self.href_chapter_id2soup_html (mapping from (href,id) to chapter content/html soup object) @@ -339,34 +368,34 @@ class EpubConverter: and id of the next chapter/subchapter """ - if node.id: - soup = self.href2soup_html[node.href] - chapter_tags = get_tags_between_chapter_marks(first_id=node.id, href=node.href, html_soup=soup) + if nav_point.id: + soup = self.href2soup_html[nav_point.href] + chapter_tags = get_tags_between_chapter_marks(first_id=nav_point.id, href=nav_point.href, html_soup=soup) new_tree = BeautifulSoup('', 'html.parser') for tag in chapter_tags: new_tree.append(tag) - self.href_chapter_id2soup_html[(node.href, node.id)] = new_tree + self.href_chapter_id2soup_html[(nav_point.href, nav_point.id)] = new_tree - if self.adjacency_list.get(node): - for sub_node in self.adjacency_list[node]: + if self.adjacency_list.get(nav_point): + for sub_node in self.adjacency_list[nav_point]: self.build_one_chapter(sub_node) def define_chapters_content(self): - nav_points = self.adjacency_list[-1] + top_level_nav_points = self.adjacency_list[-1] if self.id_anchor_exist_in_nav_points: - for point in nav_points: + for point in top_level_nav_points: self.build_one_chapter(point) - def node2livecarta_chapter_item(self, node: NavPoint, lvl=1) -> ChapterItem: - title = node.title - if node.id: - content: BeautifulSoup = self.href_chapter_id2soup_html[(node.href, node.id)] + def node2livecarta_chapter_item(self, nav_point: NavPoint, lvl=1) -> ChapterItem: + title = nav_point.title + if nav_point.id: + content: BeautifulSoup = self.href_chapter_id2soup_html[(nav_point.href, nav_point.id)] else: - content: BeautifulSoup = self.href2soup_html[node.href] + content: BeautifulSoup = self.href2soup_html[nav_point.href] self.old_image_path2_aws_path = update_src_links_in_images(content, self.href2img_bytes, - path_to_html=node.href, + path_to_html=nav_point.href, access=self.access, path2aws_path=self.old_image_path2_aws_path) @@ -376,8 +405,8 @@ class EpubConverter: sub_nodes = [] # warning! not EpubHtmlItems won;t be added to chapter - if self.adjacency_list.get(node): - for sub_node in self.adjacency_list[node]: + if self.adjacency_list.get(nav_point): + for sub_node in self.adjacency_list[nav_point]: sub_chapter_item = self.node2livecarta_chapter_item(sub_node, lvl + 1) sub_nodes.append(sub_chapter_item) diff --git a/src/epub_solver.py b/src/epub_solver.py index 75609fe..0ded25b 100644 --- a/src/epub_solver.py +++ b/src/epub_solver.py @@ -1,5 +1,5 @@ from epub_converter import EpubConverter -from src.book_solver import BookSolver +from solver import BookSolver class EpubBook(BookSolver): diff --git a/src/html_docx_preprocessor.py b/src/html_docx_preprocessor.py index 18b688c..989085c 100644 --- a/src/html_docx_preprocessor.py +++ b/src/html_docx_preprocessor.py @@ -8,7 +8,7 @@ from typing import List from bs4 import BeautifulSoup, NavigableString, Tag from livecarta_config import LawCartaConfig -from src.util.helpers import BookLogger, BookStatusWrapper +from util.helpers import BookLogger, BookStatusWrapper class HTMLDocxPreprocessor: diff --git a/src/solver.py b/src/solver.py new file mode 100644 index 0000000..d86b29c --- /dev/null +++ b/src/solver.py @@ -0,0 +1,139 @@ +""" This is Main Abstract class for solving a task of a book conversion + +Having an id of coming book, gets book from server, runs conversion. +In parallel it updates status of a book conversion on admin panel. +Finally sends result to server. +Result is a json, JSON schema in book_schema.json +""" + +import codecs +import json +import logging +import os +import pathlib +from abc import abstractmethod, ABCMeta + +from livecarta_config import LawCartaConfig +from util.helpers import BookLogger, BookStatusWrapper + + +class BookSolver: + __metaclass__ = ABCMeta + + def __init__(self, book_id=0, access=None, main_logger=None, logging_format='%(asctime)s - %(levelname)s - %(message)s'): + self.book_type = None + self.book_id = book_id + self.access = access + self.file_path = None # path to book file, appears after downloading from server + self.output_path = None # path to json file + self.logger_object = BookLogger(name=f'{__name__}_{self.book_id}', + logging_format=logging_format, + book_id=book_id, + main_logger=main_logger) + self.status_wrapper = BookStatusWrapper(access, self.logger_object, book_id) + + assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \ + "Length of headers doesn't match allowed levels." + + def save_book_file(self, content): + """ + Save binary content of file to .docx/.epub. + :param content: binary content of the file. + """ + folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + folder_path = os.path.join(folder_path, f'{self.book_type}/{self.book_id}') + pathlib.Path(folder_path).mkdir(parents=True, exist_ok=True) + + file_path = os.path.join(folder_path, f'{self.book_id}.{self.book_type}') + try: + with open(file_path, 'wb+') as file: + file.write(content) + self.logger_object.log(f'File was saved to folder: {folder_path}.') + except Exception as exc: + self.logger_object.log(f"Error in writing {self.book_type} file.", logging.ERROR) + self.logger_object.log_error_to_main_log() + raise exc + + self.file_path = pathlib.Path(file_path) + + def get_book_file(self): + """ + Method for getting and saving book from server. + """ + try: + self.logger_object.log(f'Start receiving file from server. URL: {self.access.url}/doc-convert/{self.book_id}/file') + content = self.access.get_doc(self.book_id) + self.logger_object.log('File was received from server.') + self.save_book_file(content) + except FileNotFoundError as f_err: + self.logger_object.log("Can't get docx from server.", logging.ERROR) + self.logger_object.log_error_to_main_log() + raise f_err + except Exception as exc: + raise exc + + def check_output_directory(self): + if self.output_path is None: + folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + output_path = os.path.join(folder_path, f'json/{self.book_id}.json') + self.output_path = output_path + + self.output_path = pathlib.Path(self.output_path) + self.logger_object.log(f'Output file path: {self.output_path}') + + pathlib.Path(self.output_path).parent.mkdir(parents=True, exist_ok=True) + self.output_path.touch(exist_ok=True) + + def write_to_json(self, content: dict): + self.check_output_directory() + try: + with codecs.open(self.output_path, 'w', encoding='utf-8') as f: + json.dump(content, f, ensure_ascii=False) + self.logger_object.log(f'Data has been saved to .json file: {self.output_path}') + except Exception as exc: + self.logger_object.log('Error has occurred while writing json file.' + str(exc), logging.ERROR) + + def send_json_content_to_server(self, content: dict): + try: + self.access.send_book(self.book_id, content) + self.logger_object.log(f'JSON data has been sent to server.') + except Exception as exc: + self.logger_object.log('Error has occurred while sending json content.', logging.ERROR) + self.logger_object.log_error_to_main_log() + self.status_wrapper.set_error() + raise exc + + @abstractmethod + def get_converted_book(self): + self.logger_object.log('Beginning of processing json output.') + self.status_wrapper.set_generating() + return {} + + def test_conversion(self): + self.logger_object.log('Beginning of the test.') + + folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + folder_path = os.path.join(folder_path, f'{self.book_type}') + file_path = os.path.join(folder_path, f'{self.book_id}.{self.book_type}') + self.file_path = pathlib.Path(file_path) + self.logger_object.log(f'Test on {self.book_type}: {self.file_path}') + content_dict = self.get_converted_book() + self.write_to_json(content_dict) + self.logger_object.log('End of the test.') + + def conversion(self): + try: + self.logger_object.log(f'Beginning of conversion from .{self.book_type} to .json.') + self.get_book_file() + self.status_wrapper.set_processing() + content_dict = self.get_converted_book() + self.write_to_json(content_dict) + self.send_json_content_to_server(content_dict) + self.logger_object.log(f'End of the conversion to LawCarta format. Check {self.output_path}.') + + except Exception as exc: + self.status_wrapper.set_error() + self.logger_object.log('Error has occurred while conversion.', logging.ERROR) + self.logger_object.log_error_to_main_log(str(exc)) + raise exc +