From 5039417a0f28f2d74ee59aa1500ab44e093e945c Mon Sep 17 00:00:00 2001
From: Kiryl <kiryl.miatselitsa@teqniksoft.com>
Date: Wed, 1 Jun 2022 16:18:12 +0300
Subject: [PATCH] Modify local consumer.py

---
 consumer.py                          | 48 +++++++++++------
 src/epub_converter/epub_converter.py | 77 +++++++++++++++++-----------
 2 files changed, 80 insertions(+), 45 deletions(-)

diff --git a/consumer.py b/consumer.py
index d1cacc1..4c67d6e 100644
--- a/consumer.py
+++ b/consumer.py
@@ -23,36 +23,48 @@ def configure_file_logger(name, filename='logs/converter.log', filemode='w+',
     file_handler = logging.FileHandler(file_path, mode=filemode)
     logger.addHandler(file_handler)
 
-    file_format = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(message)s [%(filename)s:%(lineno)d in %(funcName)s]')
+    file_format = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(message)s '
+                                        '[%(filename)s:%(lineno)d in %(funcName)s]')
     file_handler.setFormatter(file_format)
     logger.setLevel(logging_level)
     return logger
 
-def convert_book(book_type: [DocxBook, EpubBook], book_id, logger, params: dict,):
-    logger.info(f'Start processing book-{book_id}.')
 
+def local_convert_book(book_type: [DocxBook, EpubBook], book_id, logger, params: dict):
+    logger.info(f'Start processing book-{book_id}.')
+    try:
+        json_file_path = 'json/9781614382264.json'
+        book = book_type(book_id=book_id, main_logger=logger, **params)
+        book.conversion_local(json_file_path)
+    except Exception as exc:
+        raise exc
+    logger.info(f'Book-{book_id} has been proceeded.')
+
+
+def convert_book(book_type: [DocxBook, EpubBook], book_id, logger, params: dict):
+    logger.info(f'Start processing book-{book_id}.')
     try:
         book = book_type(book_id=book_id, main_logger=logger, **params)
-        # book.conversion_local('9781641051217')
         book.conversion()
     except Exception as exc:
         raise exc
-
     logger.info(f'Book-{book_id} has been proceeded.')
 
-def callback(ch, method, properties, body, logger, libra_locker):
+
+def callback(ch, method, properties, body, logger, libre_locker):
     print(f'Message: {body}.')
     logger.info(f'Message: {body}.')
     try:
         data = json.loads(body)
         assert 'apiURL' in data, 'No apiURL field in received message.'
-        assert data.get('fileExtension') in ['epub', 'docx'], 'Wrong book type received.'
+        assert data.get('fileExtension') in [
+            'epub', 'docx'], 'Wrong book type received.'
 
         book_params = {
             'access': Access(url=data['apiURL']),
         }
         if data.get('fileExtension') == 'docx':
-            book_params.update({'libra_locker': libra_locker})
+            book_params.update({'libre_locker': libre_locker})
 
         params = {
             'book_type': EpubBook if data.get('fileExtension') == 'epub' else DocxBook,
@@ -75,6 +87,7 @@ def callback(ch, method, properties, body, logger, libra_locker):
     finally:
         pass
 
+
 def server_run():
     logger = configure_file_logger('consumer')
 
@@ -87,25 +100,30 @@ def server_run():
     port = conf_param.get('port') or pika.ConnectionParameters().DEFAULT_PORT
     channel = None
     try:
-        credentials = pika.PlainCredentials(username=conf_param['username'], password=conf_param['password'])
-        parameters = pika.ConnectionParameters(host=host, port=port, credentials=credentials)
+        credentials = pika.PlainCredentials(
+            username=conf_param['username'], password=conf_param['password'])
+        parameters = pika.ConnectionParameters(
+            host=host, port=port, credentials=credentials)
         connection = pika.BlockingConnection(parameters)
         channel = connection.channel()
     except Exception as exc:
-        logger.log(logging.ERROR, f'Problems with queue connection.\n' + str(exc))
+        logger.log(logging.ERROR,
+                   f'Problems with queue connection.\n' + str(exc))
         raise exc
 
     try:
-        channel.queue_declare(queue=conf_param['queue'], durable=True, arguments={'x-max-priority': 10})
+        channel.queue_declare(queue=conf_param['queue'], durable=True, arguments={
+                              'x-max-priority': 10})
     except ValueError as exc:
-        logger.log(logging.ERROR, f'Queue {conf_param["queue"]} is not declared.')
+        logger.log(logging.ERROR,
+                   f'Queue {conf_param["queue"]} is not declared.')
         raise exc
 
     locker = Event()
     locker.set()
     channel.basic_consume(queue=conf_param['queue'],
                           auto_ack=True,
-                          on_message_callback=partial(callback, logger=logger, libra_locker=locker))
+                          on_message_callback=partial(callback, logger=logger, libre_locker=locker))
     logger.info('Connection has been established.')
     print('Waiting for messages...')
     logger.info('Waiting for messages...')
@@ -114,4 +132,4 @@ def server_run():
 
 
 if __name__ == '__main__':
-    server_run()
\ No newline at end of file
+    server_run()
diff --git a/src/epub_converter/epub_converter.py b/src/epub_converter/epub_converter.py
index 17f41a2..7e5e389 100644
--- a/src/epub_converter/epub_converter.py
+++ b/src/epub_converter/epub_converter.py
@@ -18,16 +18,16 @@ from src.util.helpers import BookLogger
 from src.livecarta_config import LiveCartaConfig
 from src.data_objects import ChapterItem, NavPoint
 from src.epub_converter.css_reader import build_css_content, convert_html_soup_with_css_style
-from src.epub_converter.html_epub_preprocessor import unwrap_structural_tags, get_tags_between_chapter_marks, prepare_title, prepare_content, \
-    update_images_src_links, preprocess_footnotes
+from src.epub_converter.html_epub_preprocessor import unwrap_structural_tags, get_tags_between_chapter_marks,\
+    prepare_title, prepare_content, update_images_src_links, preprocess_footnotes
 
 
 class EpubConverter:
-    def __init__(self, file, access=None, logger=None):
-        self.file = file
+    def __init__(self, file_path, access=None, logger=None):
+        self.file_path = file_path
         self.access = access
         self.logger: BookLogger = logger
-        self.ebooklib_book = epub.read_epub(file)
+        self.ebooklib_book = epub.read_epub(file_path)
 
         # main container for all epub .xhtml files
         self.html_href2html_body_soup: Dict[str, BeautifulSoup] = {}
@@ -66,6 +66,7 @@ class EpubConverter:
         self.logger.log('HTML files reading.')
         self.html_href2html_body_soup: Dict[str,
                                             BeautifulSoup] = self.build_href2soup_content()
+        # TODO Presets
 
         self.logger.log('CSS files processing.')
         self.html_href2css_href, self.css_href2css_content = self.build_html_and_css_relations()
@@ -122,18 +123,25 @@ class EpubConverter:
             join(html_folder, path_to_css_from_html)).replace('\\', '/')
         css_obj = self.ebooklib_book.get_item_with_href(path_to_css_from_root)
         if "@import" in str(css_obj.content):
-            path_to_css_from_root = "css/" + re.search('"(.*)"', str(css_obj.content)).group(1)
-            css_obj = self.ebooklib_book.get_item_with_href(path_to_css_from_root)
+            path_to_css_from_root = "css/" + \
+                re.search('"(.*)"', str(css_obj.content)).group(1)
+            css_obj = self.ebooklib_book.get_item_with_href(
+                path_to_css_from_root)
         assert css_obj, f'Css style {css_href} was not in manifest.'
         css_content: str = css_obj.get_content().decode()
         return css_content
 
-    def build_html_and_css_relations(self):
+    def build_html_and_css_relations(self) -> tuple[dict, dict]:
         """
-        This function is designed to get 2 dictionaries:
+        Function is designed to get 2 dictionaries:
         The first is css_href2css_content. It is created to connect href of css to content of css
-        The second is html_href2css_href. It is created to connect href of html to css files(hrefs of them) which are used on this html
+        The second is html_href2css_href. It is created to connect href of html to css files
+        (hrefs of them) which are used on this html
         ...2... = key2value
+        Returns
+        ----------
+        html_href2css_href, css_href2css_content: tuple[dict, dict]
+            dictionary: href of html to related css files, dictionary: css files to related css content
 
         """
         # dictionary: href of html to related css files
@@ -160,8 +168,7 @@ class EpubConverter:
                 html_href2css_href[html_href].append(f'href{i}')
                 css_href2css_content[f'href{i}'] = build_css_content(
                     css_content)
-
-        return html_href2css_href, css_href2css_content,
+        return html_href2css_href, css_href2css_content
 
     def add_css_styles_to_html_soup(self):
         """
@@ -178,22 +185,24 @@ class EpubConverter:
                 content = convert_html_soup_with_css_style(content, css)
                 self.html_href2html_body_soup[html_href] = content
 
-    def build_manifest_id2html_href(self):
-        links = dict()
-        for item in self.ebooklib_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
-            links[item.id] = item.file_name
-
-        return links
-
-    def build_adjacency_list_from_toc(self, element, lvl=0):
+    def build_adjacency_list_from_toc(self, element: [Link, tuple, list], lvl=0):
         """
+        Function builds adjacency list from an element of TOC:
         self.adjacency_list builds based on TOC nested structure, got from self.ebooklib.toc
 
         key = -1 if root(top chapters),
         value = None if leaf(least chapters)
+        Parameters
+        ----------
+        element: [Link, tuple, list]
+            element that appears in TOC(usually parsed from nav.ncx)
+        lvl: int
+            level of node
 
-        :param element: [Link, tuple, list] - element that appears in TOC(usually parsed from nav.ncx)
-        :param lvl: level of depth
+        Returns
+        ----------
+        None
+            result is stored in self.adjacency_list
 
         """
         if isinstance(element, Link):
@@ -250,6 +259,12 @@ class EpubConverter:
             return True
         return False
 
+    def build_manifest_id2html_href(self) -> dict:
+        links = dict()
+        for item in self.ebooklib_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
+            links[item.id] = item.file_name
+        return links
+
     def build_adjacency_list_from_spine(self):
         manifest_id2html_href = self.build_manifest_id2html_href()
         self.adjacency_list = {
@@ -316,7 +331,7 @@ class EpubConverter:
 
         Returns
         -------
-        full_path[0]: s
+        full_path[0]: str
             prepared content
 
         """
@@ -453,6 +468,8 @@ class EpubConverter:
         Returns
         -------
         None
+            built chapter
+
         """
         if nav_point.id:
             soup = self.html_href2html_body_soup[nav_point.href]
@@ -487,7 +504,7 @@ class EpubConverter:
                                                                     path_to_html=nav_point.href,
                                                                     access=self.access,
                                                                     path2aws_path=self.book_image_src_path2aws_path,
-                                                                    book_id=self.file.stem if hasattr(self.file, 'stem') else 'book_id')
+                                                                    book_id=self.file_path.stem if hasattr(self.file_path, 'stem') else 'book_id')
 
         is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
         title_preprocessed = prepare_title(title)
@@ -525,12 +542,12 @@ class EpubConverter:
 
 
 if __name__ == "__main__":
-    filename = '9781614382264'
-    logger_object = BookLogger(name='epub', book_id=filename)
+    epub_file_path = '../../epub/9781614382264.epub'
+    logger_object = BookLogger(
+        name='epub', book_id=epub_file_path.split('/')[-1])
 
-    json_converter = EpubConverter(f'../../epub/{filename}.epub',
-                                   logger=logger_object)
+    json_converter = EpubConverter(epub_file_path, logger=logger_object)
     content_dict = json_converter.convert_to_dict()
 
-    with codecs.open(f'../../json/{filename}.json', 'w', encoding='utf-8') as f:
-        json.dump(content_dict, f, ensure_ascii=False)
\ No newline at end of file
+    with codecs.open(epub_file_path.replace('epub', 'json'), 'w', encoding='utf-8') as f_json:
+        json.dump(content_dict, f_json, ensure_ascii=False)