diff --git a/configs/.gitignore b/config/.gitignore similarity index 100% rename from configs/.gitignore rename to config/.gitignore diff --git a/consumer.py b/consumer.py index fc5b54b..4f39741 100644 --- a/consumer.py +++ b/consumer.py @@ -15,30 +15,30 @@ from src.docx_converter.docx_solver import DocxBook from src.epub_converter.epub_solver import EpubBook -def local_convert_book(book_type: [DocxBook, EpubBook], book_id: int, logger: logging.Logger, params: dict): - logger.info(f"Start processing book-{book_id}.") +def local_convert_book(book_type: [DocxBook, EpubBook], book_id: int, main_logger: logging.Logger, params: dict): + main_logger.info(f"Start processing book-{book_id}.") try: json_file_path = "books/json/9781614382264.json" - book = book_type(book_id=book_id, main_logger=logger, **params) + book = book_type(book_id=book_id, main_logger=main_logger, **params) book.conversion(json_file_path) except Exception as exc: raise exc - logger.info(f"Book-{book_id} has been proceeded.") + main_logger.info(f"Book-{book_id} has been proceeded.") -def convert_book(book_type: [DocxBook, EpubBook], book_id: int, logger: logging.Logger, params: Dict[str, Access]): - logger.info(f"Start processing book-{book_id}.") +def convert_book(book_type: [DocxBook, EpubBook], book_id: int, main_logger: logging.Logger, params: Dict[str, Access]): + main_logger.info(f"Start processing book-{book_id}.") try: - book = book_type(book_id=book_id, main_logger=logger, **params) + book = book_type(book_id=book_id, main_logger=main_logger, **params) book.conversion() except Exception as exc: raise exc - logger.info(f"Book-{book_id} has been proceeded.") + main_logger.info(f"Book-{book_id} has been proceeded.") -def callback(ch, method, properties, body: bytes, logger: logging.Logger, libre_locker: Event): +def callback(ch, method, properties, body: bytes, main_logger: logging.Logger, libre_locker: Event): print(f"Message: {body}.") - logger.info(f"Message: {body}.") + main_logger.info(f"Message: {body}.") try: data = json.loads(body) assert "apiURL" in data, "No apiURL field in received message." @@ -54,7 +54,7 @@ def callback(ch, method, properties, body: bytes, logger: logging.Logger, libre_ params = { "book_type": EpubBook if data.get("fileExtension") == "epub" else DocxBook, "book_id": data["id"], - "logger": logger, + "main_logger": main_logger, "params": book_params } @@ -64,9 +64,9 @@ def callback(ch, method, properties, body: bytes, logger: logging.Logger, libre_ # print(f"Active threads: {active_count()}.") except Exception as exc: if hasattr(exc, "message"): - logger.error(f"{sys.exc_info()[0]}: {exc.message}") + main_logger.error(f"{sys.exc_info()[0]}: {exc.message}") else: - logger.error(f"{sys.exc_info()[0]}: {str(exc)}") + main_logger.error(f"{sys.exc_info()[0]}: {str(exc)}") finally: pass @@ -78,7 +78,7 @@ def server_run(): try: folder_path = os.path.dirname(os.path.abspath(__file__)) config_path = Path(os.path.join( - folder_path, "configs/queue_config.json")) + folder_path, "config/queue_config.json")) with open(config_path, "r") as f: conf_param = json.load(f) @@ -104,7 +104,7 @@ def server_run(): locker.set() channel.basic_consume(queue=conf_param["queue"], auto_ack=True, - on_message_callback=partial(callback, logger=logger_object, libre_locker=locker)) + on_message_callback=partial(callback, main_logger=logger_object, libre_locker=locker)) logger_object.info("Connection has been established.") print("Waiting for messages...") logger_object.info("Waiting for messages...") diff --git a/src/access.py b/src/access.py index e6f74f6..379f47c 100644 --- a/src/access.py +++ b/src/access.py @@ -42,7 +42,7 @@ class Access: def set_credentials(self, url: str): folder_path: str = os.path.dirname( os.path.dirname(os.path.abspath(__file__))) - config_path: str = os.path.join(folder_path, "configs/api_config.json") + config_path: str = os.path.join(folder_path, "config/api_config.json") with open(config_path, "r") as f: params: Dict[str, str] = json.load(f) diff --git a/src/book_solver.py b/src/book_solver.py index 3430e47..42124f1 100644 --- a/src/book_solver.py +++ b/src/book_solver.py @@ -28,10 +28,10 @@ class BookSolver: self.preset_path = None self.book_path = None # path to book file, appears after downloading from server self.book_output_path = None # path to json file - self.logger_object = BookLogger(name=f"{__name__}_{self.book_id}") - self.logger_object.configure_book_logger(book_id=book_id) + self.book_logger = BookLogger(name=f"{__name__}_{self.book_id}") + self.book_logger.configure_book_logger(book_id=book_id) self.status_wrapper = BookStatusWrapper( - access, self.logger_object, book_id) + access, self.book_logger, book_id) assert LiveCartaConfig.SUPPORTED_LEVELS == len(LiveCartaConfig.SUPPORTED_HEADERS), \ "Length of headers doesn't match allowed levels." @@ -64,12 +64,12 @@ class BookSolver: try: with open(file_path, "wb+") as file: file.write(content) - self.logger_object.log( + self.book_logger.log( f"File was saved to folder: {folder_path}.") except Exception as exc: - self.logger_object.log( + self.book_logger.log( f"Error in writing {self.book_type} file.", logging.ERROR) - self.logger_object.log_error_to_main_log() + self.book_logger.log_error_to_main_log() raise exc return file_path @@ -86,9 +86,9 @@ class BookSolver: # self.preset_path = pathlib.Path( # str(self.save_file(content, path_to_save="preset", file_type="json"))) except FileNotFoundError as f_err: - self.logger_object.log( + self.book_logger.log( "Can't get preset file from server.", logging.ERROR) - self.logger_object.log_error_to_main_log() + self.book_logger.log_error_to_main_log() raise f_err except Exception as exc: raise exc @@ -96,17 +96,17 @@ class BookSolver: def get_book_file(self): """Method for getting and saving book from server""" try: - self.logger_object.log(f"Start receiving book file from server. URL:" + self.book_logger.log(f"Start receiving book file from server. URL:" f" {self.access.url}/doc-convert/{self.book_id}/file") content = self.access.get_file( file_path=f"{self.access.url}/doc-convert/{self.book_id}/file") - self.logger_object.log("Book file was received from server.") + self.book_logger.log("Book file was received from server.") self.book_path = pathlib.Path(self.save_file( content, path_to_save=f"books/{self.book_type}", file_type=self.book_type)) except FileNotFoundError as f_err: - self.logger_object.log( + self.book_logger.log( "Can't get book file from server.", logging.ERROR) - self.logger_object.log_error_to_main_log() + self.book_logger.log_error_to_main_log() raise f_err except Exception as exc: raise exc @@ -120,7 +120,7 @@ class BookSolver: self.book_output_path = output_path self.book_output_path = pathlib.Path(self.book_output_path) - self.logger_object.log(f"Output file path: {self.book_output_path}") + self.book_logger.log(f"Output file path: {self.book_output_path}") pathlib.Path(self.book_output_path).parent.mkdir( parents=True, exist_ok=True) @@ -131,27 +131,27 @@ class BookSolver: try: with codecs.open(self.book_output_path, "w", encoding="utf-8") as f: json.dump(content, f, ensure_ascii=False) - self.logger_object.log( + self.book_logger.log( f"Data has been saved to .json file: {self.book_output_path}") except Exception as exc: - self.logger_object.log( + self.book_logger.log( "Error has occurred while writing .json file." + str(exc), logging.ERROR) def send_json_content_to_server(self, content: Dict[str, List[Dict[str, Union[List, str]]]]): """Function sends json_content to site""" try: self.access.send_book(self.book_id, content) - self.logger_object.log(f"JSON data has been sent to server.") + self.book_logger.log(f"JSON data has been sent to server.") except Exception as exc: - self.logger_object.log( + self.book_logger.log( "Error has occurred while sending json content.", logging.ERROR) - self.logger_object.log_error_to_main_log() + self.book_logger.log_error_to_main_log() self.status_wrapper.set_error() raise exc @abstractmethod def get_converted_book(self) -> Dict[str, List[Dict[str, Union[List, str]]]]: - self.logger_object.log("Beginning of processing .json output.") + self.book_logger.log("Beginning of processing .json output.") self.status_wrapper.set_generating() return {} @@ -165,23 +165,23 @@ class BookSolver: try: self.get_preset_file() self.get_book_file() - self.logger_object.log( + self.book_logger.log( f"Beginning of conversion from .{self.book_type} to .json.") self.status_wrapper.set_processing() content_dict: Dict[str, List[Dict[Union[str, List]]]] = self.get_converted_book() # todo add delete of preset path [os.remove(path) for path in [self.book_path]] - self.logger_object.log("Beginning of processing .json output.") + self.book_logger.log("Beginning of processing .json output.") self.status_wrapper.set_generating() self.write_to_json(content_dict) self.send_json_content_to_server(content_dict) - self.logger_object.log( + self.book_logger.log( f"End of the conversion to LiveCarta format. Check {self.book_output_path}.") except Exception as exc: self.status_wrapper.set_error() - self.logger_object.log( + self.book_logger.log( "Error has occurred while conversion.", logging.ERROR) - self.logger_object.log_error_to_main_log(str(exc)) + self.book_logger.log_error_to_main_log(str(exc)) raise exc def conversion_local(self, file_path: str): @@ -192,17 +192,17 @@ class BookSolver: """ try: - self.logger_object.log( + self.book_logger.log( f"Data has been downloaded from {file_path} file") self.status_wrapper.set_processing() with codecs.open(file_path, "r", encoding="utf-8") as f_json: content_dict = json.load(f_json) - self.logger_object.log("Beginning of processing .json output.") + self.book_logger.log("Beginning of processing .json output.") self.status_wrapper.set_generating() self.send_json_content_to_server(content_dict) - self.logger_object.log(f"Sent a file to server. Check LiveCarta.") + self.book_logger.log(f"Sent a file to server. Check LiveCarta.") except Exception as exc: self.status_wrapper.set_error() - self.logger_object.log( + self.book_logger.log( "Error has occurred while reading json file." + str(exc), logging.ERROR) - self.logger_object.log_error_to_main_log(str(exc)) + self.book_logger.log_error_to_main_log(str(exc)) diff --git a/src/docx_converter/docx_solver.py b/src/docx_converter/docx_solver.py index 3356ec9..d8f2f02 100644 --- a/src/docx_converter/docx_solver.py +++ b/src/docx_converter/docx_solver.py @@ -39,41 +39,41 @@ class DocxBook(BookSolver): # 1. Converts docx to html with LibreOffice try: html_converter = Docx2LibreHtml(self.book_id, self.book_path, self.access, - self.logger_object, self.libre_locker) + self.book_logger, self.libre_locker) except Exception as exc: - self.logger_object.log( + self.book_logger.log( "Error has occurred while converting .docx to .html.", logging.ERROR) - self.logger_object.log_error_to_main_log() + self.book_logger.log_error_to_main_log() self.status_wrapper.set_error() raise exc # 2. Parses and cleans html, gets list of tags, gets footnotes try: html_preprocessor = HtmlPresetsProcessor( - logger=self.logger_object, preset_path="preset/docx_presets.json") + logger=self.book_logger, preset_path="preset/docx_presets.json") style_preprocessor = StyleReader() html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup, - logger=self.logger_object, + logger=self.book_logger, html_preprocessor=html_preprocessor, style_preprocessor=style_preprocessor) bs_tags, footnotes, top_level_headers = html_processor.process_html( self.access, html_converter.html_path, self.book_id) except Exception as exc: - self.logger_object.log( + self.book_logger.log( "Error has occurred while processing .html", logging.ERROR) - self.logger_object.log_error_to_main_log() + self.book_logger.log_error_to_main_log() self.status_wrapper.set_error() raise exc # 3. Parses from line structure to nested structure with JSONConverter try: json_converter = LibreHtml2JsonConverter(bs_tags, footnotes, top_level_headers, - self.logger_object) + self.book_logger) content_dict = json_converter.convert_to_dict() except Exception as exc: - self.logger_object.log( + self.book_logger.log( "Error has occurred while converting .html to .json", logging.ERROR) - self.logger_object.log_error_to_main_log() + self.book_logger.log_error_to_main_log() self.status_wrapper.set_error() raise exc return content_dict @@ -82,24 +82,24 @@ class DocxBook(BookSolver): if __name__ == "__main__": docx_file_path = f"../../books/docx/3cd6f561b8d7ee6a510c783784c9d018.docx" - logger_object = BookLogger(name="epub") - logger_object.configure_book_logger(book_id=docx_file_path.split("/")[-1]) + book_logger = BookLogger(name="epub") + book_logger.configure_book_logger(book_id=docx_file_path.split("/")[-1]) locker = Event() locker.set() html_converter = Docx2LibreHtml(file_path=docx_file_path, - logger=logger_object, libre_locker=locker) + logger=book_logger, libre_locker=locker) html_preprocessor = HtmlPresetsProcessor( - logger=logger_object, preset_path="../../preset/docx_presets.json") + logger=book_logger, preset_path="../../preset/docx_presets.json") style_preprocessor = StyleReader() - html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup, logger=logger_object, + html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup, logger=book_logger, html_preprocessor=html_preprocessor, style_preprocessor=style_preprocessor) content, footnotes, top_level_headers = html_processor.process_html( html_path=html_converter.html_path, book_id=html_converter.book_id) json_converter = LibreHtml2JsonConverter( - content, footnotes, top_level_headers, logger_object) + content, footnotes, top_level_headers, book_logger) content_dict = json_converter.convert_to_dict() with codecs.open(docx_file_path.replace("docx", "json"), "w", encoding="utf-8") as f: diff --git a/src/epub_converter/epub_converter.py b/src/epub_converter/epub_converter.py index 206111f..461b203 100644 --- a/src/epub_converter/epub_converter.py +++ b/src/epub_converter/epub_converter.py @@ -24,7 +24,7 @@ class EpubConverter: style_processor: StyleReader = None, html_processor: HtmlEpubProcessor = None): self.book_path = book_path self.access = access - self.logger: BookLogger = logger + self.book_logger: BookLogger = logger self.ebooklib_book = epub.read_epub(book_path) self.style_processor = style_processor self.html_processor = html_processor @@ -57,52 +57,52 @@ class EpubConverter: self.noterefs: List[Tag] = [] # start of the footnote self.footnotes: List[Tag] = [] # end of the footnote - self.logger.log("HTML files reading.") + self.book_logger.log("HTML files reading.") self.html_href2html_body_soup: Dict[str, BeautifulSoup] = self.build_href2soup_content() - self.logger.log("CSS inline style processing.") + self.book_logger.log("CSS inline style processing.") [self.style_processor.process_inline_styles_in_html_soup( self.html_href2html_body_soup[html_href]) for html_href in self.html_href2html_body_soup] - self.logger.log("CSS files processing.") + self.book_logger.log("CSS files processing.") self.html_href2css_href, self.css_href2css_content = self.build_html_and_css_relations() - self.logger.log("CSS styles fusion(inline+file).") + self.book_logger.log("CSS styles fusion(inline+file).") self.add_css_styles_to_html_soup() - self.logger.log("Image processing.") + self.book_logger.log("Image processing.") for x in chain(self.ebooklib_book.get_items_of_type(ebooklib.ITEM_IMAGE), self.ebooklib_book.get_items_of_type(ebooklib.ITEM_COVER)): file_name = x.file_name content = x.content self.img_href2img_bytes[file_name] = content - self.logger.log("Footnotes processing.") + self.book_logger.log("Footnotes processing.") for href in self.html_href2html_body_soup: self.footnotes_contents, self.noterefs, self.footnotes =\ preprocess_footnotes( self.html_href2html_body_soup[href], self.html_href2html_body_soup) - self.logger.log(f"Added {len(self.footnotes_contents)} footnotes.") + self.book_logger.log(f"Added {len(self.footnotes_contents)} footnotes.") - self.logger.log("TOC processing.") + self.book_logger.log("TOC processing.") self.build_adjacency_list_from_toc(self.ebooklib_book.toc) # build simple toc from spine if needed if self.is_toc_empty(): self.build_adjacency_list_from_spine() not_added = [ x for x in self.html_href2html_body_soup if x not in self.hrefs_added_to_toc] - self.logger.log(f"Html documents not added to TOC: {not_added}.") - self.logger.log(f"Add documents not added to TOC.") + self.book_logger.log(f"Html documents not added to TOC: {not_added}.") + self.book_logger.log(f"Add documents not added to TOC.") self.add_not_added_files_to_adjacency_list(not_added) - self.logger.log(f"Label subchapters with converter tag.") + self.book_logger.log(f"Label subchapters with converter tag.") self.label_subchapters_with_lc_tag() - self.logger.log(f"Process html internal links.") + self.book_logger.log(f"Process html internal links.") self.process_internal_links() - self.logger.log( + self.book_logger.log( f"Check if converter-chapter-marks are on the same level.") self.chapter_marks_are_same_level() - self.logger.log(f"Define chapters content.") + self.book_logger.log(f"Define chapters content.") self.define_chapters_with_content() - self.logger.log(f"Converting html_nodes to LiveCarta chapter items.") + self.book_logger.log(f"Converting html_nodes to LiveCarta chapter items.") def build_href2soup_content(self) -> Dict[str, BeautifulSoup]: # using EpubElements @@ -341,13 +341,13 @@ class EpubConverter: full_path = [ href_from_toc for href_from_toc in self.hrefs_added_to_toc if normed_path in href_from_toc] if not full_path: - self.logger.log(f"Error in {cur_file_path} file. No {normed_path} file found in added to TOC documents. " + self.book_logger.log(f"Error in {cur_file_path} file. No {normed_path} file found in added to TOC documents. " f"While processing href in {internal_link_tag}.") internal_link_tag.attrs["converter-mark"] = "bad-link" return None if len(full_path) > 1: - self.logger.log(f"Warning in {cur_file_path}. Multiple paths found {full_path} for file {href_in_link}" + self.book_logger.log(f"Warning in {cur_file_path}. Multiple paths found {full_path} for file {href_in_link}" f" while {internal_link_tag} processing. The first one will be chosen.") return full_path[0] @@ -433,7 +433,7 @@ class EpubConverter: anchor_html_content.find_all(attrs={"id": id_}) # if link is a footnote if anchor_tags: if len(anchor_tags) > 1: - self.logger.log(f"Warning in {html_href_from_toc}: multiple anchors:" + self.book_logger.log(f"Warning in {html_href_from_toc}: multiple anchors:" f"{len(anchor_tags)} found.\n" f"{anchor_tags}\n" f"While processing {internal_link_tag}") @@ -446,7 +446,7 @@ class EpubConverter: del internal_link_tag.attrs["href"] else: internal_link_tag.attrs["converter-mark"] = "bad-link" - self.logger.log(f"Error in {html_href_from_toc}." + self.book_logger.log(f"Error in {html_href_from_toc}." f" While processing {internal_link_tag} no anchor found." f" Should be anchor with new id={new_unique_id} in" f" {html_href_of_anchor} file." @@ -563,11 +563,11 @@ class EpubConverter: if nav_point.id else self.html_href2html_body_soup[nav_point.href] indent: str = " " * lvl - self.logger.log(indent + f"Chapter: {title} is processing.") + self.book_logger.log(indent + f"Chapter: {title} is processing.") is_chapter: bool = lvl <= LiveCartaConfig.SUPPORTED_LEVELS - self.logger.log(indent + "Process title.") + self.book_logger.log(indent + "Process title.") title_preprocessed: str = self.html_processor.prepare_title(title) - self.logger.log(indent + "Process content.") + self.book_logger.log(indent + "Process content.") content_preprocessed: Union[Tag, BeautifulSoup] = self.html_processor.prepare_content( title_preprocessed, content, remove_title_from_chapter=is_chapter) @@ -597,8 +597,8 @@ class EpubConverter: chapter = self.html_node_to_livecarta_chapter_item(tl_nav_point) top_level_chapters.append(chapter) top_level_dict_chapters = [x.to_dict() for x in top_level_chapters] - self.logger.log(f"Anchors found: {len(self.internal_anchors)}.") - self.logger.log("End conversion.") + self.book_logger.log(f"Anchors found: {len(self.internal_anchors)}.") + self.book_logger.log("End conversion.") return { "content": top_level_dict_chapters, diff --git a/src/epub_converter/epub_solver.py b/src/epub_converter/epub_solver.py index 1992aa3..ebd678d 100644 --- a/src/epub_converter/epub_solver.py +++ b/src/epub_converter/epub_solver.py @@ -35,17 +35,17 @@ class EpubBook(BookSolver): # Parses and cleans html, gets list of tags, gets footnotes try: html_preprocessor = HtmlPresetsProcessor( - logger=self.logger_object, preset_path="preset/epub_presets.json") - html_processor = HtmlEpubProcessor(logger=self.logger_object, + logger=self.book_logger, preset_path="preset/epub_presets.json") + html_processor = HtmlEpubProcessor(logger=self.book_logger, html_preprocessor=html_preprocessor) except Exception as exc: - self.logger_object.log( + self.book_logger.log( "Error has occurred while processing .html", logging.ERROR) - self.logger_object.log_error_to_main_log() + self.book_logger.log_error_to_main_log() self.status_wrapper.set_error() raise exc json_converter = EpubConverter( - self.book_path, access=self.access, logger=self.logger_object, + self.book_path, access=self.access, logger=self.book_logger, style_processor=style_preprocessor, html_processor=html_processor) content_dict = json_converter.convert_to_dict() return content_dict diff --git a/src/util/check_dirs.py b/src/util/check_dirs.py index b98cb31..f7a0af0 100644 --- a/src/util/check_dirs.py +++ b/src/util/check_dirs.py @@ -20,7 +20,7 @@ def check_dir(dir_path: str): if __name__ == "__main__": folders = parse_args().folders if not folders: - folders = ["books/epub", "books/docx", "books/html", "books/json", "logs", "configs"] + folders = ["books/epub", "books/docx", "books/html", "books/json", "logs", "config"] folder_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) folders = [os.path.join(folder_path, folder) for folder in folders] diff --git a/src/util/helpers.py b/src/util/helpers.py index 615078b..e4e63a6 100644 --- a/src/util/helpers.py +++ b/src/util/helpers.py @@ -27,20 +27,21 @@ class ColoredFormatter(logging.Formatter): return logging.Formatter.format(self, record) +def generate_file_path(filename: str): + folder_path = os.path.dirname(os.path.abspath(os.path.join(__file__ ,"../.."))) + folder_path = os.path.join(folder_path, f"logs/{time.strftime('%d-%m-%Y_%H-00')}/") + if not os.path.exists(folder_path): + os.makedirs(folder_path) + file_path = os.path.join(folder_path, filename) + return file_path + + class MainLogger: def __init__(self, name: str): self.main_logger = logging.getLogger(name) - def generate_file_path(self, filename: str): - folder_path = os.path.dirname(os.path.abspath(os.path.join(__file__ ,"../.."))) - folder_path = os.path.join(folder_path, f"logs/{time.strftime('%d-%m-%Y_%H-00')}/") - if not os.path.exists(folder_path): - os.makedirs(folder_path) - file_path = os.path.join(folder_path, filename) - return file_path - def configure_main_logger(self, filemode: str = "w+", logging_level: int = logging.INFO) -> logging.Logger: - file_path = self.generate_file_path("converter.log") + file_path = generate_file_path("converter.log") file_handler = logging.FileHandler(file_path, mode=filemode) self.main_logger.addHandler(file_handler) @@ -52,10 +53,10 @@ class MainLogger: return self.main_logger -class BookLogger(MainLogger): - def __init__(self, name: str): +class BookLogger: + def __init__(self, name: str, main_logger=None): """ - Method for Logger configuration. Logger will write to file. + Method for Logger configuration. Logger will write to file that descript book. Parameters ---------- name: str @@ -72,13 +73,13 @@ class BookLogger(MainLogger): format of record in log file """ - super().__init__(name) self.book_logger = logging.getLogger(name) self.book_logger.propagate = False + self.main_logger = main_logger def configure_book_logger(self, book_id: Union[int, str], filemode: str = "w+", logging_level: int = logging.INFO): - file_path = self.generate_file_path(f"{book_id}.log") + file_path = generate_file_path(f"{book_id}.log") book_logger_format: str = "%(asctime)s - %(levelname)s - %(message)s" \ " [%(filename)s:%(lineno)d in %(funcName)s]"