Annotations for Docx Converter

This commit is contained in:
Kiryl
2022-08-05 12:36:39 +03:00
parent 2122fb82fa
commit 18642ec5fd
6 changed files with 86 additions and 92 deletions

View File

@@ -1,12 +1,15 @@
import re
import logging
from copy import copy
from typing import List, Tuple, Dict, Union
from bs4 import Tag
from src.livecarta_config import LiveCartaConfig
class LibreHTML2JSONConverter:
def __init__(self, content, footnotes, top_level_headers, logger_object, book_api_status=None):
def __init__(self, content: List[Tag], footnotes: List[str], top_level_headers: List[Dict[str, Union[str, bool]]],
logger_object, book_api_status=None):
self.content_dict = None
self.content = content
self.footnotes = footnotes
@@ -33,7 +36,7 @@ class LibreHTML2JSONConverter:
return new_text
# TODO: rethink the function structure without indexes.
def header_to_livecarta_chapter_item(self, ind) -> (dict, int):
def header_to_livecarta_chapter_item(self, ind: int) -> Union[Tuple[Dict[str, Union[str, List]], int], str]:
"""
Function processes a header and collects all content for it.
Parameters
@@ -90,7 +93,7 @@ class LibreHTML2JSONConverter:
return ""
@staticmethod
def _is_empty_p_tag(tag):
def _is_empty_p_tag(tag: Tag) -> bool:
if tag.name != "p":
return False
@@ -102,7 +105,6 @@ class LibreHTML2JSONConverter:
text = re.sub(r"\s+", "", temp_tag.text)
if text:
return False
return True
def convert_to_dict(self):
@@ -148,9 +150,7 @@ class LibreHTML2JSONConverter:
# Add the is_introduction field to the JSON structure.
# After content before the TOC is deleted, some chapters may have been removed.
if self.top_level_headers:
same_first_titles = self.top_level_headers[0]["title"] == json_strc[0]["title"]
is_first_header_introduction = not self.top_level_headers[0]["should_be_numbered"]
json_strc[0]["is_introduction"] = is_first_header_introduction
self.content_dict = {