forked from LiveCarta/BookConverter
Annotations for Docx Converter
This commit is contained in:
@@ -1,12 +1,15 @@
|
||||
import re
|
||||
import logging
|
||||
from copy import copy
|
||||
from typing import List, Tuple, Dict, Union
|
||||
from bs4 import Tag
|
||||
|
||||
from src.livecarta_config import LiveCartaConfig
|
||||
|
||||
|
||||
class LibreHTML2JSONConverter:
|
||||
def __init__(self, content, footnotes, top_level_headers, logger_object, book_api_status=None):
|
||||
def __init__(self, content: List[Tag], footnotes: List[str], top_level_headers: List[Dict[str, Union[str, bool]]],
|
||||
logger_object, book_api_status=None):
|
||||
self.content_dict = None
|
||||
self.content = content
|
||||
self.footnotes = footnotes
|
||||
@@ -33,7 +36,7 @@ class LibreHTML2JSONConverter:
|
||||
return new_text
|
||||
|
||||
# TODO: rethink the function structure without indexes.
|
||||
def header_to_livecarta_chapter_item(self, ind) -> (dict, int):
|
||||
def header_to_livecarta_chapter_item(self, ind: int) -> Union[Tuple[Dict[str, Union[str, List]], int], str]:
|
||||
"""
|
||||
Processes a header and collects all the content that belongs to it.
|
||||
Parameters
|
||||
@@ -90,7 +93,7 @@ class LibreHTML2JSONConverter:
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _is_empty_p_tag(tag):
|
||||
def _is_empty_p_tag(tag: Tag) -> bool:
|
||||
if tag.name != "p":
|
||||
return False
|
||||
|
||||
@@ -102,7 +105,6 @@ class LibreHTML2JSONConverter:
|
||||
text = re.sub(r"\s+", "", temp_tag.text)
|
||||
if text:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def convert_to_dict(self):
|
||||
@@ -148,9 +150,7 @@ class LibreHTML2JSONConverter:
|
||||
# Add is_introduction field to json structure
|
||||
# after the content before the TOC is deleted, some chapters may have been removed
|
||||
if self.top_level_headers:
|
||||
same_first_titles = self.top_level_headers[0]["title"] == json_strc[0]["title"]
|
||||
is_first_header_introduction = not self.top_level_headers[0]["should_be_numbered"]
|
||||
|
||||
json_strc[0]["is_introduction"] = is_first_header_introduction
|
||||
|
||||
self.content_dict = {
|
||||
|
||||
Reference in New Issue
Block a user