Small annotation fix

This commit is contained in:
Kiryl
2022-09-05 17:25:02 +03:00
parent 9e31d3152c
commit 6bd440a600
2 changed files with 13 additions and 11 deletions

View File

@@ -1,7 +1,7 @@
import re import re
import json import json
import pathlib import pathlib
from typing import List, Dict, Union from typing import List, Tuple, Dict, Union
from bs4 import BeautifulSoup, Tag, NavigableString from bs4 import BeautifulSoup, Tag, NavigableString
from src.util.helpers import BookLogger from src.util.helpers import BookLogger
@@ -26,8 +26,6 @@ class HTMLDocxProcessor:
"attr_replacer": self._replace_attr, "attr_replacer": self._replace_attr,
"unwrapper": self._unwrap_tag "unwrapper": self._unwrap_tag
} }
self.top_level_headers = None
self.content = list()
def _process_toc_links(self): def _process_toc_links(self):
"""Function to extract nodes which contains TOC links, remove links from file and detect headers.""" """Function to extract nodes which contains TOC links, remove links from file and detect headers."""
@@ -95,7 +93,7 @@ class HTMLDocxProcessor:
Function do action with tags Function do action with tags
Parameters Parameters
---------- ----------
body_tag: BeautifulSoup body_tag: Tag
Tag & contents of the chapter tag Tag & contents of the chapter tag
rules: List[Dict[str, Union[List[str], str, Dict[str, Union[List[Dict[str, str]], int, str]]]]] rules: List[Dict[str, Union[List[str], str, Dict[str, Union[List[Dict[str, str]], int, str]]]]]
list of conditions when fire function list of conditions when fire function
@@ -430,7 +428,10 @@ class HTMLDocxProcessor:
ind = self.content.index(toc_tag) + 1 ind = self.content.index(toc_tag) + 1
self.content = self.content[ind:] self.content = self.content[ind:]
def process_html(self, access=None, html_path: pathlib.Path = "", book_id: int = 0): def process_html(self,
access=None,
html_path: pathlib.Path = "",
book_id: int = 0) -> Tuple[List[Tag], List[str], List[Dict[str, Union[str, bool]]]]:
"""Process html code to satisfy LiveCarta formatting.""" """Process html code to satisfy LiveCarta formatting."""
self.logger.log("Beginning of processing .html file.") self.logger.log("Beginning of processing .html file.")
@@ -470,13 +471,14 @@ class HTMLDocxProcessor:
f"{len(self.images)} images have been processed.") f"{len(self.images)} images have been processed.")
self.logger.log("Footnotes processing.") self.logger.log("Footnotes processing.")
self.footnotes = process_footnotes(self.body_tag) self.footnotes: List[str] = process_footnotes(self.body_tag)
self.logger.log( self.logger.log(
f"{len(self.footnotes)} footnotes have been processed.") f"{len(self.footnotes)} footnotes have been processed.")
self._process_div() self._process_div()
self.top_level_headers = self._get_top_level_headers() self.top_level_headers: List[Dict[str, Union[str, bool]]]\
= self._get_top_level_headers()
self._mark_introduction_headers() self._mark_introduction_headers()
self._process_headings() self._process_headings()

View File

@@ -77,17 +77,17 @@ class StylePreprocessor:
if has_style_attrs.group(1): if has_style_attrs.group(1):
multiplier = 5.76 if is_indent else 0.16 multiplier = 5.76 if is_indent else 0.16
size_value = float(size_value.replace("%", "")) * multiplier size_value = float(size_value.replace("%", "")) * multiplier
return str(size_value)+'px' return str(size_value) + "px"
elif has_style_attrs.group(3): elif has_style_attrs.group(3):
multiplier = 18 if is_indent else 16 multiplier = 18 if is_indent else 16
size_value = float(size_value.replace("em", "")) * multiplier size_value = float(size_value.replace("em", "")) * multiplier
return str(size_value)+'px' return str(size_value) + "px"
elif has_style_attrs.group(5): elif has_style_attrs.group(5):
size_value = float(size_value.replace("pt", "")) * 4/3 size_value = float(size_value.replace("pt", "")) * 4/3
return str(size_value)+'px' return str(size_value) + "px"
elif has_style_attrs.group(7): elif has_style_attrs.group(7):
size_value = float(size_value.replace("in", "")) * 96 size_value = float(size_value.replace("in", "")) * 96
return str(size_value)+'px' return str(size_value) + "px"
else: else:
return "" return ""
return size_value return size_value