forked from LiveCarta/BookConverter
Little annot fix
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
import pathlib
|
import pathlib
|
||||||
from typing import List, Dict, Union
|
from typing import List, Tuple, Dict, Union
|
||||||
from bs4 import BeautifulSoup, Tag, NavigableString
|
from bs4 import BeautifulSoup, Tag, NavigableString
|
||||||
|
|
||||||
from src.util.helpers import BookLogger
|
from src.util.helpers import BookLogger
|
||||||
@@ -26,8 +26,6 @@ class HTMLDocxProcessor:
|
|||||||
"attr_replacer": self._replace_attr,
|
"attr_replacer": self._replace_attr,
|
||||||
"unwrapper": self._unwrap_tag
|
"unwrapper": self._unwrap_tag
|
||||||
}
|
}
|
||||||
self.top_level_headers = None
|
|
||||||
self.content = list()
|
|
||||||
|
|
||||||
def _process_toc_links(self):
|
def _process_toc_links(self):
|
||||||
"""Function to extract nodes which contains TOC links, remove links from file and detect headers."""
|
"""Function to extract nodes which contains TOC links, remove links from file and detect headers."""
|
||||||
@@ -95,7 +93,7 @@ class HTMLDocxProcessor:
|
|||||||
Function do action with tags
|
Function do action with tags
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
body_tag: BeautifulSoup
|
body_tag: Tag
|
||||||
Tag & contents of the chapter tag
|
Tag & contents of the chapter tag
|
||||||
rules: List[Dict[str, Union[List[str], str, Dict[str, Union[List[Dict[str, str]], int, str]]]]]
|
rules: List[Dict[str, Union[List[str], str, Dict[str, Union[List[Dict[str, str]], int, str]]]]]
|
||||||
list of conditions when fire function
|
list of conditions when fire function
|
||||||
@@ -430,7 +428,10 @@ class HTMLDocxProcessor:
|
|||||||
ind = self.content.index(toc_tag) + 1
|
ind = self.content.index(toc_tag) + 1
|
||||||
self.content = self.content[ind:]
|
self.content = self.content[ind:]
|
||||||
|
|
||||||
def process_html(self, access=None, html_path: pathlib.Path = "", book_id: int = 0):
|
def process_html(self,
|
||||||
|
access=None,
|
||||||
|
html_path: pathlib.Path = "",
|
||||||
|
book_id: int = 0) -> Tuple[List[Tag], List[str], List[Dict[str, Union[str, bool]]]]:
|
||||||
"""Process html code to satisfy LiveCarta formatting."""
|
"""Process html code to satisfy LiveCarta formatting."""
|
||||||
self.logger.log("Beginning of processing .html file.")
|
self.logger.log("Beginning of processing .html file.")
|
||||||
|
|
||||||
@@ -470,13 +471,14 @@ class HTMLDocxProcessor:
|
|||||||
f"{len(self.images)} images have been processed.")
|
f"{len(self.images)} images have been processed.")
|
||||||
|
|
||||||
self.logger.log("Footnotes processing.")
|
self.logger.log("Footnotes processing.")
|
||||||
self.footnotes = process_footnotes(self.body_tag)
|
self.footnotes: List[str] = process_footnotes(self.body_tag)
|
||||||
self.logger.log(
|
self.logger.log(
|
||||||
f"{len(self.footnotes)} footnotes have been processed.")
|
f"{len(self.footnotes)} footnotes have been processed.")
|
||||||
|
|
||||||
self._process_div()
|
self._process_div()
|
||||||
|
|
||||||
self.top_level_headers = self._get_top_level_headers()
|
self.top_level_headers: List[Dict[str, Union[str, bool]]]\
|
||||||
|
= self._get_top_level_headers()
|
||||||
self._mark_introduction_headers()
|
self._mark_introduction_headers()
|
||||||
|
|
||||||
self._process_headings()
|
self._process_headings()
|
||||||
|
|||||||
@@ -77,17 +77,17 @@ class StylePreprocessor:
|
|||||||
if has_style_attrs.group(1):
|
if has_style_attrs.group(1):
|
||||||
multiplier = 5.76 if is_indent else 0.16
|
multiplier = 5.76 if is_indent else 0.16
|
||||||
size_value = float(size_value.replace("%", "")) * multiplier
|
size_value = float(size_value.replace("%", "")) * multiplier
|
||||||
return str(size_value)+'px'
|
return str(size_value) + "px"
|
||||||
elif has_style_attrs.group(3):
|
elif has_style_attrs.group(3):
|
||||||
multiplier = 18 if is_indent else 16
|
multiplier = 18 if is_indent else 16
|
||||||
size_value = float(size_value.replace("em", "")) * multiplier
|
size_value = float(size_value.replace("em", "")) * multiplier
|
||||||
return str(size_value)+'px'
|
return str(size_value) + "px"
|
||||||
elif has_style_attrs.group(5):
|
elif has_style_attrs.group(5):
|
||||||
size_value = float(size_value.replace("pt", "")) * 4/3
|
size_value = float(size_value.replace("pt", "")) * 4/3
|
||||||
return str(size_value)+'px'
|
return str(size_value) + "px"
|
||||||
elif has_style_attrs.group(7):
|
elif has_style_attrs.group(7):
|
||||||
size_value = float(size_value.replace("in", "")) * 96
|
size_value = float(size_value.replace("in", "")) * 96
|
||||||
return str(size_value)+'px'
|
return str(size_value) + "px"
|
||||||
else:
|
else:
|
||||||
return ""
|
return ""
|
||||||
return size_value
|
return size_value
|
||||||
|
|||||||
Reference in New Issue
Block a user