forked from LiveCarta/BookConverter
Change working process of docx body
This commit is contained in:
@@ -13,16 +13,15 @@ from src.inline_style_processor import modify_html_soup_with_css_styles
|
|||||||
class HtmlDocxProcessor:
|
class HtmlDocxProcessor:
|
||||||
def __init__(self, logger: BookLogger, html_soup: BeautifulSoup, html_preprocessor, style_preprocessor):
|
def __init__(self, logger: BookLogger, html_soup: BeautifulSoup, html_preprocessor, style_preprocessor):
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
self.html_soup = html_soup
|
self.body_tag: BeautifulSoup = BeautifulSoup(str(html_soup.body))
|
||||||
self.html_preprocessor = html_preprocessor
|
self.html_preprocessor = html_preprocessor
|
||||||
self.style_preprocessor = style_preprocessor
|
self.style_preprocessor = style_preprocessor
|
||||||
self.content: List[Tag] = []
|
self.content: List[Tag] = []
|
||||||
|
|
||||||
def _font_to_span(self):
|
def _font_to_span(self):
|
||||||
for font in self.html_soup.find_all("font"):
|
for font in self.body_tag.find_all("font"):
|
||||||
font.name = "span"
|
font.name = "span"
|
||||||
|
|
||||||
|
|
||||||
def _process_hrefs(self):
|
def _process_hrefs(self):
|
||||||
a_tags_with_href = self.body_tag.find_all(
|
a_tags_with_href = self.body_tag.find_all(
|
||||||
"a", {"href": re.compile("^.*http.+")})
|
"a", {"href": re.compile("^.*http.+")})
|
||||||
@@ -205,10 +204,9 @@ class HtmlDocxProcessor:
|
|||||||
else:
|
else:
|
||||||
h_tag.unwrap()
|
h_tag.unwrap()
|
||||||
|
|
||||||
|
|
||||||
def delete_content_before_toc(self):
|
def delete_content_before_toc(self):
|
||||||
# remove all tags above the <TOC>, only in content !!! body tag is not updated
|
# remove all tags above the <TOC>, only in content !!! body tag is not updated
|
||||||
toc_tag = self.html_soup.new_tag("TOC")
|
toc_tag = self.body_tag.new_tag("TOC")
|
||||||
if toc_tag in self.content:
|
if toc_tag in self.content:
|
||||||
ind = self.content.index(toc_tag) + 1
|
ind = self.content.index(toc_tag) + 1
|
||||||
self.content = self.content[ind:]
|
self.content = self.content[ind:]
|
||||||
@@ -225,12 +223,10 @@ class HtmlDocxProcessor:
|
|||||||
|
|
||||||
self.logger.log("Inline style reading.")
|
self.logger.log("Inline style reading.")
|
||||||
self.style_preprocessor.process_inline_styles_in_html_soup(
|
self.style_preprocessor.process_inline_styles_in_html_soup(
|
||||||
self.html_soup)
|
self.body_tag)
|
||||||
|
|
||||||
self.logger.log("Inline style processing.")
|
self.logger.log("Inline style processing.")
|
||||||
self.html_soup = modify_html_soup_with_css_styles(self.html_soup)
|
self.body_tag = modify_html_soup_with_css_styles(self.body_tag)
|
||||||
|
|
||||||
self.body_tag = self.html_soup.body
|
|
||||||
|
|
||||||
self.logger.log("Image processing.")
|
self.logger.log("Image processing.")
|
||||||
images = process_images(access, path_to_html=html_path,
|
images = process_images(access, path_to_html=html_path,
|
||||||
@@ -257,9 +253,9 @@ class HtmlDocxProcessor:
|
|||||||
|
|
||||||
self.logger.log(f".html using presets processing.")
|
self.logger.log(f".html using presets processing.")
|
||||||
_process_presets(html_preprocessor=self.html_preprocessor,
|
_process_presets(html_preprocessor=self.html_preprocessor,
|
||||||
html_soup=self.html_soup)
|
html_soup=self.body_tag)
|
||||||
|
|
||||||
self.content = self.body_tag.find_all(recursive=False)
|
self.content = self.body_tag.body.find_all(recursive=False)
|
||||||
# delete text before table of content if exists
|
# delete text before table of content if exists
|
||||||
self.delete_content_before_toc()
|
self.delete_content_before_toc()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user