forked from LiveCarta/BookConverter
Change working process of docx body
This commit is contained in:
@@ -13,16 +13,15 @@ from src.inline_style_processor import modify_html_soup_with_css_styles
|
||||
class HtmlDocxProcessor:
|
||||
def __init__(self, logger: BookLogger, html_soup: BeautifulSoup, html_preprocessor, style_preprocessor):
|
||||
self.logger = logger
|
||||
self.html_soup = html_soup
|
||||
self.body_tag: BeautifulSoup = BeautifulSoup(str(html_soup.body))
|
||||
self.html_preprocessor = html_preprocessor
|
||||
self.style_preprocessor = style_preprocessor
|
||||
self.content: List[Tag] = []
|
||||
|
||||
def _font_to_span(self):
|
||||
for font in self.html_soup.find_all("font"):
|
||||
for font in self.body_tag.find_all("font"):
|
||||
font.name = "span"
|
||||
|
||||
|
||||
def _process_hrefs(self):
|
||||
a_tags_with_href = self.body_tag.find_all(
|
||||
"a", {"href": re.compile("^.*http.+")})
|
||||
@@ -205,10 +204,9 @@ class HtmlDocxProcessor:
|
||||
else:
|
||||
h_tag.unwrap()
|
||||
|
||||
|
||||
def delete_content_before_toc(self):
|
||||
# remove all tags above the <TOC>, only in content !!! body tag is not updated
|
||||
toc_tag = self.html_soup.new_tag("TOC")
|
||||
toc_tag = self.body_tag.new_tag("TOC")
|
||||
if toc_tag in self.content:
|
||||
ind = self.content.index(toc_tag) + 1
|
||||
self.content = self.content[ind:]
|
||||
@@ -225,12 +223,10 @@ class HtmlDocxProcessor:
|
||||
|
||||
self.logger.log("Inline style reading.")
|
||||
self.style_preprocessor.process_inline_styles_in_html_soup(
|
||||
self.html_soup)
|
||||
self.body_tag)
|
||||
|
||||
self.logger.log("Inline style processing.")
|
||||
self.html_soup = modify_html_soup_with_css_styles(self.html_soup)
|
||||
|
||||
self.body_tag = self.html_soup.body
|
||||
self.body_tag = modify_html_soup_with_css_styles(self.body_tag)
|
||||
|
||||
self.logger.log("Image processing.")
|
||||
images = process_images(access, path_to_html=html_path,
|
||||
@@ -257,9 +253,9 @@ class HtmlDocxProcessor:
|
||||
|
||||
self.logger.log(f".html using presets processing.")
|
||||
_process_presets(html_preprocessor=self.html_preprocessor,
|
||||
html_soup=self.html_soup)
|
||||
html_soup=self.body_tag)
|
||||
|
||||
self.content = self.body_tag.find_all(recursive=False)
|
||||
self.content = self.body_tag.body.find_all(recursive=False)
|
||||
# delete text before table of content if exists
|
||||
self.delete_content_before_toc()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user