diff --git a/src/docx_converter/html_docx_processor.py b/src/docx_converter/html_docx_processor.py index 7868f02..b515a37 100644 --- a/src/docx_converter/html_docx_processor.py +++ b/src/docx_converter/html_docx_processor.py @@ -137,57 +137,6 @@ class HTMLDocxProcessor: for tag in body_tag.find_all([re.compile(tag) for tag in tags]): action(body_tag=body_tag, tag=tag, rule=rule) - def _process_paragraph(self): - """Function to process <p> tags (text-align and text-indent value).""" - # todo debug and remove if inline is enough - paragraphs = self.body_tag.find_all("p") - - for p in paragraphs: - # libre converts some \n into <p> with 2 <br>
- # there we remove 1 unnecessary <br>
- brs = p.find_all("br")
- text = p.text
-
- if brs and text == "\n\n" and len(brs) == 2:
- brs[0].decompose()
-
- indent_should_be_added = False
- if text and ((text[0:1] == "\t") or (text[:2] == "\n\t")):
- indent_should_be_added = True
-
- align = p.get("align")
- style = p.get("style")
-
- if style:
- indent = re.search(r"text-indent: ([\d.]{1,4})in", style)
- margin_left = re.search(r"margin-left: ([\d.]{1,4})in", style)
- margin_right = re.search(
- r"margin-right: ([\d.]{1,4})in", style)
- margin_top = re.search(r"margin-top: ([\d.]{1,4})in", style)
- margin_bottom = re.search(
- r"margin-bottom: ([\d.]{1,4})in", style)
- else:
- indent = margin_left = margin_right = \
- margin_top = margin_bottom = None
-
- if margin_left and margin_right and margin_top and margin_bottom and \
- margin_left.group(1) == "0.6" and margin_right.group(1) == "0.6" and \
- margin_top.group(1) == "0.14" and margin_bottom.group(1) == "0.11":
- p.wrap(BeautifulSoup(features="lxml").new_tag("blockquote"))
-
- p.attrs = {}
- style = ""
-
- if align is not None and align != LiveCartaConfig.DEFAULT_ALIGN_STYLE:
- style += f"text-align: {align};"
-
- if indent is not None or indent_should_be_added:
- # indent = indent.group(1)
- style += f"text-indent: {LiveCartaConfig.INDENT};"
-
- if style:
- p.attrs["style"] = style
-
def _process_quotes(self):
"""
Function to process block quotes.
@@ -247,10 +196,8 @@ class HTMLDocxProcessor:
if match:
size = match.group(1)
units = match.group(2)
-
if units == "pt":
size = self.convert_pt_to_px(size)
-
sizes.append(float(size))
width = td.get("width")
td.attrs = {}
@@ -259,7 +206,6 @@ class HTMLDocxProcessor:
if sizes:
border_size = sum(sizes) / len(sizes)
table.attrs["border"] = f"{border_size:.2}"
-
self.tables_amount = len(tables)
def _process_hrefs(self):
@@ -278,13 +224,6 @@ class HTMLDocxProcessor:
tag.string = tag.text.replace("\u200b", "") # zero-width-space
tag["href"] = tag.attrs.get("href").replace("%E2%80%8C", "")
- def _process_div(self):
- # todo unwrapper
- """Function to process