Wrote documentation for every func/class in .py

2021-12-10 10:53:40 +03:00
parent ef3502cd0a
commit 4b1109e6b4
13 changed files with 198 additions and 172 deletions
--- a/src/docx_converter/html_docx_preprocessor.py
+++ b/src/docx_converter/html_docx_preprocessor.py
@@ -35,9 +35,7 @@ class HTMLDocxPreprocessor:
                tag.unwrap()

    def _clean_underline_links(self):
-        """
-        Function cleans meaningless <u> tags before links.
-        """
+        """Function cleans meaningless <u> tags before links."""
        underlines = self.body_tag.find_all("u")
        for u in underlines:
            if u.find_all('a'):
@@ -79,9 +77,7 @@ class HTMLDocxPreprocessor:
        return re.sub(size + "pt", str(new_size) + "px", style)

    def _font_to_span(self):
-        """
-        Function to convert <font> tag to <span>. If font style is default, then remove this tag.
-        """
+        """Function to convert <font> tag to <span>. If font style is default, then remove this tag."""
        fonts = self.body_tag.find_all("font")
        for font in fonts:
            face = font.get("face")
@@ -119,9 +115,7 @@ class HTMLDocxPreprocessor:
            self.content = self.content[ind:]

    def clean_trash(self):
-        """
-        Function to remove all styles and tags we don't need.
-        """
+        """Function to remove all styles and tags we don't need."""
        self._clean_tag('span', 'style', re.compile(r'^background: #[0-9a-fA-F]{6}$'))
        self._clean_tag('span', 'lang', re.compile(r'^ru-RU$'))  # todo: check for another languages
        self._clean_tag('span', 'style', re.compile('^letter-spacing: -?[\d\.]+pt$'))
@@ -140,9 +134,7 @@ class HTMLDocxPreprocessor:
            table.decompose()

    def _process_paragraph(self):
-        """
-        Function to process <p> tags (text-align and text-indent value).
-        """
+        """Function to process <p> tags (text-align and text-indent value)."""
        paragraphs = self.body_tag.find_all('p')

        for p in paragraphs:
@@ -193,9 +185,7 @@ class HTMLDocxPreprocessor:
                p.attrs['style'] = style

    def _process_two_columns(self):
-        """
-        Function to process paragraphs which has two columns layout.
-        """
+        """Function to process paragraphs which have a two-column layout."""
        two_columns = self.body_tag.find_all("div", style="column-count: 2")
        for div in two_columns:
            for child in div.children:
@@ -204,9 +194,7 @@ class HTMLDocxPreprocessor:
            div.unwrap()

    def _process_tables(self):
-        """
-        Function to process tables. Set "border" attribute.
-        """
+        """Function to process tables. Set "border" attribute."""
        tables = self.body_tag.find_all("table")
        for table in tables:
            tds = table.find_all("td")
@@ -296,9 +284,7 @@ class HTMLDocxPreprocessor:
        return content.strip()

    def _process_footnotes(self):
-        """
-        Function returns list of footnotes and delete them from html_soup.
-        """
+        """Function returns a list of footnotes and deletes them from html_soup."""
        footnote_anchors = self.body_tag.find_all('a', class_='sdfootnoteanc')
        footnote_content = self.body_tag.find_all('div', id=re.compile(r'^sdfootnote\d+$'))
        footnote_amt = len(footnote_anchors)
@@ -404,9 +390,7 @@ class HTMLDocxPreprocessor:
            div.decompose()

    def _process_div(self):
-        """
-        Function to process <div> tags. All the tags will be deleted from file, all content of the tags will stay.
-        """
+        """Function to process <div> tags. All the tags will be deleted from file, all content of the tags will stay."""
        divs = self.body_tag.find_all("div")

        for div in divs:
@@ -423,9 +407,7 @@ class HTMLDocxPreprocessor:
        return len(toc_links) > 0

    def _process_toc_links(self):
-        """
-        Function to extract nodes which contains TOC links, remove links from file and detect headers.
-        """
+        """Function to extract nodes which contain TOC links, remove the links from the file and detect headers."""
        toc_links = self.body_tag.find_all("a", {'name': re.compile(r'^_Toc\d+')})
        headers = [link.parent for link in toc_links]
        outline_level = "1"  # All the unknown outlines will be predicted as <h1>
@@ -448,13 +430,11 @@ class HTMLDocxPreprocessor:

    @staticmethod
    def clean_title_from_numbering(title: str):
-        """
-        Function to remove digits  from headers.
-        """
+        """Function to remove leading numbering (digits and single-letter markers) from headers."""
        title = re.sub(r'^(\s+)+', '', title)
        title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title)
        # title = re.sub(r'^(?:\.?[MDCLXVIclxvi]+\.? ?)+ ', '', title)  # delete chapter numbering from the title
-        title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title)
+        title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title)  # delete leading single-letter numbering (e.g. "A. ", "I. ") from the title
        return title

    @staticmethod
@@ -485,9 +465,7 @@ class HTMLDocxPreprocessor:
                self.apply_func_to_last_child(children[0], func)

    def _preprocessing_headings(self):
-        """
-        Function to convert all lower level headings to p tags
-        """
+        """Function to convert all lower level headings to <p> tags."""
        pattern = f'^h[{LiveCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
        header_tags = self.body_tag.find_all(re.compile(pattern))
        for tag in header_tags:
@@ -561,9 +539,7 @@ class HTMLDocxPreprocessor:
                self.top_level_headers[i]['should_be_numbered'] = True

    def _process_headings(self):
-        """
-        Function to process tags <h>.
-        """
+        """Function to process tags <h>."""
        header_tags = self.body_tag.find_all(re.compile("^h[1-9]$"))

        # 1. remove <b>, <span>
@@ -634,9 +610,7 @@ class HTMLDocxPreprocessor:
            il_tag.p.unwrap()

    def process_html(self, access, html_path, book_id):
-        """
-        Process html code to satisfy LiveCarta formatting.
-        """
+        """Process html code to satisfy LiveCarta formatting."""
        try:
            self.logger_object.log(f'Processing TOC and headers.')
            self._process_toc_links()