From 9b4ecfd63c6c361f3d21f7d3eca41190bbb136c1 Mon Sep 17 00:00:00 2001 From: Kiryl Date: Mon, 27 Jun 2022 19:13:39 +0300 Subject: [PATCH] Add function - insert certain tags in parent tags --- src/epub_converter/html_epub_preprocessor.py | 87 ++++---------------- 1 file changed, 15 insertions(+), 72 deletions(-) diff --git a/src/epub_converter/html_epub_preprocessor.py b/src/epub_converter/html_epub_preprocessor.py index 60cc91e..450d776 100644 --- a/src/epub_converter/html_epub_preprocessor.py +++ b/src/epub_converter/html_epub_preprocessor.py @@ -210,28 +210,6 @@ def _remove_headings_content(content_tag, title_of_chapter: str): break -# todo remove -def _process_lists(chapter_tag: BeautifulSoup): - """ - Function - - process tags
  • . - - unwrap

    tags. - Parameters - ---------- - chapter_tag: Tag, soup object - - Returns - ------- - None - - """ - li_tags = chapter_tag.find_all("li") - for li_tag in li_tags: - if li_tag.p: - li_tag.attrs.update(li_tag.p.attrs) - li_tag.p.unwrap() - - def _preprocess_table(chapter_tag: BeautifulSoup): """Function to preprocess tables and tags(td|th|tr): style""" tables = chapter_tag.find_all("table") @@ -257,53 +235,20 @@ def _preprocess_table(chapter_tag: BeautifulSoup): table.attrs["border"] = "1" -def _preprocess_code_tags(chapter_tag: BeautifulSoup): - """ - Function - - transform , , tags into span - - add code style to this tags (if there is no) - Parameters - ---------- - chapter_tag: Tag, soup object - - Returns - ------- - None - - """ - for code in chapter_tag.find_all(re.compile("code|kbd|var")): - if not code.parent.name == "pre": - code.name = "span" - if not code.attrs.get("style"): - code.attrs["style"] = "font-size: 14px; font-family: courier new,courier,monospace;" - continue - - - -def _preprocess_pre_tags(chapter_tag: BeautifulSoup): - """ - Function preprocessing

     tags
    -    Wrap string of the tag with  if its necessary
    -    Parameters
    -    ----------
    -    chapter_tag: Tag, soup object
    -
    -    Returns
    -    ----------
    -    None
    -        Modified chapter tag
    -
    -    """
    -    for pre in chapter_tag.find_all("pre"):
    -        if pre.find_all("code|kbd|var"):
    -            continue
    -        else:
    -            code = chapter_tag.new_tag("code")
    -            # insert all items that was in pre to code and remove from pre
    -            for content in reversed(pre.contents):
    -                code.insert(0, content.extract())
    -            # wrap code with items
    -            pre.append(code)
    +def _insert_tags_in_parents(chapter_tag):
    +    """
    +    Function
    +    - wraps the content of parent tags in a configured tag
    +    - rules are read from LiveCartaConfig.INSERT_TAG_IN_PARENT_TAG:
    +      (parent selector, condition selector) -> name of the tag to insert
    +    - a parent that already contains a tag matching the condition is skipped
    +    Parameters
    +    ----------
    +    chapter_tag: soup object, modified in place
    +
    +    Returns
    +    -------
    +    None
    +
    +    """
    +    # iterate over the rules directly: regrouping them by parent selector
    +    # would silently drop rules when one parent has several conditions
    +    for (parent_selector, condition), tag_name in LiveCartaConfig.INSERT_TAG_IN_PARENT_TAG.items():
    +        for parent_tag in chapter_tag.select(parent_selector):
    +            if parent_tag.select(condition):
    +                continue
    +            tag_to_insert = chapter_tag.new_tag(tag_name)
    +            # snapshot the children first: extract() mutates parent_tag.contents
    +            for content in list(parent_tag.contents):
    +                tag_to_insert.append(content.extract())
    +            # the new tag becomes the only child of the parent
    +            parent_tag.append(tag_to_insert)
     
     
     def _class_removing(chapter_tag):
    @@ -353,10 +298,8 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
             _remove_headings_content(content_tag, title_str)
     
         # 4. processing tags (
  • , , ,
    , 
    , ) - _process_lists(content_tag) # todo regex _preprocess_table(content_tag) - _preprocess_code_tags(content_tag) # todo regex - _preprocess_pre_tags(content_tag) # todo regex + _insert_tags_in_parents(content_tag) # 5. remove classes that weren't created by converter _class_removing(content_tag)