Change documentation

This commit is contained in:
Kiryl
2022-03-28 13:24:52 +03:00
parent 2798a93def
commit 8473ff443a

View File

@@ -84,13 +84,13 @@ def preprocess_table(body_tag: BeautifulSoup):
r"[^-]width: ?(\d+\.?\d*)(p[tx])", style) r"[^-]width: ?(\d+\.?\d*)(p[tx])", style)
if width_match: if width_match:
size = width_match.group(1) size = width_match.group(1)
units = width_match.group(2)
width = size+'px' width = size+'px'
t_tag.attrs['width'] = t_tag.get('width') or width t_tag.attrs['width'] = t_tag.get('width') or width
if t_tag.attrs.get('style'): if t_tag.attrs.get('style'):
t_tag.attrs['style'] = t_tag.attrs['style'].replace('border:0;', '') t_tag.attrs['style'] = t_tag.attrs['style'].replace(
'border:0;', '')
elif t_tag.attrs.get('style') == '': elif t_tag.attrs.get('style') == '':
del t_tag.attrs['style'] del t_tag.attrs['style']
@@ -259,9 +259,7 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
def unwrap_structural_tags(body_tag): def unwrap_structural_tags(body_tag):
""" """Main function that works with structure of html. Make changes inplace.
Main function that works with structure of html.
Make changes inplace.
1. Extracts tags that are not needed 1. Extracts tags that are not needed
@@ -434,14 +432,23 @@ def unwrap_structural_tags(body_tag):
def get_tags_between_chapter_marks(first_id, href, html_soup): def get_tags_between_chapter_marks(first_id, href, html_soup):
""" """After processing on a first_id that corresponds to current chapter,
After processing on a first_id that corresponds to current chapter,
from initial html_soup all tags from current chapter are extracted from initial html_soup all tags from current chapter are extracted
:param first_id: id that point where a chapter starts. A Tag with class: 'converter-chapter-mark' Parameters
:param href: name of current chapter's file ----------
:param html_soup: soup object of current file first_id :
:return: list [Tag, NavigableString]; chapter's tags Id that points to where a chapter starts. A Tag with class: 'converter-chapter-mark'
href :
Name of current chapter's file
html_soup :
Soup object of current file
Returns
-------
tags : list [Tag, NavigableString]
Chapter's tags
""" """
marked_tags = html_soup.find( marked_tags = html_soup.find(
attrs={'id': first_id, 'class': 'converter-chapter-mark'}) attrs={'id': first_id, 'class': 'converter-chapter-mark'})
@@ -604,12 +611,27 @@ def prepare_title(title_of_chapter: str) -> str:
def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_from_chapter: bool) -> str: def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_from_chapter: bool) -> str:
""" """Function finalise processing/cleaning content
Function finalise processing/cleaning content Parameters
----------
title_str : str
content_tag : BeautifulSoup
remove_title_from_chapter : bool
Steps
----------
1. cleaning \n 1. cleaning \n
2. heading removal 2. heading removal
3. processing tags 3. processing tags
4. class removal 4. class removal
Returns
-------
str
Prepared content
""" """
# 0. cleaning \n # 0. cleaning \n
to_remove = [] to_remove = []