forked from LiveCarta/BookConverter
epub converter: add table processing, class removal
This commit is contained in:
@@ -76,6 +76,12 @@ LIVECARTA_STYLE_ATTRS = {
|
|||||||
'vertical-align': ['super'], # <sup>
|
'vertical-align': ['super'], # <sup>
|
||||||
'color': [],
|
'color': [],
|
||||||
'background-color': [],
|
'background-color': [],
|
||||||
|
'width': [],
|
||||||
|
'border-top-width': [],
|
||||||
|
'border-right-width': [],
|
||||||
|
'border-left-width': [],
|
||||||
|
'border-bottom-width': [],
|
||||||
|
'border': []
|
||||||
}
|
}
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@@ -85,7 +91,7 @@ Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING shou
|
|||||||
to suit livecarta style convention.
|
to suit livecarta style convention.
|
||||||
"""
|
"""
|
||||||
LIVECARTA_STYLE_ATTRS_MAPPING = {
|
LIVECARTA_STYLE_ATTRS_MAPPING = {
|
||||||
'text-indent': lambda x: LawCartaConfig.INDENT,
|
'text-indent': lambda x: LawCartaConfig.INDENT if x != '0' else '',
|
||||||
'font-variant': lambda x: x,
|
'font-variant': lambda x: x,
|
||||||
'text-align': lambda x: x,
|
'text-align': lambda x: x,
|
||||||
'font': lambda x: '',
|
'font': lambda x: '',
|
||||||
@@ -93,6 +99,10 @@ LIVECARTA_STYLE_ATTRS_MAPPING = {
|
|||||||
'font-size': convert_font_size,
|
'font-size': convert_font_size,
|
||||||
'color': lambda x: LawCartaConfig.HTML42LIVECARTA_COLORS.get(str2color_name(x), ''),
|
'color': lambda x: LawCartaConfig.HTML42LIVECARTA_COLORS.get(str2color_name(x), ''),
|
||||||
'background-color': lambda x: LawCartaConfig.HTML42LIVECARTA_COLORS.get(str2color_name(x), ''),
|
'background-color': lambda x: LawCartaConfig.HTML42LIVECARTA_COLORS.get(str2color_name(x), ''),
|
||||||
|
'border-top-width': lambda x: x if x != '0' else '',
|
||||||
|
'border-right-width': lambda x: x if x != '0' else '',
|
||||||
|
'border-left-width': lambda x: x if x != '0' else '',
|
||||||
|
'border-bottom-width': lambda x: x if x != '0' else '',
|
||||||
}
|
}
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@@ -162,7 +172,7 @@ def clean_css(css):
|
|||||||
def add_inline_style_to_html_soup(soup1, css_text):
|
def add_inline_style_to_html_soup(soup1, css_text):
|
||||||
livecarta_tmp_ids = []
|
livecarta_tmp_ids = []
|
||||||
h_regex = f'(^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$)'
|
h_regex = f'(^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$)'
|
||||||
for i, x in enumerate(soup1.find_all(re.compile('(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)' + h_regex))):
|
for i, x in enumerate(soup1.find_all(re.compile('(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|' + h_regex))):
|
||||||
x.attrs['livecarta_id'] = i
|
x.attrs['livecarta_id'] = i
|
||||||
livecarta_tmp_ids.append(i)
|
livecarta_tmp_ids.append(i)
|
||||||
|
|
||||||
|
|||||||
@@ -51,8 +51,44 @@ def preprocess_figure():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
# CSS absolute lengths: 1pt = 1/72in and 1px = 1/96in, hence 1pt = 96/72 px.
_PX_PER_PT = 96 / 72

# Shared tail of every pattern below: optional space, a number, then `px` or `pt`.
_CSS_LENGTH = r": ?(\d+\.?\d*)(p[tx])"

# Tried in this order; the first property found in a cell's style wins.
_BORDER_PATTERNS = tuple(
    re.compile(prop + _CSS_LENGTH)
    for prop in (
        "border",
        "border-top-width",
        "border-left-width",
        "border-right-width",
        "border-bottom-width",
    )
)

# A lookbehind (rather than consuming a non-hyphen character) rejects
# `border-*-width` / `max-width` while still matching a style that *starts*
# with `width:`.
_WIDTH_PATTERN = re.compile(r"(?<!-)width" + _CSS_LENGTH)


def _length_to_px(size: str, units: str) -> float:
    """Convert a CSS length (numeric string plus 'px' or 'pt') to pixels."""
    value = float(size)
    return value * _PX_PER_PT if units == "pt" else value


def _cell_border_px(style: str):
    """Return the cell's border size in pixels, or None if the style has none."""
    for pattern in _BORDER_PATTERNS:
        match = pattern.search(style)
        if match:
            return _length_to_px(*match.groups())
    return None


def _cell_width(style: str) -> str:
    """Return the cell's CSS width as '<number>px', or '' if the style has none."""
    match = _WIDTH_PATTERN.search(style)
    if not match:
        return ''
    size, units = match.groups()
    if units == 'px':
        # Keep the author's own spelling of the number untouched.
        return size + 'px'
    return f'{round(_length_to_px(size, units), 2):g}px'


def preprocess_table(body_tag: BeautifulSoup):
    """Normalise every <table> under *body_tag* in place.

    For each <td> of each table:
      * the border size and the width are read from the inline ``style``
        (``pt`` values are converted to ``px``);
      * all attributes of the cell are dropped, then ``width`` is restored --
        an explicit ``width`` attribute takes priority over the CSS width.

    If at least one cell declared a border, the table's ``border`` attribute
    is set to the average border size in pixels, rounded to 2 decimals.

    :param body_tag: parsed document (or sub-tree) whose tables are rewritten.
    :return: None, the tree is modified in place.
    """
    for table in body_tag.find_all("table"):
        border_sizes = []

        for td in table.find_all("td"):
            style = td.get('style')
            width = ''
            if style:
                border_px = _cell_border_px(style)
                if border_px is not None:
                    border_sizes.append(border_px)
                width = _cell_width(style)

            width = td.get('width') or width

            # NOTE(review): this wipes *every* attribute, including
            # colspan/rowspan -- confirm that dropping cell spans is intended.
            td.attrs = {}
            if width:
                td.attrs['width'] = width

        if border_sizes:
            border_size = sum(border_sizes) / len(border_sizes)
            # str(round(...)) instead of the `.2` format spec: `.2` means two
            # *significant digits*, so 12.5 was rendered as '1.2e+01'.
            table.attrs['border'] = str(round(border_size, 2))
|
||||||
|
|
||||||
|
|
||||||
def _process_lists(body_tag):
|
def _process_lists(body_tag):
|
||||||
@@ -275,6 +311,11 @@ def prepare_title_and_content(title, content_tag: BeautifulSoup):
|
|||||||
clean_headings_content(content_tag, title_str)
|
clean_headings_content(content_tag, title_str)
|
||||||
_process_lists(content_tag)
|
_process_lists(content_tag)
|
||||||
_preprocessing_headings(content_tag)
|
_preprocessing_headings(content_tag)
|
||||||
|
preprocess_table(content_tag)
|
||||||
|
# 2. class removal
|
||||||
|
for tag in content_tag.find_all(recursive=True):
|
||||||
|
if hasattr(tag, 'attrs') and tag.attrs.get('class'):
|
||||||
|
del tag.attrs['class']
|
||||||
|
|
||||||
# content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))
|
# content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))
|
||||||
title_str = clean_title_from_numbering(title_str)
|
title_str = clean_title_from_numbering(title_str)
|
||||||
|
|||||||
Reference in New Issue
Block a user