forked from LiveCarta/BookConverter
add table processing to converter
This commit is contained in:
51
src/book.py
51
src/book.py
@@ -48,6 +48,7 @@ class Book:
|
||||
self.footnotes = list()
|
||||
self.images = list()
|
||||
self.content_dict = dict()
|
||||
self.tables_amount = 0
|
||||
|
||||
assert self.SUPPORTED_LEVELS == len(self.SUPPORTED_HEADERS), \
|
||||
"Length of headers doesn't match allowd levels."
|
||||
@@ -266,7 +267,11 @@ class Book:
|
||||
u[0].unwrap()
|
||||
|
||||
@classmethod
|
||||
def convert_pt_to_px(cls, style):
|
||||
def convert_pt_to_px(cls, value):
|
||||
return round(cls.FONT_CONVERT_RATIO * float(value))
|
||||
|
||||
@classmethod
|
||||
def convert_font_pt_to_px(cls, style):
|
||||
"""
|
||||
Method converts point in the font-size to pixels.
|
||||
|
||||
@@ -279,7 +284,7 @@ class Book:
|
||||
return style
|
||||
|
||||
size = size.group(1)
|
||||
new_size = round(cls.FONT_CONVERT_RATIO * float(size))
|
||||
new_size = cls.convert_pt_to_px(size)
|
||||
|
||||
if new_size == cls.LAWCARTA_DEFAULT_FONT_SIZE:
|
||||
return ""
|
||||
@@ -298,7 +303,7 @@ class Book:
|
||||
font.attrs = {}
|
||||
font.name = "span"
|
||||
if style:
|
||||
style = self.convert_pt_to_px(style)
|
||||
style = self.convert_font_pt_to_px(style)
|
||||
if style != "":
|
||||
font.attrs["style"] = style
|
||||
if face is not None:
|
||||
@@ -402,6 +407,42 @@ class Book:
|
||||
child["class"] = "columns2"
|
||||
div.unwrap()
|
||||
|
||||
def _process_tables(self):
    """
    Function to process tables. Set "border" attribute.

    For every <table> found in ``self.body_tag``:

    * each <td> is inspected for an inline ``border: <number>pt|px``
      declaration; sizes given in points are converted with
      ``self.convert_pt_to_px``;
    * all attributes of the <td> are dropped except ``width``;
    * if at least one border size was found, the table receives a
      ``border`` attribute holding the average size, formatted in
      fixed-point notation with two decimals.

    The number of tables found is stored in ``self.tables_amount``.
    """
    # Compiled once: the same pattern is applied to every cell.
    border_pattern = re.compile(r"border: ?(\d+\.?\d*)(p[tx])")
    tables = self.body_tag.find_all("table")

    for table in tables:
        sizes = []

        for td in table.find_all("td"):
            style = td.get('style')
            match = border_pattern.search(style) if style else None

            if match:
                size, units = match.groups()
                if units == "pt":
                    size = self.convert_pt_to_px(size)
                sizes.append(float(size))

            # Read the width before the attributes are wiped below.
            width = td.get('width')

            td.attrs = {}
            if width:
                td.attrs['width'] = width

        if sizes:
            border_size = sum(sizes) / len(sizes)
            # ".2f", not ".2": without a presentation type the precision
            # means significant digits, so 10.0 was rendered as "1e+01".
            table.attrs['border'] = f'{border_size:.2f}'

    self.tables_amount = len(tables)
|
||||
|
||||
# def _process_quotes(self):
|
||||
# """
|
||||
# Function to process <dl> tags. All tags will be replaced with <blockquote> tags.
|
||||
@@ -603,6 +644,10 @@ class Book:
|
||||
self._process_two_columns()
|
||||
# self._process_quotes()
|
||||
|
||||
self.log('Tables processing.')
|
||||
self._process_tables()
|
||||
self.log(f'{self.tables_amount} tables have been processed.')
|
||||
|
||||
self.log('Footnotes processing.')
|
||||
self._process_footnotes()
|
||||
self.log(f'{len(self.footnotes)} footnotes have been processed.')
|
||||
|
||||
Reference in New Issue
Block a user