forked from LiveCarta/BookConverter
fix quotes processing
This commit is contained in:
57
src/book.py
57
src/book.py
@@ -362,13 +362,31 @@ class Book:
|
||||
|
||||
if style:
|
||||
indent = re.search(r'text-indent: ([\d\.]{1,4})in', style)
|
||||
margin_left = re.search(r'margin-left: ([\d\.]{1,4})in', style)
|
||||
margin_right= re.search(r'margin-right: ([\d\.]{1,4})in', style)
|
||||
margin_top = re.search(r'margin-top: ([\d\.]{1,4})in', style)
|
||||
margin_bottom = re.search(r'margin-bottom: ([\d\.]{1,4})in', style)
|
||||
else:
|
||||
indent = None
|
||||
margin_left = None
|
||||
margin_right = None
|
||||
margin_top = None
|
||||
margin_bottom = None
|
||||
|
||||
if margin_left and margin_right and margin_top and margin_bottom and \
|
||||
margin_left.group(1) == '0.6' and margin_right.group(1) == '0.6' and \
|
||||
margin_top.group(1) == '0.14' and margin_bottom.group(1) == '0.11':
|
||||
blockquote = BeautifulSoup(features='lxml').new_tag('blockquote')
|
||||
blockquote.append(BeautifulSoup(features='lxml').new_tag('p'))
|
||||
else:
|
||||
blockquote = None
|
||||
|
||||
p.attrs = {}
|
||||
style = ''
|
||||
|
||||
if align is not None and align != self.DEFAULT_ALIGN_STYLE:
|
||||
style += f'text-align: {align};'
|
||||
|
||||
if indent is not None:
|
||||
indent = indent.group(1)
|
||||
style += f'text-indent: {indent}in;'
|
||||
@@ -376,6 +394,11 @@ class Book:
|
||||
if style:
|
||||
p.attrs['style'] = style
|
||||
|
||||
if blockquote:
|
||||
blockquote.p.attrs = p.attrs
|
||||
blockquote.p.string = p.text
|
||||
p.replace_with(blockquote)
|
||||
|
||||
def _process_two_columns(self):
|
||||
"""
|
||||
Function to process paragraphs which has two columns layout.
|
||||
@@ -387,20 +410,20 @@ class Book:
|
||||
child["class"] = "columns2"
|
||||
div.unwrap()
|
||||
|
||||
def _process_quotes(self):
    """
    Function to process <dl> tags. All tags will be replaced with <blockquote> tags.

    Every <p> found inside a <dl> is wrapped in its own <blockquote>; the
    blockquotes are collected, in document order, into a new <div>, and
    the <dl> is replaced with that <div>.
    """
    # A single factory object is enough to create every new tag.
    soup = BeautifulSoup(features='lxml')

    for dl in self.body_tag.find_all('dl'):
        new_div = soup.new_tag('div')
        # find_all() returns a list computed up front, so moving the
        # paragraphs out of the <dl> while looping is safe.
        for p in dl.find_all('p'):
            # wrap() returns the new wrapper, i.e. the <blockquote>.
            new_div.append(p.wrap(soup.new_tag('blockquote')))
        # replace_with is the current name of the deprecated replaceWith alias.
        dl.replace_with(new_div)
|
||||
# def _process_quotes(self):
|
||||
# """
|
||||
# Function to process <dl> tags. All tags will be replaced with <blockquote> tags.
|
||||
# """
|
||||
# dls = self.body_tag.find_all('dl')
|
||||
#
|
||||
# for dl in dls:
|
||||
# pars = dl.find_all('p')
|
||||
# for p in pars:
|
||||
# p.wrap(BeautifulSoup(features='lxml').new_tag('blockquote'))
|
||||
# new_div = BeautifulSoup(features='lxml').new_tag('div')
|
||||
# for p in pars:
|
||||
# new_div.append(p.parent)
|
||||
# dl.replaceWith(new_div)
|
||||
|
||||
@staticmethod
|
||||
def _clean_footnote_content(content):
|
||||
@@ -575,7 +598,7 @@ class Book:
|
||||
self._preprocessing_headings()
|
||||
self._process_paragraph()
|
||||
self._process_two_columns()
|
||||
self._process_quotes()
|
||||
# self._process_quotes()
|
||||
|
||||
self.log('Footnotes processing.')
|
||||
self._process_footnotes()
|
||||
@@ -766,8 +789,8 @@ class Book:
|
||||
|
||||
if __name__ == "__main__":
|
||||
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
file_path = pathlib.Path(os.path.join(folder_path, 'html/11/11.html'))
|
||||
out_path = pathlib.Path(os.path.join(folder_path, 'json/11.json'))
|
||||
file_path = pathlib.Path(os.path.join(folder_path, 'html/0/quote.html'))
|
||||
out_path = pathlib.Path(os.path.join(folder_path, 'json/quote.json'))
|
||||
|
||||
logging_format = '%(asctime)s - %(levelname)s - %(message)s'
|
||||
|
||||
|
||||
Reference in New Issue
Block a user