forked from LiveCarta/BookConverter
fix quotes processing
This commit is contained in:
57
src/book.py
57
src/book.py
@@ -362,13 +362,31 @@ class Book:
|
|||||||
|
|
||||||
if style:
|
if style:
|
||||||
indent = re.search(r'text-indent: ([\d\.]{1,4})in', style)
|
indent = re.search(r'text-indent: ([\d\.]{1,4})in', style)
|
||||||
|
margin_left = re.search(r'margin-left: ([\d\.]{1,4})in', style)
|
||||||
|
margin_right= re.search(r'margin-right: ([\d\.]{1,4})in', style)
|
||||||
|
margin_top = re.search(r'margin-top: ([\d\.]{1,4})in', style)
|
||||||
|
margin_bottom = re.search(r'margin-bottom: ([\d\.]{1,4})in', style)
|
||||||
else:
|
else:
|
||||||
indent = None
|
indent = None
|
||||||
|
margin_left = None
|
||||||
|
margin_right = None
|
||||||
|
margin_top = None
|
||||||
|
margin_bottom = None
|
||||||
|
|
||||||
|
if margin_left and margin_right and margin_top and margin_bottom and \
|
||||||
|
margin_left.group(1) == '0.6' and margin_right.group(1) == '0.6' and \
|
||||||
|
margin_top.group(1) == '0.14' and margin_bottom.group(1) == '0.11':
|
||||||
|
blockquote = BeautifulSoup(features='lxml').new_tag('blockquote')
|
||||||
|
blockquote.append(BeautifulSoup(features='lxml').new_tag('p'))
|
||||||
|
else:
|
||||||
|
blockquote = None
|
||||||
|
|
||||||
p.attrs = {}
|
p.attrs = {}
|
||||||
style = ''
|
style = ''
|
||||||
|
|
||||||
if align is not None and align != self.DEFAULT_ALIGN_STYLE:
|
if align is not None and align != self.DEFAULT_ALIGN_STYLE:
|
||||||
style += f'text-align: {align};'
|
style += f'text-align: {align};'
|
||||||
|
|
||||||
if indent is not None:
|
if indent is not None:
|
||||||
indent = indent.group(1)
|
indent = indent.group(1)
|
||||||
style += f'text-indent: {indent}in;'
|
style += f'text-indent: {indent}in;'
|
||||||
@@ -376,6 +394,11 @@ class Book:
|
|||||||
if style:
|
if style:
|
||||||
p.attrs['style'] = style
|
p.attrs['style'] = style
|
||||||
|
|
||||||
|
if blockquote:
|
||||||
|
blockquote.p.attrs = p.attrs
|
||||||
|
blockquote.p.string = p.text
|
||||||
|
p.replace_with(blockquote)
|
||||||
|
|
||||||
def _process_two_columns(self):
|
def _process_two_columns(self):
|
||||||
"""
|
"""
|
||||||
Function to process paragraphs which has two columns layout.
|
Function to process paragraphs which has two columns layout.
|
||||||
@@ -387,20 +410,20 @@ class Book:
|
|||||||
child["class"] = "columns2"
|
child["class"] = "columns2"
|
||||||
div.unwrap()
|
div.unwrap()
|
||||||
|
|
||||||
def _process_quotes(self):
|
# def _process_quotes(self):
|
||||||
"""
|
# """
|
||||||
Function to process <dl> tags. All tags will be replaced with <blockquote> tags.
|
# Function to process <dl> tags. All tags will be replaced with <blockquote> tags.
|
||||||
"""
|
# """
|
||||||
dls = self.body_tag.find_all('dl')
|
# dls = self.body_tag.find_all('dl')
|
||||||
|
#
|
||||||
for dl in dls:
|
# for dl in dls:
|
||||||
pars = dl.find_all('p')
|
# pars = dl.find_all('p')
|
||||||
for p in pars:
|
# for p in pars:
|
||||||
p.wrap(BeautifulSoup(features='lxml').new_tag('blockquote'))
|
# p.wrap(BeautifulSoup(features='lxml').new_tag('blockquote'))
|
||||||
new_div = BeautifulSoup(features='lxml').new_tag('div')
|
# new_div = BeautifulSoup(features='lxml').new_tag('div')
|
||||||
for p in pars:
|
# for p in pars:
|
||||||
new_div.append(p.parent)
|
# new_div.append(p.parent)
|
||||||
dl.replaceWith(new_div)
|
# dl.replaceWith(new_div)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _clean_footnote_content(content):
|
def _clean_footnote_content(content):
|
||||||
@@ -575,7 +598,7 @@ class Book:
|
|||||||
self._preprocessing_headings()
|
self._preprocessing_headings()
|
||||||
self._process_paragraph()
|
self._process_paragraph()
|
||||||
self._process_two_columns()
|
self._process_two_columns()
|
||||||
self._process_quotes()
|
# self._process_quotes()
|
||||||
|
|
||||||
self.log('Footnotes processing.')
|
self.log('Footnotes processing.')
|
||||||
self._process_footnotes()
|
self._process_footnotes()
|
||||||
@@ -766,8 +789,8 @@ class Book:
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
file_path = pathlib.Path(os.path.join(folder_path, 'html/11/11.html'))
|
file_path = pathlib.Path(os.path.join(folder_path, 'html/0/quote.html'))
|
||||||
out_path = pathlib.Path(os.path.join(folder_path, 'json/11.json'))
|
out_path = pathlib.Path(os.path.join(folder_path, 'json/quote.json'))
|
||||||
|
|
||||||
logging_format = '%(asctime)s - %(levelname)s - %(message)s'
|
logging_format = '%(asctime)s - %(levelname)s - %(message)s'
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user