forked from LiveCarta/BookConverter
Revert "epub converter: add new type of blocks"
This reverts commit eb882a700356149c6133c5291ed003dcaa0a183d.
This commit is contained in:
@@ -44,7 +44,7 @@ def convert_book(book_id, access, logger, libra_locker):
|
|||||||
print('Book has been proceeded.')
|
print('Book has been proceeded.')
|
||||||
|
|
||||||
|
|
||||||
def convert_epub_book(book_id, access, logger):
|
def convert_epub_book(book_id, access, logger=None):
|
||||||
logger.info(f'Start processing epub book-{book_id}.')
|
logger.info(f'Start processing epub book-{book_id}.')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -414,7 +414,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
|
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
|
||||||
|
|
||||||
json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/9781119605959_f3.epub',
|
json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/9781119682387_pre_code2.epub',
|
||||||
logger=logger_object)
|
logger=logger_object)
|
||||||
tmp = json_converter.convert_to_dict()
|
tmp = json_converter.convert_to_dict()
|
||||||
|
|
||||||
|
|||||||
@@ -250,24 +250,13 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
|
|||||||
def unwrap_structural_tags(body_tag):
|
def unwrap_structural_tags(body_tag):
|
||||||
|
|
||||||
def _preserve_class_in_aside_tag(tag_):
|
def _preserve_class_in_aside_tag(tag_):
|
||||||
# to save css style inherited from class, copy class to aside tag (which is parent to tag_)
|
# to save css style inherited from class, copy class to aside tag
|
||||||
# this is for Wiley books with boxes
|
# this is for Wiley books with boxes
|
||||||
tag_class = tag_.attrs['class'] if not isinstance(tag_.attrs['class'], list) else tag_.attrs['class'][0]
|
tag_class = tag_.attrs['class'] if not isinstance(tag_.attrs['class'], list) else tag_.attrs['class'][0]
|
||||||
if tag_.parent.name == 'aside':
|
if tag_.parent.name == 'aside':
|
||||||
if not tag_.parent.attrs.get('class'):
|
if not tag_.parent.attrs.get('class'):
|
||||||
tag_.parent.attrs['class'] = tag_class
|
tag_.parent.attrs['class'] = tag_class
|
||||||
|
|
||||||
def _preserve_class_in_section_tag(tag_):
|
|
||||||
# to save css style inherited from class, copy class to child <p>
|
|
||||||
# this is for Wiley books with boxes
|
|
||||||
tag_class = tag_.attrs['class'] if not isinstance(tag_.attrs['class'], list) else tag_.attrs['class'][0]
|
|
||||||
child_p_tag = tag_.find_all("p")
|
|
||||||
if len(child_p_tag) != 1:
|
|
||||||
return
|
|
||||||
child_p_tag = child_p_tag[0]
|
|
||||||
if not child_p_tag.attrs.get('class'):
|
|
||||||
child_p_tag.attrs['class'] = tag_class
|
|
||||||
|
|
||||||
def _add_table_to_abc_books(tag_, border, bg_color):
|
def _add_table_to_abc_books(tag_, border, bg_color):
|
||||||
wrap_block_tag_with_table(body_tag, old_tag=tag_, width='100', border=border, bg_color=bg_color)
|
wrap_block_tag_with_table(body_tag, old_tag=tag_, width='100', border=border, bg_color=bg_color)
|
||||||
|
|
||||||
@@ -303,7 +292,6 @@ def unwrap_structural_tags(body_tag):
|
|||||||
for s in body_tag.find_all("section"):
|
for s in body_tag.find_all("section"):
|
||||||
if s.attrs.get('class'):
|
if s.attrs.get('class'):
|
||||||
_preserve_class_in_aside_tag(s)
|
_preserve_class_in_aside_tag(s)
|
||||||
_preserve_class_in_section_tag(s)
|
|
||||||
_add_span_to_save_ids_for_links(s)
|
_add_span_to_save_ids_for_links(s)
|
||||||
s.unwrap()
|
s.unwrap()
|
||||||
|
|
||||||
@@ -423,36 +411,26 @@ def wrap_block_tag_with_table(main_tag, old_tag, width='95', border='1px', bg_co
|
|||||||
return table
|
return table
|
||||||
|
|
||||||
|
|
||||||
def _clean_wiley_block(block):
|
|
||||||
hrs = block.find_all("p", attrs={"class": re.compile(".+ hr")})
|
|
||||||
for hr in hrs:
|
|
||||||
hr.extract()
|
|
||||||
h = block.find(re.compile("h[1-9]"))
|
|
||||||
if h:
|
|
||||||
h.name = "p"
|
|
||||||
h.insert_before(BeautifulSoup(features='lxml').new_tag("br"))
|
|
||||||
|
|
||||||
|
|
||||||
def preprocess_block_tags(chapter_tag):
|
def preprocess_block_tags(chapter_tag):
|
||||||
for block in chapter_tag.find_all("blockquote"):
|
for block in chapter_tag.find_all("blockquote"):
|
||||||
if block.attrs.get('class') in ['feature1', 'feature2', 'feature3', 'feature4']:
|
if block.attrs.get('class') in ['feature1', 'feature2', 'feature3', 'feature4']:
|
||||||
_clean_wiley_block(block)
|
hrs = block.find_all("p", attrs={"class": re.compile(".+ hr")})
|
||||||
|
for hr in hrs:
|
||||||
|
hr.extract()
|
||||||
|
|
||||||
|
h = block.find(re.compile("h[1-9]"))
|
||||||
|
if h:
|
||||||
|
h.name = "p"
|
||||||
|
h.insert_before(BeautifulSoup(features='lxml').new_tag("br"))
|
||||||
|
|
||||||
color = '#DDDDDD' if block.attrs.get('class') == 'feature1' else None
|
color = '#DDDDDD' if block.attrs.get('class') == 'feature1' else None
|
||||||
color = '#EEEEEE' if block.attrs.get('class') == 'feature2' else color
|
color = '#EEEEEE' if block.attrs.get('class') == 'feature2' else color
|
||||||
wrap_block_tag_with_table(chapter_tag, block, bg_color=color)
|
wrap_block_tag_with_table(chapter_tag, block, color)
|
||||||
block.insert_after(BeautifulSoup(features='lxml').new_tag("br"))
|
block.insert_after(BeautifulSoup(features='lxml').new_tag("br"))
|
||||||
block.unwrap()
|
block.unwrap()
|
||||||
|
|
||||||
for future_block in chapter_tag.find_all("p", attrs={"class": re.compile("feature[1234]")}):
|
|
||||||
_clean_wiley_block(future_block)
|
|
||||||
color = '#DDDDDD' if future_block.attrs.get('class') == 'feature1' else None
|
|
||||||
color = '#EEEEEE' if future_block.attrs.get('class') == 'feature2' else color
|
|
||||||
wrap_block_tag_with_table(chapter_tag, future_block, bg_color=color)
|
|
||||||
|
|
||||||
|
|
||||||
def _prepare_formatted(text):
|
def _prepare_formatted(text):
|
||||||
# replace <,> to save them as is in html code
|
|
||||||
text = text.replace("<", "\x3C")
|
text = text.replace("<", "\x3C")
|
||||||
text = text.replace(">", "\x3E")
|
text = text.replace(">", "\x3E")
|
||||||
text = text.replace('\t', "\xa0 \xa0 ") #
|
text = text.replace('\t', "\xa0 \xa0 ") #
|
||||||
@@ -465,7 +443,7 @@ def preprocess_pre_tags(chapter_tag):
|
|||||||
new_tag = BeautifulSoup(features='lxml').new_tag("span")
|
new_tag = BeautifulSoup(features='lxml').new_tag("span")
|
||||||
new_tag.attrs = pre.attrs.copy()
|
new_tag.attrs = pre.attrs.copy()
|
||||||
spans = pre.find_all("span")
|
spans = pre.find_all("span")
|
||||||
to_add_br = len(spans) > 1 # if in <pre> there are multiple <span>, we need to add <br> after each content
|
to_add_br = len(spans) > 1
|
||||||
|
|
||||||
for child in pre.children:
|
for child in pre.children:
|
||||||
if isinstance(child, NavigableString):
|
if isinstance(child, NavigableString):
|
||||||
@@ -492,7 +470,6 @@ def preprocess_pre_tags(chapter_tag):
|
|||||||
|
|
||||||
|
|
||||||
def preprocess_code_tags(chapter_tag):
|
def preprocess_code_tags(chapter_tag):
|
||||||
# function that emulates style of <code>, <kdb>, <var>
|
|
||||||
for code in chapter_tag.find_all(re.compile("code|kdb|var")):
|
for code in chapter_tag.find_all(re.compile("code|kdb|var")):
|
||||||
code.name = 'span'
|
code.name = 'span'
|
||||||
if code.parent.name == "pre":
|
if code.parent.name == "pre":
|
||||||
|
|||||||
Reference in New Issue
Block a user