forked from LiveCarta/BookConverter
epub converter: add <pre>, <code> processing
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
import re
|
import re
|
||||||
|
from html import escape
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
|
||||||
from bs4 import BeautifulSoup, NavigableString, Tag
|
from bs4 import BeautifulSoup, NavigableString, Tag
|
||||||
@@ -381,6 +382,41 @@ def get_tags_between_chapter_marks(first_id, href, html_soup):
|
|||||||
return tags
|
return tags
|
||||||
|
|
||||||
|
|
||||||
|
def wrap_text_with_table(main_tag, text, old_tag):
    """Replace ``old_tag`` with a single-cell table that holds ``text``.

    The cell gets a monospace font and a light grey background; the table
    itself is borderless and spans the full width.

    Args:
        main_tag: Object whose ``new_tag`` factory creates the new elements.
        text: Cell content; converted with ``str`` before insertion.
        old_tag: Element that is swapped out for the new table.

    Returns:
        The newly created ``table`` element.
    """
    cell = main_tag.new_tag("td")
    cell.attrs['style'] = 'font-family: courier new,courier,monospace;'
    cell.attrs['bgcolor'] = '#f5f5f5'
    cell.insert(0, str(text))

    row = main_tag.new_tag("tr")
    body = main_tag.new_tag("tbody")
    outer = main_tag.new_tag("table")
    outer.attrs['border'] = '0'
    outer.attrs['style'] = 'width:100%;'

    # Put the cell where the old element was, then grow the table around it
    # from the inside out: td -> tr -> tbody -> table.
    old_tag.replace_with(cell)
    for inner, wrapper in ((cell, row), (row, body), (body, outer)):
        inner.wrap(wrapper)
    return outer
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_pre_tags(chapter_tag):
    """Convert every ``<pre>`` element under ``chapter_tag`` into a table.

    Each ``<pre>`` is replaced, via ``wrap_text_with_table``, by a one-cell
    table containing the HTML-escaped text of the element. Only the text is
    kept: any markup nested inside the ``<pre>`` is flattened by ``.text``.

    Args:
        chapter_tag: Parsed chapter; it is searched for ``<pre>`` elements and
            is also passed on as the tag factory for the new table elements.
    """
    # No "has children" guard here: ``Tag.children`` is an iterator and is
    # therefore always truthy, so such a check can never select a branch.
    for pre in chapter_tag.find_all("pre"):
        # NOTE(review): the text is escaped here and then inserted as a string
        # node; if the tree is later serialized with an entity-substituting
        # formatter the entities may be escaped a second time -- confirm.
        wrap_text_with_table(chapter_tag, escape(pre.text), pre)
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_code_tags(chapter_tag):
    """Restyle every ``<code>`` element under ``chapter_tag`` as a ``<span>``.

    For each element the content is replaced by its HTML-escaped text (markup
    nested inside the element is flattened by ``.text``), the tag is renamed
    to ``span`` and its ``style`` attribute is set to a coloured monospace
    font. Other attributes are left untouched.

    Args:
        chapter_tag: Parsed chapter that is searched for ``<code>`` elements;
            the elements are modified in place.
    """
    # No "has children" guard here: ``Tag.children`` is an iterator and is
    # therefore always truthy, so such a check can never select a branch.
    for code in chapter_tag.find_all("code"):
        # NOTE(review): the text is escaped here and stored as a string node;
        # if the tree is later serialized with an entity-substituting
        # formatter the entities may be escaped a second time -- confirm.
        code.string = escape(code.text)
        code.name = 'span'
        code.attrs['style'] = 'color:#c7254e; font-family: courier new,courier,monospace;'
|
||||||
|
|
||||||
|
|
||||||
def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_from_chapter) -> Tuple[str, str]:
|
def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_from_chapter) -> Tuple[str, str]:
|
||||||
title_str = BeautifulSoup(title, features='lxml').string
|
title_str = BeautifulSoup(title, features='lxml').string
|
||||||
title_str = re.sub(r'([\n\t\xa0])', ' ', title_str)
|
title_str = re.sub(r'([\n\t\xa0])', ' ', title_str)
|
||||||
@@ -399,6 +435,8 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
|
|||||||
clean_headings_content(chapter_tag, title_str)
|
clean_headings_content(chapter_tag, title_str)
|
||||||
_process_lists(chapter_tag)
|
_process_lists(chapter_tag)
|
||||||
preprocess_table(chapter_tag)
|
preprocess_table(chapter_tag)
|
||||||
|
preprocess_pre_tags(chapter_tag)
|
||||||
|
preprocess_code_tags(chapter_tag)
|
||||||
# 2. class removal
|
# 2. class removal
|
||||||
for tag in chapter_tag.find_all(recursive=True):
|
for tag in chapter_tag.find_all(recursive=True):
|
||||||
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor']):
|
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor']):
|
||||||
|
|||||||
Reference in New Issue
Block a user