forked from LiveCarta/BookConverter
fix adding of heading 4
This commit is contained in:
25
src/book.py
25
src/book.py
@@ -510,6 +510,15 @@ class Book:
|
|||||||
title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title)
|
title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title)
|
||||||
return title.strip()
|
return title.strip()
|
||||||
|
|
||||||
|
def _preprocessing_headings(self):
|
||||||
|
"""
|
||||||
|
Function to convert all lower level headings to p tags
|
||||||
|
"""
|
||||||
|
header_tags = self.body_tag.find_all(re.compile("^h[4-6]$"))
|
||||||
|
for tag in header_tags:
|
||||||
|
tag.name = 'p'
|
||||||
|
print(tag)
|
||||||
|
|
||||||
def _process_headings(self):
|
def _process_headings(self):
|
||||||
"""
|
"""
|
||||||
Function to process tags <h>.
|
Function to process tags <h>.
|
||||||
@@ -526,11 +535,18 @@ class Book:
|
|||||||
if title == "":
|
if title == "":
|
||||||
tag.unwrap()
|
tag.unwrap()
|
||||||
else:
|
else:
|
||||||
if tag.name in ["h4", "h5", "h6"]: # All the lower level headings will be transformed to h3 headings
|
assert tag.name not in ["h4", "h5", "h6"], 'Preprocessing went wrong, there is still h4-h6 headings.'
|
||||||
tag.name = "h3"
|
# if tag.name in ["h4", "h5", "h6"]:
|
||||||
|
# tag.name = "h3" # All the lower level headings will be transformed to h3 headings
|
||||||
|
|
||||||
|
|
||||||
new_tag = BeautifulSoup(features='lxml').new_tag(name=tag.name)
|
new_tag = BeautifulSoup(features='lxml').new_tag(name=tag.name)
|
||||||
new_tag.string = title
|
new_tag.string = title
|
||||||
|
if new_tag.name == "p":
|
||||||
|
new_tag.attrs = tag.attrs
|
||||||
|
print(tag)
|
||||||
|
print(new_tag)
|
||||||
|
|
||||||
tag.replace_with(new_tag)
|
tag.replace_with(new_tag)
|
||||||
|
|
||||||
def write_html_from_list(self, file_name='url_test.html'):
|
def write_html_from_list(self, file_name='url_test.html'):
|
||||||
@@ -553,6 +569,7 @@ class Book:
|
|||||||
|
|
||||||
# process main elements of the .html doc
|
# process main elements of the .html doc
|
||||||
self.log(f'Processing main elements of html.')
|
self.log(f'Processing main elements of html.')
|
||||||
|
self._preprocessing_headings()
|
||||||
self._process_paragraph()
|
self._process_paragraph()
|
||||||
self._process_two_columns()
|
self._process_two_columns()
|
||||||
self._process_quotes()
|
self._process_quotes()
|
||||||
@@ -694,6 +711,10 @@ class Book:
|
|||||||
with codecs.open(self.output_path, 'w', encoding='utf-8') as f:
|
with codecs.open(self.output_path, 'w', encoding='utf-8') as f:
|
||||||
json.dump(self.content_dict, f, ensure_ascii=False)
|
json.dump(self.content_dict, f, ensure_ascii=False)
|
||||||
self.log('Data has been saved to .json file.')
|
self.log('Data has been saved to .json file.')
|
||||||
|
|
||||||
|
from pprint import pprint
|
||||||
|
|
||||||
|
pprint(self.content_dict)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.log('Error has occurred while writing json file.', logging.ERROR)
|
self.log('Error has occurred while writing json file.', logging.ERROR)
|
||||||
# self.log_error_to_main_log()
|
# self.log_error_to_main_log()
|
||||||
|
|||||||
@@ -92,36 +92,36 @@ def local_run(filename):
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
# folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
# config_path = Path(os.path.join(folder_path, "config/config.json"))
|
# # config_path = Path(os.path.join(folder_path, "config/config.json"))
|
||||||
config_path = Path(os.path.join(folder_path, "config/queue_config.json"))
|
# config_path = Path(os.path.join(folder_path, "config/queue_config.json"))
|
||||||
with open(config_path, "r") as f:
|
# with open(config_path, "r") as f:
|
||||||
conf_param = json.load(f)
|
# conf_param = json.load(f)
|
||||||
|
#
|
||||||
|
# host = conf_param.get('host') or pika.ConnectionParameters().DEFAULT_HOST
|
||||||
|
# port = conf_param.get('port') or pika.ConnectionParameters().DEFAULT_PORT
|
||||||
|
#
|
||||||
|
# credentials = pika.PlainCredentials(username=conf_param['username'], password=conf_param['password'])
|
||||||
|
# parameters = pika.ConnectionParameters(host=host, port=port, credentials=credentials)
|
||||||
|
# connection = pika.BlockingConnection(parameters)
|
||||||
|
# channel = connection.channel()
|
||||||
|
#
|
||||||
|
# logger = configure_file_logger('consumer', logging_format='%(asctime)s - %(levelname)s - %(message)s')
|
||||||
|
#
|
||||||
|
# try:
|
||||||
|
# channel.queue_declare(queue=conf_param['queue'], durable=True, arguments={'x-max-priority': 10})
|
||||||
|
# except ValueError as exc:
|
||||||
|
# logger.log(logging.ERROR, f'Queue {conf_param["queue"]} is not declared.')
|
||||||
|
# raise exc
|
||||||
|
#
|
||||||
|
# acs = Access()
|
||||||
|
# channel.basic_consume(queue=conf_param['queue'],
|
||||||
|
# auto_ack=True,
|
||||||
|
# on_message_callback=partial(callback, access=acs, logger=logger))
|
||||||
|
# logger.info('Connection has been established.')
|
||||||
|
# print('Waiting for messages...')
|
||||||
|
# logger.info('Waiting for messages...')
|
||||||
|
#
|
||||||
|
# channel.start_consuming()
|
||||||
|
|
||||||
host = conf_param.get('host') or pika.ConnectionParameters().DEFAULT_HOST
|
local_run('music')
|
||||||
port = conf_param.get('port') or pika.ConnectionParameters().DEFAULT_PORT
|
|
||||||
|
|
||||||
credentials = pika.PlainCredentials(username=conf_param['username'], password=conf_param['password'])
|
|
||||||
parameters = pika.ConnectionParameters(host=host, port=port, credentials=credentials)
|
|
||||||
connection = pika.BlockingConnection(parameters)
|
|
||||||
channel = connection.channel()
|
|
||||||
|
|
||||||
logger = configure_file_logger('consumer', logging_format='%(asctime)s - %(levelname)s - %(message)s')
|
|
||||||
|
|
||||||
try:
|
|
||||||
channel.queue_declare(queue=conf_param['queue'], durable=True, arguments={'x-max-priority': 10})
|
|
||||||
except ValueError as exc:
|
|
||||||
logger.log(logging.ERROR, f'Queue {conf_param["queue"]} is not declared.')
|
|
||||||
raise exc
|
|
||||||
|
|
||||||
acs = Access()
|
|
||||||
channel.basic_consume(queue=conf_param['queue'],
|
|
||||||
auto_ack=True,
|
|
||||||
on_message_callback=partial(callback, access=acs, logger=logger))
|
|
||||||
logger.info('Connection has been established.')
|
|
||||||
print('Waiting for messages...')
|
|
||||||
logger.info('Waiting for messages...')
|
|
||||||
|
|
||||||
channel.start_consuming()
|
|
||||||
|
|
||||||
# local_run('0')
|
|
||||||
|
|||||||
Reference in New Issue
Block a user