diff --git a/src/book.py b/src/book.py index 859ac64..2a7d488 100644 --- a/src/book.py +++ b/src/book.py @@ -510,6 +510,15 @@ class Book: title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title) return title.strip() + def _preprocessing_headings(self): + """ + Function to convert all lower level headings to p tags + """ + header_tags = self.body_tag.find_all(re.compile("^h[4-6]$")) + for tag in header_tags: + tag.name = 'p' + print(tag) + def _process_headings(self): """ Function to process tags . @@ -526,11 +535,18 @@ class Book: if title == "": tag.unwrap() else: - if tag.name in ["h4", "h5", "h6"]: # All the lower level headings will be transformed to h3 headings - tag.name = "h3" + assert tag.name not in ["h4", "h5", "h6"], 'Preprocessing went wrong, there is still h4-h6 headings.' + # if tag.name in ["h4", "h5", "h6"]: + # tag.name = "h3" # All the lower level headings will be transformed to h3 headings + new_tag = BeautifulSoup(features='lxml').new_tag(name=tag.name) new_tag.string = title + if new_tag.name == "p": + new_tag.attrs = tag.attrs + print(tag) + print(new_tag) + tag.replace_with(new_tag) def write_html_from_list(self, file_name='url_test.html'): @@ -553,6 +569,7 @@ class Book: # process main elements of the .html doc self.log(f'Processing main elements of html.') + self._preprocessing_headings() self._process_paragraph() self._process_two_columns() self._process_quotes() @@ -694,6 +711,10 @@ class Book: with codecs.open(self.output_path, 'w', encoding='utf-8') as f: json.dump(self.content_dict, f, ensure_ascii=False) self.log('Data has been saved to .json file.') + + from pprint import pprint + + pprint(self.content_dict) except Exception as exc: self.log('Error has occurred while writing json file.', logging.ERROR) # self.log_error_to_main_log() diff --git a/src/consumer.py b/src/consumer.py index 7ba9bb5..0d5a449 100644 --- a/src/consumer.py +++ b/src/consumer.py @@ -92,36 +92,36 @@ def local_run(filename): if __name__ == '__main__': - folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - # config_path = Path(os.path.join(folder_path, "config/config.json")) - config_path = Path(os.path.join(folder_path, "config/queue_config.json")) - with open(config_path, "r") as f: - conf_param = json.load(f) + # folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + # # config_path = Path(os.path.join(folder_path, "config/config.json")) + # config_path = Path(os.path.join(folder_path, "config/queue_config.json")) + # with open(config_path, "r") as f: + # conf_param = json.load(f) + # + # host = conf_param.get('host') or pika.ConnectionParameters().DEFAULT_HOST + # port = conf_param.get('port') or pika.ConnectionParameters().DEFAULT_PORT + # + # credentials = pika.PlainCredentials(username=conf_param['username'], password=conf_param['password']) + # parameters = pika.ConnectionParameters(host=host, port=port, credentials=credentials) + # connection = pika.BlockingConnection(parameters) + # channel = connection.channel() + # + # logger = configure_file_logger('consumer', logging_format='%(asctime)s - %(levelname)s - %(message)s') + # + # try: + # channel.queue_declare(queue=conf_param['queue'], durable=True, arguments={'x-max-priority': 10}) + # except ValueError as exc: + # logger.log(logging.ERROR, f'Queue {conf_param["queue"]} is not declared.') + # raise exc + # + # acs = Access() + # channel.basic_consume(queue=conf_param['queue'], + # auto_ack=True, + # on_message_callback=partial(callback, access=acs, logger=logger)) + # logger.info('Connection has been established.') + # print('Waiting for messages...') + # logger.info('Waiting for messages...') + # + # channel.start_consuming() - host = conf_param.get('host') or pika.ConnectionParameters().DEFAULT_HOST - port = conf_param.get('port') or pika.ConnectionParameters().DEFAULT_PORT - - credentials = pika.PlainCredentials(username=conf_param['username'], password=conf_param['password']) - parameters = pika.ConnectionParameters(host=host, port=port, credentials=credentials) - connection = pika.BlockingConnection(parameters) - channel = connection.channel() - - logger = configure_file_logger('consumer', logging_format='%(asctime)s - %(levelname)s - %(message)s') - - try: - channel.queue_declare(queue=conf_param['queue'], durable=True, arguments={'x-max-priority': 10}) - except ValueError as exc: - logger.log(logging.ERROR, f'Queue {conf_param["queue"]} is not declared.') - raise exc - - acs = Access() - channel.basic_consume(queue=conf_param['queue'], - auto_ack=True, - on_message_callback=partial(callback, access=acs, logger=logger)) - logger.info('Connection has been established.') - print('Waiting for messages...') - logger.info('Waiting for messages...') - - channel.start_consuming() - - # local_run('0') + local_run('music')