Add 1-many css + Fix bug 4635

This commit is contained in:
Kiryl
2021-09-28 13:37:37 +03:00
parent 955a64380c
commit ebb5f0802e
8 changed files with 101 additions and 82 deletions

View File

@@ -13,7 +13,7 @@ import os
import pathlib import pathlib
from abc import abstractmethod, ABCMeta from abc import abstractmethod, ABCMeta
from livecarta_config import LawCartaConfig from livecarta_config import LiveCartaConfig
from util.helpers import BookLogger, BookStatusWrapper from util.helpers import BookLogger, BookStatusWrapper
@@ -32,7 +32,7 @@ class BookSolver:
main_logger=main_logger) main_logger=main_logger)
self.status_wrapper = BookStatusWrapper(access, self.logger_object, book_id) self.status_wrapper = BookStatusWrapper(access, self.logger_object, book_id)
assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \ assert LiveCartaConfig.SUPPORTED_LEVELS == len(LiveCartaConfig.SUPPORTED_HEADERS), \
"Length of headers doesn't match allowed levels." "Length of headers doesn't match allowed levels."
def save_book_file(self, content): def save_book_file(self, content):

View File

@@ -9,7 +9,7 @@ from premailer import transform
from itertools import takewhile from itertools import takewhile
from logging import CRITICAL from logging import CRITICAL
from livecarta_config import LawCartaConfig from livecarta_config import LiveCartaConfig
from util.color_reader import str2hex from util.color_reader import str2hex
cssutils.log.setLevel(CRITICAL) cssutils.log.setLevel(CRITICAL)
@@ -30,7 +30,7 @@ list_types = ['circle', 'disc', 'armenian', 'decimal',
def convert_font_size(value): def convert_font_size(value):
if 'pt' in value: if 'pt' in value:
if int(value.replace('pt', '')) == LawCartaConfig.LAWCARTA_DEFAULT_FONT_SIZE: if int(value.replace('pt', '')) == LiveCartaConfig.LAWCARTA_DEFAULT_FONT_SIZE:
return '' return ''
else: else:
return value.replace('pt', 'px') return value.replace('pt', 'px')
@@ -57,22 +57,27 @@ def convert_font_size(value):
return '' return ''
def convert_indents(value): def convert_indents(value):
if '-' not in value[0]:
# 30px = 3.2% = 1.25em = 23pt # 30px = 3.2% = 1.25em = 23pt
positive_text_indent_regexp = re.compile(r'(\w+%)|(\w*.*\w+em)') positive_text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(\w+px)|(-*\w+pt)')
has_style_attrs = re.search(positive_text_indent_regexp, value) has_style_attrs = re.search(positive_text_indent_regexp, value)
if has_style_attrs: if has_style_attrs:
if has_style_attrs.group(1): if has_style_attrs.group(1):
value = value.replace(has_style_attrs.group(1), value = value.replace(has_style_attrs.group(1),
str(int("".join(filter(str.isdigit, str(has_style_attrs.group(1)))))) + str(abs(int("".join(filter(str.isdigit, str(has_style_attrs.group(1))))) * 6)) +
'%') 'px')
# elif has_style_attrs.group(2):
# value = value.replace(has_style_attrs.group(2), elif has_style_attrs.group(2):
# str(int("".join(filter(str.isdigit, str(has_style_attrs.group(2))))) * 5) + value = value.replace(has_style_attrs.group(2),
# '%') str(abs(int("".join(filter(str.isdigit, str(has_style_attrs.group(3))))) * 30)) +
return value 'px')
else:
return '' elif has_style_attrs.group(4):
value = value.replace(has_style_attrs.group(4), '30px')
elif has_style_attrs.group(5):
value = value.replace(has_style_attrs.group(5),
str(abs(int("".join(filter(str.isdigit, str(has_style_attrs.group(5))))))) + 'px')
return value
""" """
LIVECARTA_STYLE_ATTRS = { css property: value } LIVECARTA_STYLE_ATTRS = { css property: value }
@@ -83,11 +88,11 @@ If property has not empty list, it means that only certain property-value combin
LIVECARTA_STYLE_ATTRS = { LIVECARTA_STYLE_ATTRS = {
'text-indent': [], 'text-indent': [],
'font-variant': ['small-caps'], 'font-variant': ['small-caps'],
'text-align': [x for x in LawCartaConfig.ALIGN_STYLES if x != LawCartaConfig.DEFAULT_ALIGN_STYLE], 'text-align': [x for x in LiveCartaConfig.ALIGN_STYLES if x != LiveCartaConfig.DEFAULT_ALIGN_STYLE],
'align': [], # ??? 'align': [], # ???
'font': [], # ??? 'font': [], # ???
'font-family': [x for x in LawCartaConfig.font_correspondence_table.keys() 'font-family': [x for x in LiveCartaConfig.font_correspondence_table.keys()
if x != LawCartaConfig.DEFAULT_FONT_NAME], if x != LiveCartaConfig.DEFAULT_FONT_NAME],
'font-size': [], 'font-size': [],
'font-weight': ['bold', '600', '700', '800', '900'], # <strong> 'font-weight': ['bold', '600', '700', '800', '900'], # <strong>
'font-style': ['italic'], # <i> 'font-style': ['italic'], # <i>
@@ -129,11 +134,11 @@ def get_text_color(x):
LIVECARTA_STYLE_ATTRS_MAPPING = { LIVECARTA_STYLE_ATTRS_MAPPING = {
#'text-indent': convert_indents, 'text-indent': convert_indents,
'font-variant': lambda x: x, 'font-variant': lambda x: x,
'text-align': lambda x: x, 'text-align': lambda x: x,
'font': lambda x: '', 'font': lambda x: '',
'font-family': lambda x: LawCartaConfig.font_correspondence_table.get(x) or LawCartaConfig.font_correspondence_table.get(x.capitalize()), 'font-family': lambda x: LiveCartaConfig.font_correspondence_table.get(x) or LiveCartaConfig.font_correspondence_table.get(x.capitalize()),
'font-size': convert_font_size, 'font-size': convert_font_size,
'color': get_text_color, 'color': get_text_color,
'background-color': get_bg_color, 'background-color': get_bg_color,
@@ -145,7 +150,7 @@ LIVECARTA_STYLE_ATTRS_MAPPING = {
'border-bottom-width': lambda x: x if x != '0' else '', 'border-bottom-width': lambda x: x if x != '0' else '',
'list-style-type': lambda x: x if x in list_types else 'disc', 'list-style-type': lambda x: x if x in list_types else 'disc',
'list-style-image': lambda x: 'disc', 'list-style-image': lambda x: 'disc',
'margin-left': lambda x: x 'margin-left': convert_indents
} }
""" """
@@ -245,31 +250,46 @@ class TagStyleConverter:
@staticmethod @staticmethod
def convert_indentions_to_px(style): def convert_indentions_to_px(style):
margin_left_regexp = re.compile( margin_left_regexp = re.compile(
r'(margin-left:( *-*\w+%*);*)') r'(margin-left:( *-*\w+%);*)|(margin-left:( *-*\w+);*)')
text_indent_regexp = re.compile( text_indent_regexp = re.compile(
r'(text-indent:( *-*\w+%);*)|(text-indent:( *-*\w+);*)') r'(text-indent:( *-*\w+%);*)|(text-indent:( *-*\w+);*)')
has_margin_left = re.search(margin_left_regexp, style) has_margin_left = re.search(margin_left_regexp, style)
has_text_indent = re.search(text_indent_regexp, style) has_text_indent = re.search(text_indent_regexp, style)
# consider that 5% = 30px # consider that 5% = 30px
if has_margin_left and has_text_indent: if has_margin_left:
num_ml = abs(int("".join( hml_group = 0
filter(str.isdigit, str(has_margin_left.group(2))))) * 6) num_ml = 0
if has_text_indent.group(1): if has_margin_left.group(1):
num_ti = abs(int("".join( hml_group = has_margin_left.group(1)
filter(str.isdigit, str(has_text_indent.group(2))))) * 6) num_ml = abs(int("".join(
style = style.replace(has_text_indent.group(1), 'text-indent: ' + filter(str.isdigit, str(has_margin_left.group(2))))) * 6)
str(abs(num_ml - num_ti)) + 'px; ')
style = style.replace(has_margin_left.group(1), '')
return style
elif has_text_indent.group(3): elif has_margin_left.group(3):
num_ti = abs(int("".join( hml_group = has_margin_left.group(3)
filter(str.isdigit, str(has_text_indent.group(4))))) * 6) num_ml = abs(int("".join(
style = style.replace(has_text_indent.group(3), 'text-indent: ' + filter(str.isdigit, str(has_margin_left.group(4))))))
str(abs(num_ml - num_ti)) + 'px; ')
style = style.replace(has_margin_left.group(1), '') if has_text_indent:
return style if has_text_indent.group(1):
num_ti = abs(int("".join(
filter(str.isdigit, str(has_text_indent.group(2))))) * 6)
style = style.replace(has_text_indent.group(1), 'text-indent: ' +
str(abs(num_ml - num_ti)) + 'px; ')
style = style.replace(hml_group, '')
return style
elif has_text_indent.group(3):
num_ti = abs(int("".join(
filter(str.isdigit, str(has_text_indent.group(4))))))
style = style.replace(has_text_indent.group(3), 'text-indent: ' +
str(abs(num_ml - num_ti)) + 'px; ')
style = style.replace(hml_group, '')
return style
style = style.replace(hml_group, 'text-indent: ' +
str(abs(num_ml)) + 'px; ')
return style
elif has_text_indent: elif has_text_indent:
if has_text_indent.group(1): if has_text_indent.group(1):
@@ -282,12 +302,6 @@ class TagStyleConverter:
str("".join( str("".join(
filter(str.isdigit, str(has_text_indent.group(4))))) + 'px; ') filter(str.isdigit, str(has_text_indent.group(4))))) + 'px; ')
return style return style
elif has_margin_left:
num_ml = abs(int("".join(
filter(str.isdigit, str(has_margin_left.group(2))))) * 6)
style = style.replace(has_margin_left.group(1), 'text-indent: ' +
str(abs(num_ml)) + 'px; ')
return style
return style return style
def preprocess_style(self): def preprocess_style(self):

View File

@@ -2,7 +2,7 @@ import re
from typing import Union from typing import Union
from ebooklib.epub import Section, Link from ebooklib.epub import Section, Link
from livecarta_config import LawCartaConfig from livecarta_config import LiveCartaConfig
""" """
These are data structures which form mapping from NCX to python data structures. These are data structures which form mapping from NCX to python data structures.
@@ -64,14 +64,14 @@ class ChapterItem:
for i in self.sub_items: for i in self.sub_items:
sub_dicts.append(i.to_dict(lvl + 1)) sub_dicts.append(i.to_dict(lvl + 1))
if lvl > LawCartaConfig.SUPPORTED_LEVELS: if lvl > LiveCartaConfig.SUPPORTED_LEVELS:
return { return {
"title": self.title, "title": self.title,
"contents": [self.content] + [x['contents'] for x in sub_dicts], "contents": [self.content] + [x['contents'] for x in sub_dicts],
"sub_items": [] "sub_items": []
} }
if (lvl == LawCartaConfig.SUPPORTED_LEVELS) and sub_dicts: if (lvl == LiveCartaConfig.SUPPORTED_LEVELS) and sub_dicts:
return { return {
"title": self.title, "title": self.title,
"contents": [self.content] + flatten([x['contents'] for x in sub_dicts]), "contents": [self.content] + flatten([x['contents'] for x in sub_dicts]),

View File

@@ -18,7 +18,7 @@ from html_epub_preprocessor import unwrap_structural_tags, get_tags_between_chap
update_src_links_in_images, preprocess_footnotes update_src_links_in_images, preprocess_footnotes
from css_reader import clean_css, add_inline_style_to_html_soup from css_reader import clean_css, add_inline_style_to_html_soup
from livecarta_config import LawCartaConfig from livecarta_config import LiveCartaConfig
from util.helpers import BookLogger from util.helpers import BookLogger
@@ -107,6 +107,9 @@ class EpubConverter:
return nodes return nodes
def _read_css(self, css_href, html_path): def _read_css(self, css_href, html_path):
'''
'''
path_to_css_from_html = css_href path_to_css_from_html = css_href
html_folder = dirname(html_path) html_folder = dirname(html_path)
path_to_css_from_root = normpath(join(html_folder, path_to_css_from_html)).replace('\\', '/') path_to_css_from_root = normpath(join(html_folder, path_to_css_from_html)).replace('\\', '/')
@@ -117,8 +120,8 @@ class EpubConverter:
def build_css_content(self): def build_css_content(self):
css_href2content, html_href2css_href = {}, {} css_href2content, html_href2css_href = {}, {}
# html_href2css_href 1-to-1, todo: 1-to-many html_href2css_href = defaultdict(list)
# html_href2css_href 1-to-many
for item in self.ebooklib_book.get_items_of_type(ebooklib.ITEM_DOCUMENT): for item in self.ebooklib_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
html_text = item.content html_text = item.content
html_path = item.file_name html_path = item.file_name
@@ -127,13 +130,13 @@ class EpubConverter:
if tag.attrs.get('rel') and ('alternate' in tag.attrs['rel']): if tag.attrs.get('rel') and ('alternate' in tag.attrs['rel']):
continue continue
css_href = tag.attrs.get('href') css_href = tag.attrs.get('href')
html_href2css_href[html_path] = css_href html_href2css_href[html_path].append(css_href)
if css_href not in css_href2content: if css_href not in css_href2content:
css_href2content[css_href] = clean_css(self._read_css(css_href, html_path)) css_href2content[css_href] = clean_css(self._read_css(css_href, html_path))
for i, tag in enumerate(soup.find_all('style')): for i, tag in enumerate(soup.find_all('style')):
css_content = tag.string css_content = tag.string
html_href2css_href[html_path] = f'href{i}' html_href2css_href[html_path].append(f'href{i}')
css_href2content[f'href{i}'] = clean_css(css_content) css_href2content[f'href{i}'] = clean_css(css_content)
return css_href2content, html_href2css_href return css_href2content, html_href2css_href
@@ -141,7 +144,9 @@ class EpubConverter:
def add_css_styles2soup(self): def add_css_styles2soup(self):
for href in self.href2soup_html: for href in self.href2soup_html:
if self.html_href2css_href.get(href): if self.html_href2css_href.get(href):
css: str = self.css_href2content[self.html_href2css_href[href]] css =''
for key in self.html_href2css_href[href]:
css += self.css_href2content[key]
content: BeautifulSoup = self.href2soup_html[href] content: BeautifulSoup = self.href2soup_html[href]
content = add_inline_style_to_html_soup(content, css) content = add_inline_style_to_html_soup(content, css)
self.href2soup_html[href] = content self.href2soup_html[href] = content
@@ -399,7 +404,7 @@ class EpubConverter:
access=self.access, access=self.access,
path2aws_path=self.old_image_path2_aws_path) path2aws_path=self.old_image_path2_aws_path)
is_chapter = lvl <= LawCartaConfig.SUPPORTED_LEVELS is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
title_preprocessed, content_preprocessed = prepare_title_and_content(title, content, title_preprocessed, content_preprocessed = prepare_title_and_content(title, content,
remove_title_from_chapter=is_chapter) remove_title_from_chapter=is_chapter)
@@ -442,7 +447,7 @@ if __name__ == "__main__":
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0) logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
json_converter = EpubConverter('../epub/9781634256063.epub', json_converter = EpubConverter('../epub/index_with_html.epub',
logger=logger_object) logger=logger_object)
tmp = json_converter.convert_to_dict() tmp = json_converter.convert_to_dict()

View File

@@ -7,7 +7,7 @@ from typing import List
from bs4 import BeautifulSoup, NavigableString, Tag from bs4 import BeautifulSoup, NavigableString, Tag
from livecarta_config import LawCartaConfig from livecarta_config import LiveCartaConfig
from util.helpers import BookLogger, BookStatusWrapper from util.helpers import BookLogger, BookStatusWrapper
@@ -52,8 +52,8 @@ class HTMLDocxPreprocessor:
@classmethod @classmethod
def convert_pt_to_px(cls, value): def convert_pt_to_px(cls, value):
value = float(value) value = float(value)
if value == LawCartaConfig.WORD_DEFAULT_FONT_SIZE: if value == LiveCartaConfig.WORD_DEFAULT_FONT_SIZE:
return LawCartaConfig.LAWCARTA_DEFAULT_FONT_SIZE return LiveCartaConfig.LAWCARTA_DEFAULT_FONT_SIZE
else: else:
return value return value
@@ -73,7 +73,7 @@ class HTMLDocxPreprocessor:
size = size.group(1) size = size.group(1)
new_size = cls.convert_pt_to_px(size) new_size = cls.convert_pt_to_px(size)
if new_size == LawCartaConfig.LAWCARTA_DEFAULT_FONT_SIZE: if new_size == LiveCartaConfig.LAWCARTA_DEFAULT_FONT_SIZE:
return "" return ""
return re.sub(size + "pt", str(new_size) + "px", style) return re.sub(size + "pt", str(new_size) + "px", style)
@@ -93,18 +93,18 @@ class HTMLDocxPreprocessor:
if style: if style:
style = self.convert_font_pt_to_px(style) style = self.convert_font_pt_to_px(style)
if style != "": if style != "":
if color and color in LawCartaConfig.COLORS_MAP: if color and color in LiveCartaConfig.COLORS_MAP:
style += f'; color: {color};' style += f'; color: {color};'
font.attrs["style"] = style font.attrs["style"] = style
elif color and color in LawCartaConfig.COLORS_MAP: elif color and color in LiveCartaConfig.COLORS_MAP:
font.attrs["style"] = f'color: {color};' font.attrs["style"] = f'color: {color};'
if face is not None: if face is not None:
face = re.sub(r",[\w,\- ]*$", "", face) face = re.sub(r",[\w,\- ]*$", "", face)
if face != LawCartaConfig.DEFAULT_FONT_NAME and LawCartaConfig.font_correspondence_table.get(face): if face != LiveCartaConfig.DEFAULT_FONT_NAME and LiveCartaConfig.font_correspondence_table.get(face):
font.attrs["face"] = LawCartaConfig.font_correspondence_table[face] font.attrs["face"] = LiveCartaConfig.font_correspondence_table[face]
else: else:
font.attrs["face"] = LawCartaConfig.DEFAULT_FONT_NAME font.attrs["face"] = LiveCartaConfig.DEFAULT_FONT_NAME
if len(font.attrs) == 0: if len(font.attrs) == 0:
font.unwrap() font.unwrap()
@@ -182,12 +182,12 @@ class HTMLDocxPreprocessor:
p.attrs = {} p.attrs = {}
style = '' style = ''
if align is not None and align != LawCartaConfig.DEFAULT_ALIGN_STYLE: if align is not None and align != LiveCartaConfig.DEFAULT_ALIGN_STYLE:
style += f'text-align: {align};' style += f'text-align: {align};'
if indent is not None or indent_should_be_added: if indent is not None or indent_should_be_added:
# indent = indent.group(1) # indent = indent.group(1)
style += f'text-indent: {LawCartaConfig.INDENT};' style += f'text-indent: {LiveCartaConfig.INDENT};'
if style: if style:
p.attrs['style'] = style p.attrs['style'] = style
@@ -488,7 +488,7 @@ class HTMLDocxPreprocessor:
""" """
Function to convert all lower level headings to p tags Function to convert all lower level headings to p tags
""" """
pattern = f'^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$' pattern = f'^h[{LiveCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
header_tags = self.body_tag.find_all(re.compile(pattern)) header_tags = self.body_tag.find_all(re.compile(pattern))
for tag in header_tags: for tag in header_tags:
tag.name = 'p' tag.name = 'p'
@@ -592,8 +592,8 @@ class HTMLDocxPreprocessor:
if title == "": if title == "":
tag.unwrap() tag.unwrap()
else: else:
assert tag.name in LawCartaConfig.SUPPORTED_HEADERS, \ assert tag.name in LiveCartaConfig.SUPPORTED_HEADERS, \
f'Preprocessing went wrong, there is still h{LawCartaConfig.SUPPORTED_LEVELS + 1}-h9 headings.' f'Preprocessing went wrong, there is still h{LiveCartaConfig.SUPPORTED_LEVELS + 1}-h9 headings.'
content = list(tag.children) content = list(tag.children)

View File

@@ -6,7 +6,7 @@ from typing import List, Tuple
from bs4 import BeautifulSoup, NavigableString, Tag, Comment from bs4 import BeautifulSoup, NavigableString, Tag, Comment
from access import Access from access import Access
from livecarta_config import LawCartaConfig from livecarta_config import LiveCartaConfig
def save_image_locally(img_file_path, img_content, book_id): def save_image_locally(img_file_path, img_content, book_id):
@@ -148,7 +148,7 @@ def _heading_tag2p_tag(body_tag):
""" """
Function to convert all lower level headings to p tags Function to convert all lower level headings to p tags
""" """
pattern = f'^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$' pattern = f'^h[{LiveCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
header_tags = body_tag.find_all(re.compile(pattern)) header_tags = body_tag.find_all(re.compile(pattern))
for tag in header_tags: for tag in header_tags:
tag.name = 'p' tag.name = 'p'

View File

@@ -2,7 +2,7 @@ import logging
import re import re
from copy import copy from copy import copy
from livecarta_config import LawCartaConfig from livecarta_config import LiveCartaConfig
class LibraHTML2JSONConverter: class LibraHTML2JSONConverter:
@@ -32,7 +32,7 @@ class LibraHTML2JSONConverter:
:param ind: Index of header in content list. :param ind: Index of header in content list.
""" """
if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS: if self.content[ind].name in LiveCartaConfig.SUPPORTED_HEADERS:
title = str(self.content[ind]) title = str(self.content[ind])
title = title.replace(f'<{self.content[ind].name}>', '') title = title.replace(f'<{self.content[ind].name}>', '')
title = title.replace(f'</{self.content[ind].name}>', '') title = title.replace(f'</{self.content[ind].name}>', '')
@@ -49,7 +49,7 @@ class LibraHTML2JSONConverter:
while ind < len(self.content): while ind < len(self.content):
# 1. next tag is a header # 1. next tag is a header
if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS: if self.content[ind].name in LiveCartaConfig.SUPPORTED_HEADERS:
outline = int(re.sub(r"^h", "", self.content[ind].name)) outline = int(re.sub(r"^h", "", self.content[ind].name))
# - recursion step until h_i > h_initial # - recursion step until h_i > h_initial
if outline > curr_outline: if outline > curr_outline:
@@ -102,13 +102,13 @@ class LibraHTML2JSONConverter:
while ind < len(self.content): while ind < len(self.content):
res = {} res = {}
if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS: if self.content[ind].name in LiveCartaConfig.SUPPORTED_HEADERS:
res, ind = self.header_to_livecarta_chapter_item(ind) res, ind = self.header_to_livecarta_chapter_item(ind)
else: else:
chapter_title = f'Untitled chapter {ch_num}' chapter_title = f'Untitled chapter {ch_num}'
chapter = [] chapter = []
while ind < len(self.content) and self.content[ind].name not in LawCartaConfig.SUPPORTED_HEADERS: while ind < len(self.content) and self.content[ind].name not in LiveCartaConfig.SUPPORTED_HEADERS:
if not self._is_empty_p_tag(self.content[ind]): if not self._is_empty_p_tag(self.content[ind]):
chapter.append(self.format_html(str(self.content[ind]))) chapter.append(self.format_html(str(self.content[ind])))
ind += 1 ind += 1

View File

@@ -1,5 +1,5 @@
class LawCartaConfig: class LiveCartaConfig:
SUPPORTED_LEVELS = 5 SUPPORTED_LEVELS = 5
SUPPORTED_HEADERS = {"h1", "h2", "h3", "h4", "h5"} SUPPORTED_HEADERS = {"h1", "h2", "h3", "h4", "h5"}
HEADERS_LEVELS = {"h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "h9"} HEADERS_LEVELS = {"h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "h9"}