Fix double tags

This commit is contained in:
Kibzik
2023-03-17 12:59:17 +03:00
parent 28afc1d67f
commit e073ff0623

View File

@@ -179,20 +179,28 @@ class InlineStyleProcessor:
def change_attrs_with_corresponding_tags(self): def change_attrs_with_corresponding_tags(self):
# adds <strong>, <u>, <sup> instead of styles # adds <strong>, <u>, <sup> instead of styles
styles_to_remove = self.check_style_to_be_tag(self.tag_inline_style.attrs['style']) styles_to_remove = self.check_style_to_be_tag(self.tag_inline_style.attrs['style'])
for i, (attr, value) in enumerate(styles_to_remove): style_attr = self.tag_inline_style.attrs.get('style', '')
self.tag_inline_style.attrs["style"] = self.tag_inline_style.attrs["style"]\ # Replace each style with its corresponding tag
.replace(f"{attr}:{value};", "").strip() for attr, value in styles_to_remove:
corr_tag_name = LiveCartaConfig.STYLE_ATTRS_TO_TAGS[( # Remove the attribute and value from the style attribute
attr, value)] self.tag_inline_style.attrs["style"] = '; '.join(
[s for s in self.tag_inline_style.attrs.get('style', '').split(';')
if f'{attr}:{value}' not in s]).strip()
# Create a new tag for the corresponding style
corr_tag_name = LiveCartaConfig.STYLE_ATTRS_TO_TAGS[(attr, value)]
correspond_tag = BeautifulSoup(features="lxml").new_tag(corr_tag_name) correspond_tag = BeautifulSoup(features="lxml").new_tag(corr_tag_name)
# Move the contents of the original tag into the new tag
for content in reversed(self.tag_inline_style.contents): for content in reversed(self.tag_inline_style.contents):
correspond_tag.insert(0, content.extract()) correspond_tag.insert(0, content.extract())
# Add the new tag to the original tag
self.tag_inline_style.append(correspond_tag) self.tag_inline_style.append(correspond_tag)
@staticmethod @staticmethod
def wrap_span_in_tag_to_save_style_attrs(initial_tag: Tag) -> Tag: def wrap_span_in_tag_to_save_style_attrs(initial_tag: Tag) -> Tag:
"""Function designed to save style attrs that cannot be in tag.name -> span""" """Function designed to save style attrs that cannot be in tag.name -> span"""
# Compile a regex pattern to match tag names that can have certain style attributes # Compile a regex pattern to match tag names that can have certain style attributes
dictkeys_pattern = re.compile("|".join(LiveCartaConfig.TAGS_TO_STYLE_ATTRS_CAN_BE_IN_TAG)) dictkeys_pattern = re.compile("|".join(LiveCartaConfig.TAGS_TO_STYLE_ATTRS_CAN_BE_IN_TAG))