Merge pull request #302 from Teqniksoft/kiryl/converter_fix

Kiryl/converter fix
This commit is contained in:
Kiryl
2022-09-22 14:22:47 +03:00
committed by GitHub
9 changed files with 181 additions and 113 deletions

View File

@@ -1,7 +1,25 @@
# About <h1 align="center"> Converter </h1> <br>
<p align="center">
<a href="https://livecarta.com/">
<img alt="LiveCarta converter" title="LiveCarta converter" src="https://assets.openstax.org/oscms-prodcms/media/partner_logos/LiveCarta_Logo.png" width="450">
</a>
</p>
This repository contains code related to docx/epub files conversion to livecarta inner format. <!-- START doctoc generated TOC please keep comment here to allow auto update -->
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
## Table of Contents
- [Introduction](#introduction)
- [Features](#features)
- [Top level project structure](#top-level-project-structure)
- [How it Works](#how-it-works)
- [Setup](#setup)
- [Development](#development)
- [How to use](#how-to-use)
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
## Introduction
This is a Python 3 project for converting Docx|Epub documents -> LiveCarta inner format.
Livecarta book format is tree structure, where nodes are chapters. Livecarta book format is tree structure, where nodes are chapters.
Livecarta chapter is title + html code. Livecarta html code follows some restrictions: Livecarta chapter is title + html code. Livecarta html code follows some restrictions:
@@ -12,10 +30,57 @@ Livecarta chapter is title + html code. Livecarta html code follows some restric
- Styles are added as _inline_, i.e. attribute `style` in html tag. - Styles are added as _inline_, i.e. attribute `style` in html tag.
- Each tag has its own restrictions on attributes and style. See doc/style_config - Each tag has its own restrictions on attributes and style. See doc/style_config
## Features
- Converts Epub, Docx to JSON(LiveCarta inner format)
- Compatible with python 3
- Very small size (only .py files)
- Multithreaded
# Top level project structure ## Top level project structure
- `consumer.py` - code which is responsible for receiving messages from rabbitMQ - `consumer.py` - code which is responsible for receiving messages from rabbitMQ
- class `Access` - contains API code which is responsible for interaction with server. - class `Access` - contains API code which is responsible for interaction with server.
- class `Solver` - contains code responsible for pipeline of solving the task: receiving book file, conversion, status updating, sending result back to server. - class `Solver` - contains code responsible for pipeline of solving the task: receiving book file, conversion, status updating, sending result back to server.
- `livecarta_config.py `- constants that depend on LiveCarta - `livecarta_config.py `- constants that depend on LiveCarta
## How it Works
**2 approaches**, each of which works in 3 steps:
#### Epub
**Step 1** - Add CSS to HTML inline_style
**Step 2** - Process every HTML chapter of Epub with presets
**Step 3** - Convert dicts of HTML to JSON(LiveCarta inner format)
#### Docx
**Step 1** - Conversion of DOCX to HTML via LibreOffice
**Step 2** - Process HTML with presets
**Step 3** - Conversion of HTML to JSON(LiveCarta inner format)
## Setup
python -m pip install -r requirements.txt
## Development
To fix a bug or enhance an existing module, follow these steps:
- Fork the repo
- Create a new branch (`git checkout -b improve-feature`)
- Make the appropriate changes in the files
- Add changes to reflect the changes made
- Commit your changes (`git commit -am 'Improve feature'`)
- Push to the branch (`git push origin improve-feature`)
- Create a Pull Request
## How to Use
**a.** Run `consumer.py`
The script will be constantly waiting for a message from the queue (RabbitMQ), into which we load the book via "Import File to Convert" in the admin panel.
You can also upload a book that has been converted locally using `local_convert()` in `consumer.py`.
**b.** Run `docx_solver.py`
1. You need to run it on a Linux system; if you're using Windows, use a Python Docker interpreter instead
2. Upload a book to `books/docx/` and set the variable `docx_file_path = books/docx/book_name` in `__main__`
**c.** Run `epub_solver.py`
Before that, upload a book to `books/epub/` and set the variable `epub_file_path = books/epub/book_name` in `__main__`

View File

@@ -16,6 +16,10 @@
"name": "border", "name": "border",
"value": ".*" "value": ".*"
}, },
{
"name": "style",
"value": "border.*"
},
{ {
"name": "bgcolor", "name": "bgcolor",
"value": ".*" "value": ".*"
@@ -42,14 +46,14 @@
"preset_name": "replacer", "preset_name": "replacer",
"rules": [ "rules": [
{ {
"tags": ["^h[6-9]$", "^figure$", "^section$", "^div$"], "tags": ["^h[6-9]$", "^figure$", "^section$", "^div$", "blockquote"],
"condition": null, "condition": null,
"tag_to_replace": "p" "tag_to_replace": "p"
}, },
{ {
"tags": ["^aside$"], "tags": ["^aside$"],
"condition": null, "condition": null,
"tag_to_replace": "blockquote" "tag_to_replace": "div"
}, },
{ {
"tags": ["^header$", "^footer$"], "tags": ["^header$", "^footer$"],
@@ -65,6 +69,11 @@
}, },
"tag_to_replace": "span" "tag_to_replace": "span"
}, },
{
"tags": ["^em$"],
"condition": null,
"tag_to_replace": "i"
},
{ {
"tags": ["^b$"], "tags": ["^b$"],
"condition": null, "condition": null,
@@ -101,6 +110,7 @@
{ {
"tags": [ "tags": [
"^section$", "^section$",
"^blockquote$",
"^article$", "^article$",
"^figcaption$", "^figcaption$",
"^main$", "^main$",
@@ -131,6 +141,11 @@
"attrs": null "attrs": null
}, },
"tag_to_insert": "code" "tag_to_insert": "code"
},
{
"tags": ["^h[1-5]$"],
"condition": null,
"tag_to_insert": "strong"
} }
] ]
} }

View File

@@ -13,8 +13,7 @@ from src.inline_style_processor import modify_html_soup_with_css_styles
class HtmlDocxProcessor: class HtmlDocxProcessor:
def __init__(self, logger: BookLogger, html_soup: BeautifulSoup, html_preprocessor, style_preprocessor): def __init__(self, logger: BookLogger, html_soup: BeautifulSoup, html_preprocessor, style_preprocessor):
self.logger = logger self.logger = logger
self.html_soup = html_soup self.body_tag: BeautifulSoup = BeautifulSoup(str(html_soup.body))
self.body_tag = self.html_soup.body
self.html_preprocessor = html_preprocessor self.html_preprocessor = html_preprocessor
self.style_preprocessor = style_preprocessor self.style_preprocessor = style_preprocessor
self.content: List[Tag] = [] self.content: List[Tag] = []
@@ -23,7 +22,6 @@ class HtmlDocxProcessor:
for font in self.body_tag.find_all("font"): for font in self.body_tag.find_all("font"):
font.name = "span" font.name = "span"
def _process_hrefs(self): def _process_hrefs(self):
a_tags_with_href = self.body_tag.find_all( a_tags_with_href = self.body_tag.find_all(
"a", {"href": re.compile("^.*http.+")}) "a", {"href": re.compile("^.*http.+")})
@@ -206,10 +204,9 @@ class HtmlDocxProcessor:
else: else:
h_tag.unwrap() h_tag.unwrap()
def delete_content_before_toc(self): def delete_content_before_toc(self):
# remove all tag upper the <TOC> only in content !!! body tag is not updated # remove all tag upper the <TOC> only in content !!! body tag is not updated
toc_tag = self.html_soup.new_tag("TOC") toc_tag = self.body_tag.new_tag("TOC")
if toc_tag in self.content: if toc_tag in self.content:
ind = self.content.index(toc_tag) + 1 ind = self.content.index(toc_tag) + 1
self.content = self.content[ind:] self.content = self.content[ind:]
@@ -229,7 +226,7 @@ class HtmlDocxProcessor:
self.body_tag) self.body_tag)
self.logger.log("Inline style processing.") self.logger.log("Inline style processing.")
modify_html_soup_with_css_styles(self.body_tag) self.body_tag = modify_html_soup_with_css_styles(self.body_tag)
self.logger.log("Image processing.") self.logger.log("Image processing.")
images = process_images(access, path_to_html=html_path, images = process_images(access, path_to_html=html_path,
@@ -256,9 +253,9 @@ class HtmlDocxProcessor:
self.logger.log(f".html using presets processing.") self.logger.log(f".html using presets processing.")
_process_presets(html_preprocessor=self.html_preprocessor, _process_presets(html_preprocessor=self.html_preprocessor,
html_soup=self.html_soup) html_soup=self.body_tag)
self.content = self.body_tag.find_all(recursive=False) self.content = self.body_tag.body.find_all(recursive=False)
# delete text before table of content if exists # delete text before table of content if exists
self.delete_content_before_toc() self.delete_content_before_toc()

View File

@@ -1,5 +1,5 @@
import re import re
from typing import Union from typing import List, Union
from bs4.element import PageElement from bs4.element import PageElement
from bs4 import BeautifulSoup, Tag, NavigableString, Comment from bs4 import BeautifulSoup, Tag, NavigableString, Comment
@@ -92,26 +92,26 @@ class HtmlEpubProcessor:
clean/remove headings & add span with id clean/remove headings & add span with id
""" """
title_of_chapter = title_of_chapter.lower() def text_preparing(tag: PageElement):
for tag in chapter_tag.contents:
tag: PageElement
text: str = tag if isinstance(tag, NavigableString) else tag.text text: str = tag if isinstance(tag, NavigableString) else tag.text
if re.sub(r"[\s\xa0]", "", text): text = re.sub(r"[\s\xa0]", " ", text).lower()
text = re.sub(r"[\s\xa0]", " ", text).lower() text = text.strip() # delete extra spaces
text = text.strip() # delete extra spaces return text
if not isinstance(tag, NavigableString):
if title_of_chapter == text or \ title_of_chapter: str = title_of_chapter.lower()
(title_of_chapter in text and title_in_text: List[Tag] = chapter_tag.find_all(lambda tag: title_of_chapter == text_preparing(tag) or \
re.findall(r"^h[1-3]$", tag.name or chapter_tag.name)): (title_of_chapter in text_preparing(tag) and
self.html_preprocessor._add_span_to_save_ids_for_links( re.findall(r"^h[1-3]$", tag.name or chapter_tag.name)))
tag, chapter_tag)
tag.extract() text_in_title: List[Tag] = chapter_tag.find_all(lambda tag: (text_preparing(tag) in title_of_chapter))
return if title_in_text:
elif not self._remove_headings_content(tag, title_of_chapter): self.html_preprocessor._add_span_to_save_ids_for_links(
break title_in_text[-1], chapter_tag)
else: title_in_text[-1].extract()
tag.extract() elif text_in_title:
return [self.html_preprocessor._add_span_to_save_ids_for_links(
tag, chapter_tag) for tag in text_in_title]
[tag.extract() for tag in text_in_title]
@staticmethod @staticmethod
def _class_removing(chapter_tag: BeautifulSoup): def _class_removing(chapter_tag: BeautifulSoup):

View File

@@ -28,6 +28,7 @@ class HtmlPresetsProcessor:
@staticmethod @staticmethod
def _decompose_tag(**kwargs): def _decompose_tag(**kwargs):
kwargs["tag"].parent.attrs.update(kwargs["tag"].attrs)
kwargs["tag"].decompose() kwargs["tag"].decompose()
@staticmethod @staticmethod
@@ -112,6 +113,7 @@ class HtmlPresetsProcessor:
@staticmethod @staticmethod
def _unwrap_tag(**kwargs): def _unwrap_tag(**kwargs):
kwargs["tag"].parent.attrs.update(kwargs["tag"].attrs)
kwargs["tag"].unwrap() kwargs["tag"].unwrap()
@staticmethod @staticmethod
@@ -153,7 +155,6 @@ class HtmlPresetsProcessor:
for parent_tag in body_tag.select(condition_on_tag[1]): for parent_tag in body_tag.select(condition_on_tag[1]):
for tag in parent_tag.find_all([re.compile(tag) for tag in tags]): for tag in parent_tag.find_all([re.compile(tag) for tag in tags]):
# parent_tag != tag.parent # parent_tag != tag.parent
tag.parent.attrs.update(tag.attrs)
action(body_tag=body_tag, tag=tag, rule=rule) action(body_tag=body_tag, tag=tag, rule=rule)
elif condition_on_tag[0] == "child_tags": elif condition_on_tag[0] == "child_tags":
for tag in body_tag.find_all([re.compile(tag) for tag in tags]): for tag in body_tag.find_all([re.compile(tag) for tag in tags]):

View File

@@ -14,7 +14,7 @@ class InlineStyleProcessor:
def __init__(self, tag_inline_style: Tag): def __init__(self, tag_inline_style: Tag):
# tag with inline style + style parsed from css file # tag with inline style + style parsed from css file
self.tag_inline_style = tag_inline_style self.tag_inline_style = tag_inline_style
self.tag_inline_style.attrs['style']: str = self.process_inline_style() self.tag_inline_style.attrs["style"]: str = self.process_inline_style()
@staticmethod @staticmethod
def remove_white_if_no_bgcolor(style_: str, tag: Tag) -> str: def remove_white_if_no_bgcolor(style_: str, tag: Tag) -> str:
@@ -80,19 +80,19 @@ class InlineStyleProcessor:
processed_style = ";".join(split_style)+';' processed_style = ";".join(split_style)+';'
margin_left_regexp = re.compile( margin_left_regexp = re.compile(
r"((margin-left|margin): *(-*\w+);*)") r"((margin-left|margin): *-*((\d*)\.*\d+)\w+;*)")
text_indent_regexp = re.compile( text_indent_regexp = re.compile(
r"(text-indent: *(-*\w+);*)") r"(text-indent: *-*((\d*)\.*\d+)\w+;*)")
has_margin = re.search(margin_left_regexp, processed_style) has_margin = re.search(margin_left_regexp, processed_style)
has_text_indent = re.search(text_indent_regexp, processed_style) has_text_indent = re.search(text_indent_regexp, processed_style)
if has_margin: if has_margin:
num_m = abs(int("0" + "".join( num_m = abs(int("0" + "".join(
filter(str.isdigit, str(has_margin.group(3)))))) filter(str.isdigit, str(has_margin.group(4))))))
if has_text_indent: if has_text_indent:
num_ti = abs(int("0" + "".join( num_ti = abs(int("0" + "".join(
filter(str.isdigit, str(has_text_indent.group(2)))))) filter(str.isdigit, str(has_text_indent.group(3))))))
processed_style = processed_style.replace(has_text_indent.group(1), "text-indent: " + processed_style = processed_style.replace(has_text_indent.group(1), "text-indent: " +
str(abs(num_m - num_ti)) + "px; ") str(abs(num_m - num_ti)) + "px; ")
processed_style = processed_style.replace( processed_style = processed_style.replace(
@@ -106,7 +106,7 @@ class InlineStyleProcessor:
elif has_text_indent: elif has_text_indent:
processed_style = processed_style.replace(has_text_indent.group(1), "text-indent: " + processed_style = processed_style.replace(has_text_indent.group(1), "text-indent: " +
str(abs(int("0" + "".join( str(abs(int("0" + "".join(
filter(str.isdigit, str(has_text_indent.group(2))))))) filter(str.isdigit, str(has_text_indent.group(3)))))))
+ "px; ") + "px; ")
return processed_style return processed_style
return processed_style return processed_style
@@ -127,22 +127,25 @@ class InlineStyleProcessor:
processed inline style processed inline style
""" """
inline_style = self.tag_inline_style.attrs.get("style") + ";" if self.tag_inline_style.attrs.get("style"):
# 1. Remove white color if tag doesn"t have background color in style inline_style = self.tag_inline_style.attrs.get("style") + ";"
inline_style = self.remove_white_if_no_bgcolor( # 1. Remove white color if tag doesn't have background color in style
inline_style, self.tag_inline_style) inline_style = self.remove_white_if_no_bgcolor(
inline_style = inline_style.replace( inline_style, self.tag_inline_style)
"list-style-image", "list-style-type") inline_style = inline_style.replace(
# 2. Create list of styles from inline style "list-style-image", "list-style-type")
# replace all spaces between "; & letter" to ";" # 2. Create list of styles from inline style
style = re.sub(r"; *", ";", inline_style) # replace all spaces between "; & letter" to ";"
# when we split style by ";", last element of the list is "" - None (remove it) style = re.sub(r"; *", ";", inline_style)
split_inline_style: list = list(filter(None, style.split(";"))) # when we split style by ";", last element of the list is "" - None (remove it)
# 3. Duplicate styles check - if the tag had duplicate styles split_inline_style: list = list(filter(None, style.split(";")))
# split_inline_style = self.duplicate_styles_check(split_inline_style) # 3. Duplicate styles check - if the tag had duplicate styles
# 4. Processing indents # split_inline_style = self.duplicate_styles_check(split_inline_style)
inline_style: str = self.indents_processing(split_inline_style) # 4. Processing indents
return inline_style inline_style: str = self.indents_processing(split_inline_style)
return inline_style
else:
return ""
@staticmethod @staticmethod
def check_style_to_be_tag(style: str) -> List[tuple]: def check_style_to_be_tag(style: str) -> List[tuple]:

View File

@@ -59,6 +59,7 @@ class LiveCartaConfig:
"font-style": ["italic"], # <i> "font-style": ["italic"], # <i>
"text-decoration": ["underline", "line-through"], # <u> , <s> "text-decoration": ["underline", "line-through"], # <u> , <s>
"text-decoration-line": ["underline", "line-through"], # <u> , <s> "text-decoration-line": ["underline", "line-through"], # <u> , <s>
"text-transform": [],
"vertical-align": ["super"], # <sup> "vertical-align": ["super"], # <sup>
"color": [], "color": [],
"background-color": [], "background-color": [],
@@ -76,4 +77,5 @@ class LiveCartaConfig:
"margin-left": [], "margin-left": [],
"margin-top": [], "margin-top": [],
"margin": [], "margin": [],
} }

View File

@@ -1,6 +1,6 @@
import re import re
import cssutils import cssutils
from typing import Tuple from typing import List, Tuple, Union
from os.path import dirname, normpath, join from os.path import dirname, normpath, join
from src.util.color_reader import str2hex from src.util.color_reader import str2hex
@@ -16,28 +16,29 @@ class StyleReader:
to suit LiveCarta style convention. to suit LiveCarta style convention.
""" """
self.LIVECARTA_STYLE_ATTRS_MAPPING = { self.LIVECARTA_STYLE_ATTRS_MAPPING = {
"text-indent": self.convert_indents_tag_values, "text-indent": lambda x: self.convert_tag_style_values(x, is_indent=True),
"font-variant": lambda x: x, "font-variant": lambda x: x,
"text-align": lambda x: x, "text-align": lambda x: x,
"font": lambda x: "", "font": lambda x: "",
"font-family": lambda x: x, "font-family": lambda x: x,
"font-size": self.convert_tag_style_values, "font-size": self.convert_tag_style_values,
"text-transform": lambda x: x,
"color": self.get_text_color, "color": self.get_text_color,
"background-color": self.get_bg_color, "background-color": self.get_bg_color,
"background": self.get_bg_color, "background": self.get_bg_color,
"border": lambda x: x if x != "0" else "", "border": self.convert_tag_style_values,
"border-top-width": lambda x: x if x != "0" else "", "border-top-width": self.convert_tag_style_values,
"border-right-width": lambda x: x if x != "0" else "", "border-right-width": self.convert_tag_style_values,
"border-left-width": lambda x: x if x != "0" else "", "border-left-width": self.convert_tag_style_values,
"border-bottom-width": lambda x: x if x != "0" else "", "border-bottom-width": self.convert_tag_style_values,
"border-top": lambda x: x if x != "0" else "", "border-top": self.convert_tag_style_values,
"border-bottom": lambda x: x if x != "0" else "", "border-bottom": self.convert_tag_style_values,
"list-style-type": lambda x: x if x in LiveCartaConfig.list_types else "disc", "list-style-type": lambda x: x if x in LiveCartaConfig.list_types else "disc",
"list-style-image": lambda x: "disc", "list-style-image": lambda x: "disc",
"margin-left": self.convert_indents_tag_values, "margin-left": lambda x: self.convert_tag_style_values(x, is_indent=True),
"margin-top": self.convert_tag_style_values, "margin-top": lambda x: self.convert_tag_style_values(x, is_indent=True),
"margin": self.convert_indents_tag_values, "margin": lambda x: self.convert_tag_style_values(x, is_indent=True),
"width": self.convert_tag_style_values, "width": lambda x: self.convert_tag_style_values(x) if "%" not in x else x
} }
@staticmethod @staticmethod
@@ -68,43 +69,26 @@ class StyleReader:
------- -------
size_value: str size_value: str
converted value size converted value size
""" """
size_regexp = re.compile( def convert_size_number(size_number: str, unit_to_replace: str, multiplier: float) -> str:
r"(^-*(\d*\.*\d+)%$)|(^-*(\d*\.*\d+)em$)|(^-*(\d*\.*\d+)pt$)|(^-*(\d*\.*\d+)in$)") size_number = float(size_number.replace(unit_to_replace, "")) * multiplier
has_style_attrs = re.search(size_regexp, size_value) return str(size_number) + "px"
if has_style_attrs: has_size = re.search(r"(\d+)([\w%]+)", size_value)
if has_style_attrs.group(1): values: List = size_value.split(" ")
if has_size:
size_number_idx = [i for i, value in enumerate(values) if re.search("(\d+)([\w%]+)", value)][0]
if has_size.group(2) == "%":
multiplier = 5.76 if is_indent else 0.16 multiplier = 5.76 if is_indent else 0.16
size_value = float(size_value.replace("%", "")) * multiplier values[size_number_idx] = convert_size_number(values[size_number_idx], "%", multiplier)
return str(size_value) + "px" elif has_size.group(2) == "em":
elif has_style_attrs.group(3):
multiplier = 18 if is_indent else 16 multiplier = 18 if is_indent else 16
size_value = float(size_value.replace("em", "")) * multiplier values[size_number_idx] = convert_size_number(values[size_number_idx], "em", multiplier)
return str(size_value) + "px" elif has_size.group(2) == "pt":
elif has_style_attrs.group(5): values[size_number_idx] = convert_size_number(values[size_number_idx], "pt", 4 / 3)
size_value = float(size_value.replace("pt", "")) * 4/3 elif has_size.group(2) == "in":
return str(size_value) + "px" values[size_number_idx] = convert_size_number(values[size_number_idx], "in", 96)
elif has_style_attrs.group(7): size_value = " ".join(values)
size_value = float(size_value.replace("in", "")) * 96
return str(size_value) + "px"
else:
return ""
return size_value
def convert_indents_tag_values(self, size_value: str) -> str:
"""
Function converts values of ["text-indent", "margin-left", "margin"]
Parameters
----------
size_value: str
Returns
-------
size_value: str
"""
size_value = self.convert_tag_style_values(size_value.split(" ")[-2], True) if len(size_value.split(" ")) == 3\
else self.convert_tag_style_values(size_value.split(" ")[-1], True)
return size_value return size_value
@staticmethod @staticmethod
@@ -125,17 +109,18 @@ class StyleReader:
return constraints_on_value, value_not_in_possible_values_list return constraints_on_value, value_not_in_possible_values_list
def update_inline_styles_to_livecarta_convention(self, split_style: list) -> list: def update_inline_styles_to_livecarta_convention(self, split_style: list) -> list:
for i, style in enumerate(split_style): for i, style in reversed(list(enumerate(split_style))):
style_name, style_value = style.split(":") style_name, style_value = style.split(":")
if style_name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS: if style_name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS:
# property not in LIVECARTA_STYLE_ATTRS, remove from css file # property not in LIVECARTA_STYLE_ATTRS, remove
split_style[i] = "" split_style.remove(style)
return split_style continue
cleaned_value = self.clean_value(style_value, style_name) cleaned_value = self.clean_value(style_value, style_name)
if all(self.style_conditions(cleaned_value, style_name)): if all(self.style_conditions(cleaned_value, style_name)):
# there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove from css file # there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove
split_style[i] = "" split_style.remove(style)
continue
else: else:
if style_name in self.LIVECARTA_STYLE_ATTRS_MAPPING: if style_name in self.LIVECARTA_STYLE_ATTRS_MAPPING:
# function that converts our data # function that converts our data
@@ -156,7 +141,7 @@ class StyleReader:
split_style = self.update_inline_styles_to_livecarta_convention( split_style = self.update_inline_styles_to_livecarta_convention(
split_style) split_style)
style = "; ".join(split_style) style = "; ".join(split_style) if split_style else ""
return style return style
def process_inline_styles_in_html_soup(self, html_content): def process_inline_styles_in_html_soup(self, html_content):

View File

@@ -103,7 +103,7 @@ def str2hex(s: str) -> str:
return rgb_percent_to_hex((r, g, b)) return rgb_percent_to_hex((r, g, b))
if "rgb" in s.lower(): if "rgb" in s.lower():
rgba = re.findall("([0-9] *\.?[0-9]+)", s) rgba = re.findall("(\d+(?:\.\d+)?)", s)
r, g, b = int(rgba[0]), int(rgba[1]), int(rgba[2]) r, g, b = int(rgba[0]), int(rgba[1]), int(rgba[2])
if len(rgba) == 4: if len(rgba) == 4:
alpha = float(rgba[3]) alpha = float(rgba[3])