forked from LiveCarta/BookConverter
Wrote documentation for every func/class in .py
This commit is contained in:
@@ -54,7 +54,6 @@ def convert_book(book_type: [DocxBook, EpubBook], params: dict, logger, book_id)
|
|||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
logger.info(f'Book-{book_id} has been proceeded.')
|
logger.info(f'Book-{book_id} has been proceeded.')
|
||||||
print('Book has been proceeded.')
|
|
||||||
|
|
||||||
|
|
||||||
def callback(ch, method, properties, body, logger, libra_locker):
|
def callback(ch, method, properties, body, logger, libra_locker):
|
||||||
|
|||||||
@@ -1,10 +1,3 @@
|
|||||||
""" This is Main Abstract class for solving a task of a book conversion
|
|
||||||
|
|
||||||
Having an id of coming book, gets book from server, runs conversion.
|
|
||||||
In parallel it updates status of a book conversion on admin panel.
|
|
||||||
Finally sends result to server.
|
|
||||||
Result is a json, JSON schema in book_schema.json
|
|
||||||
"""
|
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import codecs
|
import codecs
|
||||||
@@ -17,6 +10,14 @@ from src.util.helpers import BookLogger, BookStatusWrapper
|
|||||||
|
|
||||||
|
|
||||||
class BookSolver:
|
class BookSolver:
|
||||||
|
"""
|
||||||
|
This is Main Abstract class for solving a task of a book conversion
|
||||||
|
Having an id of coming book, gets book from server, runs conversion.
|
||||||
|
In parallel it updates status of a book conversion on admin panel.
|
||||||
|
Finally sends result to server.
|
||||||
|
Result is a json, JSON schema in book_schema.json
|
||||||
|
"""
|
||||||
|
|
||||||
__metaclass__ = ABCMeta
|
__metaclass__ = ABCMeta
|
||||||
|
|
||||||
def __init__(self, book_id=0, access=None, main_logger=None):
|
def __init__(self, book_id=0, access=None, main_logger=None):
|
||||||
@@ -55,9 +56,7 @@ class BookSolver:
|
|||||||
self.file_path = pathlib.Path(file_path)
|
self.file_path = pathlib.Path(file_path)
|
||||||
|
|
||||||
def get_book_file(self):
|
def get_book_file(self):
|
||||||
"""
|
""" Method for getting and saving book from server. """
|
||||||
Method for getting and saving book from server.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
self.logger_object.log(f'Start receiving file from server. URL: {self.access.url}/doc-convert/{self.book_id}/file')
|
self.logger_object.log(f'Start receiving file from server. URL: {self.access.url}/doc-convert/{self.book_id}/file')
|
||||||
content = self.access.get_book(self.book_id)
|
content = self.access.get_book(self.book_id)
|
||||||
@@ -92,6 +91,7 @@ class BookSolver:
|
|||||||
self.logger_object.log('Error has occurred while writing json file.' + str(exc), logging.ERROR)
|
self.logger_object.log('Error has occurred while writing json file.' + str(exc), logging.ERROR)
|
||||||
|
|
||||||
def send_json_content_to_server(self, content: dict):
|
def send_json_content_to_server(self, content: dict):
|
||||||
|
""" Function sends json_content to site """
|
||||||
try:
|
try:
|
||||||
self.access.send_book(self.book_id, content)
|
self.access.send_book(self.book_id, content)
|
||||||
self.logger_object.log(f'JSON data has been sent to server.')
|
self.logger_object.log(f'JSON data has been sent to server.')
|
||||||
@@ -108,8 +108,10 @@ class BookSolver:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
def test_conversion(self):
|
def test_conversion(self):
|
||||||
'''Function
|
"""
|
||||||
without sending to server'''
|
Function
|
||||||
|
- without sending to server
|
||||||
|
"""
|
||||||
self.logger_object.log('Beginning of the test.')
|
self.logger_object.log('Beginning of the test.')
|
||||||
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
folder_path = os.path.join(folder_path, f'{self.book_type}')
|
folder_path = os.path.join(folder_path, f'{self.book_type}')
|
||||||
@@ -121,9 +123,11 @@ class BookSolver:
|
|||||||
self.logger_object.log('End of the test.')
|
self.logger_object.log('End of the test.')
|
||||||
|
|
||||||
def conversion(self):
|
def conversion(self):
|
||||||
'''Function
|
"""
|
||||||
with downloading book from server
|
Function
|
||||||
with sending to server'''
|
- with downloading book from server
|
||||||
|
- with sending to server
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
self.logger_object.log(f'Beginning of conversion from .{self.book_type} to .json.')
|
self.logger_object.log(f'Beginning of conversion from .{self.book_type} to .json.')
|
||||||
self.get_book_file()
|
self.get_book_file()
|
||||||
@@ -140,9 +144,11 @@ class BookSolver:
|
|||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
def conversion_local(self):
|
def conversion_local(self):
|
||||||
'''Function
|
"""
|
||||||
without downloading book from server (local)
|
Function
|
||||||
with sending to server'''
|
- without downloading book from server (local)
|
||||||
|
- with sending to server
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
self.logger_object.log(f'Data has been downloaded from tmp.json file: {self.file_path}')
|
self.logger_object.log(f'Data has been downloaded from tmp.json file: {self.file_path}')
|
||||||
with codecs.open('json/tmp.json', 'r', encoding='utf-8') as f_json:
|
with codecs.open('json/tmp.json', 'r', encoding='utf-8') as f_json:
|
||||||
|
|||||||
@@ -2,21 +2,22 @@ import re
|
|||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
from ebooklib.epub import Section, Link
|
from ebooklib.epub import Section, Link
|
||||||
|
|
||||||
from src.livecarta_config import LiveCartaConfig
|
from src.livecarta_config import LiveCartaConfig
|
||||||
|
|
||||||
"""
|
|
||||||
These are data structures which form mapping from NCX to python data structures.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
class NavPoint:
|
class NavPoint:
|
||||||
|
"""
|
||||||
|
Class - Navigation Point, - every html|xhtml from epub
|
||||||
|
These are data structures which form mapping from NCX to python data structures.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, obj: Union[Link, Section] = None, ):
|
def __init__(self, obj: Union[Link, Section] = None, ):
|
||||||
self.href, self.id = self.parse_href_id(obj)
|
self.href, self.id = self.parse_href_id(obj)
|
||||||
self.title = obj.title
|
self.title = obj.title
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def parse_href_id(item: Union[Link, Section]):
|
def parse_href_id(item: Union[Link, Section]):
|
||||||
|
"""Function parses href & id from item.href"""
|
||||||
reg = r'(.+\..+\#)(.+)'
|
reg = r'(.+\..+\#)(.+)'
|
||||||
match = re.search(reg, item.href)
|
match = re.search(reg, item.href)
|
||||||
href, div_id = None, None
|
href, div_id = None, None
|
||||||
@@ -36,13 +37,8 @@ class NavPoint:
|
|||||||
return '<NavPoint: %s, %s>' % (self.href, self.id)
|
return '<NavPoint: %s, %s>' % (self.href, self.id)
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
These are data structures which form mapping to livecarta json structure.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
def flatten(x):
|
def flatten(x):
|
||||||
""" magic function from stackoverflow for list flattening """
|
"""magic function from stackoverflow for list flattening"""
|
||||||
atom = lambda i: not isinstance(i, list)
|
atom = lambda i: not isinstance(i, list)
|
||||||
nil = lambda i: not i
|
nil = lambda i: not i
|
||||||
car = lambda i: i[0]
|
car = lambda i: i[0]
|
||||||
@@ -54,12 +50,18 @@ def flatten(x):
|
|||||||
|
|
||||||
|
|
||||||
class ChapterItem:
|
class ChapterItem:
|
||||||
|
"""
|
||||||
|
Class of Chapter that could have subchapters
|
||||||
|
These are data structures which form mapping to livecarta json structure.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, title, content, sub_items):
|
def __init__(self, title, content, sub_items):
|
||||||
self.title = title
|
self.title = title
|
||||||
self.content = content
|
self.content = content
|
||||||
self.sub_items = sub_items
|
self.sub_items = sub_items
|
||||||
|
|
||||||
def to_dict(self, lvl=1):
|
def to_dict(self, lvl=1):
|
||||||
|
"""Function returns dictionary of chapter"""
|
||||||
sub_dicts = []
|
sub_dicts = []
|
||||||
if self.sub_items:
|
if self.sub_items:
|
||||||
for i in self.sub_items:
|
for i in self.sub_items:
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from src.book_solver import BookSolver
|
|||||||
|
|
||||||
|
|
||||||
class DocxBook(BookSolver):
|
class DocxBook(BookSolver):
|
||||||
|
"""Class of .docx type book - child of BookSolver"""
|
||||||
|
|
||||||
def __init__(self, book_id=0, access=None, html_path=None,
|
def __init__(self, book_id=0, access=None, html_path=None,
|
||||||
main_logger=None, libra_locker=None):
|
main_logger=None, libra_locker=None):
|
||||||
@@ -30,9 +31,7 @@ class DocxBook(BookSolver):
|
|||||||
self.logger_object.log(f'Any error while libra conversion for book_{self.book_id}: {result.stderr}', logging.DEBUG)
|
self.logger_object.log(f'Any error while libra conversion for book_{self.book_id}: {result.stderr}', logging.DEBUG)
|
||||||
|
|
||||||
def convert_doc_to_html(self):
|
def convert_doc_to_html(self):
|
||||||
"""
|
"""Method for convert .docx document to .html file."""
|
||||||
Method for convert .docx document to .html file.
|
|
||||||
"""
|
|
||||||
self.logger_object.log(f'File - {self.file_path}.')
|
self.logger_object.log(f'File - {self.file_path}.')
|
||||||
print(f'{self.file_path}')
|
print(f'{self.file_path}')
|
||||||
self.logger_object.log('Beginning of conversion from .docx to .html.')
|
self.logger_object.log('Beginning of conversion from .docx to .html.')
|
||||||
@@ -92,9 +91,7 @@ class DocxBook(BookSolver):
|
|||||||
self.logger_object.log(f'Input file path after conversion: {self.html_path}.')
|
self.logger_object.log(f'Input file path after conversion: {self.html_path}.')
|
||||||
|
|
||||||
def read_html(self):
|
def read_html(self):
|
||||||
"""
|
"""Method for reading .html file into beautiful soup tag."""
|
||||||
Method for reading .html file into beautiful soup tag.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
html_text = open(self.html_path, 'r', encoding='utf8').read()
|
html_text = open(self.html_path, 'r', encoding='utf8').read()
|
||||||
self.logger_object.log('HTML for book has been loaded.')
|
self.logger_object.log('HTML for book has been loaded.')
|
||||||
@@ -130,7 +127,6 @@ class DocxBook(BookSolver):
|
|||||||
1. Convert docx to html with libra office
|
1. Convert docx to html with libra office
|
||||||
2. Parse and clean html, get list of tags, get footnotes
|
2. Parse and clean html, get list of tags, get footnotes
|
||||||
3. Parse from line structure to nested structure with JSONConverter
|
3. Parse from line structure to nested structure with JSONConverter
|
||||||
|
|
||||||
"""
|
"""
|
||||||
self.convert_doc_to_html()
|
self.convert_doc_to_html()
|
||||||
self.check_output_directory()
|
self.check_output_directory()
|
||||||
|
|||||||
@@ -35,9 +35,7 @@ class HTMLDocxPreprocessor:
|
|||||||
tag.unwrap()
|
tag.unwrap()
|
||||||
|
|
||||||
def _clean_underline_links(self):
|
def _clean_underline_links(self):
|
||||||
"""
|
"""Function cleans meaningless <u> tags before links."""
|
||||||
Function cleans meaningless <u> tags before links.
|
|
||||||
"""
|
|
||||||
underlines = self.body_tag.find_all("u")
|
underlines = self.body_tag.find_all("u")
|
||||||
for u in underlines:
|
for u in underlines:
|
||||||
if u.find_all('a'):
|
if u.find_all('a'):
|
||||||
@@ -79,9 +77,7 @@ class HTMLDocxPreprocessor:
|
|||||||
return re.sub(size + "pt", str(new_size) + "px", style)
|
return re.sub(size + "pt", str(new_size) + "px", style)
|
||||||
|
|
||||||
def _font_to_span(self):
|
def _font_to_span(self):
|
||||||
"""
|
"""Function to convert <font> tag to <span>. If font style is default, then remove this tag."""
|
||||||
Function to convert <font> tag to <span>. If font style is default, then remove this tag.
|
|
||||||
"""
|
|
||||||
fonts = self.body_tag.find_all("font")
|
fonts = self.body_tag.find_all("font")
|
||||||
for font in fonts:
|
for font in fonts:
|
||||||
face = font.get("face")
|
face = font.get("face")
|
||||||
@@ -119,9 +115,7 @@ class HTMLDocxPreprocessor:
|
|||||||
self.content = self.content[ind:]
|
self.content = self.content[ind:]
|
||||||
|
|
||||||
def clean_trash(self):
|
def clean_trash(self):
|
||||||
"""
|
"""Function to remove all styles and tags we don't need."""
|
||||||
Function to remove all styles and tags we don't need.
|
|
||||||
"""
|
|
||||||
self._clean_tag('span', 'style', re.compile(r'^background: #[0-9a-fA-F]{6}$'))
|
self._clean_tag('span', 'style', re.compile(r'^background: #[0-9a-fA-F]{6}$'))
|
||||||
self._clean_tag('span', 'lang', re.compile(r'^ru-RU$')) # todo: check for another languages
|
self._clean_tag('span', 'lang', re.compile(r'^ru-RU$')) # todo: check for another languages
|
||||||
self._clean_tag('span', 'style', re.compile('^letter-spacing: -?[\d\.]+pt$'))
|
self._clean_tag('span', 'style', re.compile('^letter-spacing: -?[\d\.]+pt$'))
|
||||||
@@ -140,9 +134,7 @@ class HTMLDocxPreprocessor:
|
|||||||
table.decompose()
|
table.decompose()
|
||||||
|
|
||||||
def _process_paragraph(self):
|
def _process_paragraph(self):
|
||||||
"""
|
"""Function to process <p> tags (text-align and text-indent value)."""
|
||||||
Function to process <p> tags (text-align and text-indent value).
|
|
||||||
"""
|
|
||||||
paragraphs = self.body_tag.find_all('p')
|
paragraphs = self.body_tag.find_all('p')
|
||||||
|
|
||||||
for p in paragraphs:
|
for p in paragraphs:
|
||||||
@@ -193,9 +185,7 @@ class HTMLDocxPreprocessor:
|
|||||||
p.attrs['style'] = style
|
p.attrs['style'] = style
|
||||||
|
|
||||||
def _process_two_columns(self):
|
def _process_two_columns(self):
|
||||||
"""
|
"""Function to process paragraphs which has two columns layout."""
|
||||||
Function to process paragraphs which has two columns layout.
|
|
||||||
"""
|
|
||||||
two_columns = self.body_tag.find_all("div", style="column-count: 2")
|
two_columns = self.body_tag.find_all("div", style="column-count: 2")
|
||||||
for div in two_columns:
|
for div in two_columns:
|
||||||
for child in div.children:
|
for child in div.children:
|
||||||
@@ -204,9 +194,7 @@ class HTMLDocxPreprocessor:
|
|||||||
div.unwrap()
|
div.unwrap()
|
||||||
|
|
||||||
def _process_tables(self):
|
def _process_tables(self):
|
||||||
"""
|
"""Function to process tables. Set "border" attribute."""
|
||||||
Function to process tables. Set "border" attribute.
|
|
||||||
"""
|
|
||||||
tables = self.body_tag.find_all("table")
|
tables = self.body_tag.find_all("table")
|
||||||
for table in tables:
|
for table in tables:
|
||||||
tds = table.find_all("td")
|
tds = table.find_all("td")
|
||||||
@@ -296,9 +284,7 @@ class HTMLDocxPreprocessor:
|
|||||||
return content.strip()
|
return content.strip()
|
||||||
|
|
||||||
def _process_footnotes(self):
|
def _process_footnotes(self):
|
||||||
"""
|
"""Function returns list of footnotes and delete them from html_soup."""
|
||||||
Function returns list of footnotes and delete them from html_soup.
|
|
||||||
"""
|
|
||||||
footnote_anchors = self.body_tag.find_all('a', class_='sdfootnoteanc')
|
footnote_anchors = self.body_tag.find_all('a', class_='sdfootnoteanc')
|
||||||
footnote_content = self.body_tag.find_all('div', id=re.compile(r'^sdfootnote\d+$'))
|
footnote_content = self.body_tag.find_all('div', id=re.compile(r'^sdfootnote\d+$'))
|
||||||
footnote_amt = len(footnote_anchors)
|
footnote_amt = len(footnote_anchors)
|
||||||
@@ -404,9 +390,7 @@ class HTMLDocxPreprocessor:
|
|||||||
div.decompose()
|
div.decompose()
|
||||||
|
|
||||||
def _process_div(self):
|
def _process_div(self):
|
||||||
"""
|
"""Function to process <div> tags. All the tags will be deleted from file, all content of the tags will stay."""
|
||||||
Function to process <div> tags. All the tags will be deleted from file, all content of the tags will stay.
|
|
||||||
"""
|
|
||||||
divs = self.body_tag.find_all("div")
|
divs = self.body_tag.find_all("div")
|
||||||
|
|
||||||
for div in divs:
|
for div in divs:
|
||||||
@@ -423,9 +407,7 @@ class HTMLDocxPreprocessor:
|
|||||||
return len(toc_links) > 0
|
return len(toc_links) > 0
|
||||||
|
|
||||||
def _process_toc_links(self):
|
def _process_toc_links(self):
|
||||||
"""
|
"""Function to extract nodes which contains TOC links, remove links from file and detect headers."""
|
||||||
Function to extract nodes which contains TOC links, remove links from file and detect headers.
|
|
||||||
"""
|
|
||||||
toc_links = self.body_tag.find_all("a", {'name': re.compile(r'^_Toc\d+')})
|
toc_links = self.body_tag.find_all("a", {'name': re.compile(r'^_Toc\d+')})
|
||||||
headers = [link.parent for link in toc_links]
|
headers = [link.parent for link in toc_links]
|
||||||
outline_level = "1" # All the unknown outlines will be predicted as <h1>
|
outline_level = "1" # All the unknown outlines will be predicted as <h1>
|
||||||
@@ -448,13 +430,11 @@ class HTMLDocxPreprocessor:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def clean_title_from_numbering(title: str):
|
def clean_title_from_numbering(title: str):
|
||||||
"""
|
"""Function to remove digits from headers."""
|
||||||
Function to remove digits from headers.
|
|
||||||
"""
|
|
||||||
title = re.sub(r'^(\s+)+', '', title)
|
title = re.sub(r'^(\s+)+', '', title)
|
||||||
title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title)
|
title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title)
|
||||||
# title = re.sub(r'^(?:\.?[MDCLXVIclxvi]+\.? ?)+ ', '', title) # delete chapter numbering from the title
|
# title = re.sub(r'^(?:\.?[MDCLXVIclxvi]+\.? ?)+ ', '', title) # delete chapter numbering from the title
|
||||||
title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title)
|
title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title) # delete chapter I, (ABC) from the title
|
||||||
return title
|
return title
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -485,9 +465,7 @@ class HTMLDocxPreprocessor:
|
|||||||
self.apply_func_to_last_child(children[0], func)
|
self.apply_func_to_last_child(children[0], func)
|
||||||
|
|
||||||
def _preprocessing_headings(self):
|
def _preprocessing_headings(self):
|
||||||
"""
|
"""Function to convert all lower level headings to p tags"""
|
||||||
Function to convert all lower level headings to p tags
|
|
||||||
"""
|
|
||||||
pattern = f'^h[{LiveCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
|
pattern = f'^h[{LiveCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
|
||||||
header_tags = self.body_tag.find_all(re.compile(pattern))
|
header_tags = self.body_tag.find_all(re.compile(pattern))
|
||||||
for tag in header_tags:
|
for tag in header_tags:
|
||||||
@@ -561,9 +539,7 @@ class HTMLDocxPreprocessor:
|
|||||||
self.top_level_headers[i]['should_be_numbered'] = True
|
self.top_level_headers[i]['should_be_numbered'] = True
|
||||||
|
|
||||||
def _process_headings(self):
|
def _process_headings(self):
|
||||||
"""
|
"""Function to process tags <h>."""
|
||||||
Function to process tags <h>.
|
|
||||||
"""
|
|
||||||
header_tags = self.body_tag.find_all(re.compile("^h[1-9]$"))
|
header_tags = self.body_tag.find_all(re.compile("^h[1-9]$"))
|
||||||
|
|
||||||
# 1. remove <b>, <span>
|
# 1. remove <b>, <span>
|
||||||
@@ -634,9 +610,7 @@ class HTMLDocxPreprocessor:
|
|||||||
il_tag.p.unwrap()
|
il_tag.p.unwrap()
|
||||||
|
|
||||||
def process_html(self, access, html_path, book_id):
|
def process_html(self, access, html_path, book_id):
|
||||||
"""
|
"""Process html code to satisfy LiveCarta formatting."""
|
||||||
Process html code to satisfy LiveCarta formatting.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
self.logger_object.log(f'Processing TOC and headers.')
|
self.logger_object.log(f'Processing TOC and headers.')
|
||||||
self._process_toc_links()
|
self._process_toc_links()
|
||||||
|
|||||||
@@ -90,9 +90,7 @@ class LibraHTML2JSONConverter:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
def convert_to_dict(self):
|
def convert_to_dict(self):
|
||||||
"""
|
"""Function which convert list of html nodes to appropriate json structure."""
|
||||||
Function which convert list of html nodes to appropriate json structure.
|
|
||||||
"""
|
|
||||||
json_strc = []
|
json_strc = []
|
||||||
ind = 0
|
ind = 0
|
||||||
ch_num = 0
|
ch_num = 0
|
||||||
|
|||||||
@@ -11,9 +11,9 @@ from itertools import takewhile
|
|||||||
from src.util.color_reader import str2hex
|
from src.util.color_reader import str2hex
|
||||||
from src.livecarta_config import LiveCartaConfig
|
from src.livecarta_config import LiveCartaConfig
|
||||||
|
|
||||||
|
|
||||||
cssutils.log.setLevel(CRITICAL)
|
cssutils.log.setLevel(CRITICAL)
|
||||||
|
|
||||||
|
|
||||||
sizes_pr = [-1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56,
|
sizes_pr = [-1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56,
|
||||||
1.63, 1.69, 1.75, 1.81, 1.88, 1.94, 2.0, 2.06, 2.13, 2.19, 2.25, 2.31, 2.38, 2.44, 2.5, 2.56, 2.63, 2.69,
|
1.63, 1.69, 1.75, 1.81, 1.88, 1.94, 2.0, 2.06, 2.13, 2.19, 2.25, 2.31, 2.38, 2.44, 2.5, 2.56, 2.63, 2.69,
|
||||||
2.75, 2.81, 2.88, 2.94, 3.0, 4.0, 5.0]
|
2.75, 2.81, 2.88, 2.94, 3.0, 4.0, 5.0]
|
||||||
@@ -29,6 +29,7 @@ list_types = ['circle', 'disc', 'armenian', 'decimal',
|
|||||||
|
|
||||||
|
|
||||||
def convert_font_size(value):
|
def convert_font_size(value):
|
||||||
|
""" Function converts font-size in mapping """
|
||||||
if 'pt' in value:
|
if 'pt' in value:
|
||||||
if int(value.replace('pt', '')) == LiveCartaConfig.LIVECARTA_DEFAULT_FONT_SIZE:
|
if int(value.replace('pt', '')) == LiveCartaConfig.LIVECARTA_DEFAULT_FONT_SIZE:
|
||||||
return ''
|
return ''
|
||||||
@@ -58,6 +59,7 @@ def convert_font_size(value):
|
|||||||
|
|
||||||
|
|
||||||
def convert_indents(value):
|
def convert_indents(value):
|
||||||
|
""" Function converts text-indent and margin-left values to px """
|
||||||
# 30px = 3.2% = 1.25em = 23pt
|
# 30px = 3.2% = 1.25em = 23pt
|
||||||
text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(-*\w+pt)')
|
text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(-*\w+pt)')
|
||||||
has_style_attrs = re.search(text_indent_regexp, value)
|
has_style_attrs = re.search(text_indent_regexp, value)
|
||||||
@@ -115,13 +117,6 @@ LIVECARTA_STYLE_ATTRS = {
|
|||||||
'margin-left': []
|
'margin-left': []
|
||||||
}
|
}
|
||||||
|
|
||||||
"""
|
|
||||||
LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }
|
|
||||||
|
|
||||||
Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING should be updated
|
|
||||||
to suit livecarta style convention.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
def get_bg_color(x):
|
def get_bg_color(x):
|
||||||
color = str2hex(x)
|
color = str2hex(x)
|
||||||
@@ -135,6 +130,12 @@ def get_text_color(x):
|
|||||||
return color
|
return color
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }
|
||||||
|
|
||||||
|
Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING should be updated
|
||||||
|
to suit livecarta style convention.
|
||||||
|
"""
|
||||||
LIVECARTA_STYLE_ATTRS_MAPPING = {
|
LIVECARTA_STYLE_ATTRS_MAPPING = {
|
||||||
'text-indent': convert_indents,
|
'text-indent': convert_indents,
|
||||||
'font-variant': lambda x: x,
|
'font-variant': lambda x: x,
|
||||||
@@ -178,8 +179,10 @@ LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {
|
|||||||
|
|
||||||
|
|
||||||
def check_style_to_be_tag(style) -> List[tuple]:
|
def check_style_to_be_tag(style) -> List[tuple]:
|
||||||
""" Some css style properties converts to tags.
|
"""
|
||||||
Search for them and prepare list of properties to be removed from style string"""
|
Some css style properties converts to tags.
|
||||||
|
Search for them and prepare list of properties to be removed from style string
|
||||||
|
"""
|
||||||
to_remove = []
|
to_remove = []
|
||||||
for k in LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG:
|
for k in LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG:
|
||||||
if f'{k[0]}:{k[1]}' in style:
|
if f'{k[0]}:{k[1]}' in style:
|
||||||
@@ -208,6 +211,7 @@ def update_css_style_types_to_livecarta_convention(css_rule, style_type):
|
|||||||
|
|
||||||
|
|
||||||
def build_css_content(css_content):
|
def build_css_content(css_content):
|
||||||
|
""" Build css content with livecarta convention """
|
||||||
sheet = cssutils.parseString(css_content, validate=False)
|
sheet = cssutils.parseString(css_content, validate=False)
|
||||||
|
|
||||||
for css_rule in sheet:
|
for css_rule in sheet:
|
||||||
@@ -231,6 +235,7 @@ class TagStyleConverter:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def remove_white_if_no_bgcolor(style_, tag):
|
def remove_white_if_no_bgcolor(style_, tag):
|
||||||
|
""" Function remove white color if there is no text bg color """
|
||||||
if 'background' in style_:
|
if 'background' in style_:
|
||||||
return style_
|
return style_
|
||||||
|
|
||||||
@@ -260,8 +265,7 @@ class TagStyleConverter:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def process_indents_to_px(split_style: list) -> str:
|
def process_indents_to_px(split_style: list) -> str:
|
||||||
# clean with convert_indents() style string and make new clean_style
|
""" Function cleans using convert_indents() style string and returns new clean_style """
|
||||||
|
|
||||||
clean_style = ''
|
clean_style = ''
|
||||||
for item in split_style:
|
for item in split_style:
|
||||||
item = item.split(':')
|
item = item.split(':')
|
||||||
@@ -276,7 +280,7 @@ class TagStyleConverter:
|
|||||||
|
|
||||||
has_margin_left = re.search(margin_left_regexp, clean_style)
|
has_margin_left = re.search(margin_left_regexp, clean_style)
|
||||||
has_text_indent = re.search(text_indent_regexp, clean_style)
|
has_text_indent = re.search(text_indent_regexp, clean_style)
|
||||||
#formula_of_indent: indent = abs(margin_left - text_indent)
|
# formula_of_indent: indent = abs(margin_left - text_indent)
|
||||||
if has_margin_left:
|
if has_margin_left:
|
||||||
num_ml = abs(int("".join(
|
num_ml = abs(int("".join(
|
||||||
filter(str.isdigit, str(has_margin_left.group(2))))))
|
filter(str.isdigit, str(has_margin_left.group(2))))))
|
||||||
@@ -302,6 +306,7 @@ class TagStyleConverter:
|
|||||||
|
|
||||||
def preprocess_style(self):
|
def preprocess_style(self):
|
||||||
def remove_extra_spaces(style: str) -> List:
|
def remove_extra_spaces(style: str) -> List:
|
||||||
|
""" Function to remove extra spaces in style to process clean_style """
|
||||||
# replace all spaces between '; & letter' to ';'
|
# replace all spaces between '; & letter' to ';'
|
||||||
style = re.sub(r"; *", ";", style)
|
style = re.sub(r"; *", ";", style)
|
||||||
split_style = style.split(';')
|
split_style = style.split(';')
|
||||||
@@ -381,7 +386,7 @@ class TagStyleConverter:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def wrap_span_in_p_to_save_style_attrs(tag):
|
def wrap_span_in_p_to_save_style_attrs(tag):
|
||||||
'''Function designed to save style attrs that cannot be in p -> span'''
|
""" Function designed to save style attrs that cannot be in p -> span """
|
||||||
if tag.name == 'p' and tag.attrs.get('style'):
|
if tag.name == 'p' and tag.attrs.get('style'):
|
||||||
styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS
|
styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS
|
||||||
if attr not in ['text-align', 'text-indent', 'border-bottom', 'border-top']]
|
if attr not in ['text-align', 'text-indent', 'border-bottom', 'border-top']]
|
||||||
@@ -414,6 +419,7 @@ class TagStyleConverter:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def wrap_span_in_li_to_save_style_attrs(tag):
|
def wrap_span_in_li_to_save_style_attrs(tag):
|
||||||
|
""" Function designed to save style attrs that cannot be in li -> span """
|
||||||
if tag.name == 'li' and tag.attrs.get('style'):
|
if tag.name == 'li' and tag.attrs.get('style'):
|
||||||
styles_cant_be_in_li = [attr for attr in LIVECARTA_STYLE_ATTRS if
|
styles_cant_be_in_li = [attr for attr in LIVECARTA_STYLE_ATTRS if
|
||||||
attr not in ['text-align', 'list-style-type']]
|
attr not in ['text-align', 'list-style-type']]
|
||||||
@@ -441,6 +447,7 @@ class TagStyleConverter:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def wrap_span_in_ul_ol_to_save_style_attrs(tag):
|
def wrap_span_in_ul_ol_to_save_style_attrs(tag):
|
||||||
|
""" Function designed to save style attrs that cannot be in ul/ol -> span """
|
||||||
if tag.name in ['ul', 'ol'] and tag.attrs.get('style'):
|
if tag.name in ['ul', 'ol'] and tag.attrs.get('style'):
|
||||||
styles_cant_be_in_ul_ol = [
|
styles_cant_be_in_ul_ol = [
|
||||||
attr for attr in LIVECARTA_STYLE_ATTRS if attr not in ['list-style-type']]
|
attr for attr in LIVECARTA_STYLE_ATTRS if attr not in ['list-style-type']]
|
||||||
@@ -465,6 +472,7 @@ class TagStyleConverter:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def wrap_span_in_h_to_save_style_attrs(tag):
|
def wrap_span_in_h_to_save_style_attrs(tag):
|
||||||
|
""" Function designed to save style attrs that cannot be in h -> span """
|
||||||
h_regexp = re.compile('(^h[1-9]$)')
|
h_regexp = re.compile('(^h[1-9]$)')
|
||||||
|
|
||||||
if re.search(h_regexp, tag.name) and tag.attrs.get('style'):
|
if re.search(h_regexp, tag.name) and tag.attrs.get('style'):
|
||||||
@@ -487,6 +495,7 @@ class TagStyleConverter:
|
|||||||
|
|
||||||
|
|
||||||
def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str):
|
def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str):
|
||||||
|
""" Function adds styles from .css to inline style """
|
||||||
css_text = css_text.replace(
|
css_text = css_text.replace(
|
||||||
'@namespace epub "http://www.idpf.org/2007/ops";', '')
|
'@namespace epub "http://www.idpf.org/2007/ops";', '')
|
||||||
livecarta_tmp_ids = []
|
livecarta_tmp_ids = []
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ from src.livecarta_config import LiveCartaConfig
|
|||||||
from src.data_objects import ChapterItem, NavPoint
|
from src.data_objects import ChapterItem, NavPoint
|
||||||
from src.epub_converter.css_reader import build_css_content, convert_html_soup_with_css_style
|
from src.epub_converter.css_reader import build_css_content, convert_html_soup_with_css_style
|
||||||
from src.epub_converter.html_epub_preprocessor import unwrap_structural_tags, get_tags_between_chapter_marks, prepare_title, prepare_content, \
|
from src.epub_converter.html_epub_preprocessor import unwrap_structural_tags, get_tags_between_chapter_marks, prepare_title, prepare_content, \
|
||||||
update_src_links_in_images, preprocess_footnotes
|
update_images_src_links, preprocess_footnotes
|
||||||
|
|
||||||
|
|
||||||
class EpubConverter:
|
class EpubConverter:
|
||||||
@@ -48,7 +48,7 @@ class EpubConverter:
|
|||||||
# flag to be updated while ebooklib.toc is parsed
|
# flag to be updated while ebooklib.toc is parsed
|
||||||
self.id_anchor_exist_in_nav_points = False
|
self.id_anchor_exist_in_nav_points = False
|
||||||
self.img_href2img_bytes = {} # file path to bytes
|
self.img_href2img_bytes = {} # file path to bytes
|
||||||
self.old_image_path2aws_path = {} # file path from <a> to generated aws path
|
self.book_image_src_path2aws_path = {} # file path from <a> to generated aws path
|
||||||
self.footnotes_contents: List[str] = [] # to be sent on server as is
|
self.footnotes_contents: List[str] = [] # to be sent on server as is
|
||||||
self.noterefs: List[Tag] = [] # start of the footnote
|
self.noterefs: List[Tag] = [] # start of the footnote
|
||||||
self.footnotes: List[Tag] = [] # end of the footnote
|
self.footnotes: List[Tag] = [] # end of the footnote
|
||||||
@@ -124,12 +124,12 @@ class EpubConverter:
|
|||||||
return css_content
|
return css_content
|
||||||
|
|
||||||
def build_html_and_css_relations(self):
|
def build_html_and_css_relations(self):
|
||||||
'''
|
"""
|
||||||
This function is designed to get 2 dictionaries:
|
This function is designed to get 2 dictionaries:
|
||||||
The first is css_href2css_content. It is created to connect href of css to content of css
|
The first is css_href2css_content. It is created to connect href of css to content of css
|
||||||
The second is html_href2css_href. It is created to connect href of html to css files(hrefs of them) which are used on this html
|
The second is html_href2css_href. It is created to connect href of html to css files(hrefs of them) which are used on this html
|
||||||
...2... = key2value
|
...2... = key2value
|
||||||
'''
|
"""
|
||||||
|
|
||||||
# dictionary: href of html to related css files
|
# dictionary: href of html to related css files
|
||||||
html_href2css_href: defaultdict = defaultdict(list)
|
html_href2css_href: defaultdict = defaultdict(list)
|
||||||
@@ -159,10 +159,10 @@ class EpubConverter:
|
|||||||
return html_href2css_href, css_href2css_content,
|
return html_href2css_href, css_href2css_content,
|
||||||
|
|
||||||
def add_css_styles_to_html_soup(self):
|
def add_css_styles_to_html_soup(self):
|
||||||
'''
|
"""
|
||||||
This function is designed to update html_href2html_body_soup
|
This function is designed to update html_href2html_body_soup
|
||||||
And add to html_inline_style css_style_content
|
And add to html_inline_style css_style_content
|
||||||
'''
|
"""
|
||||||
for html_href in self.html_href2html_body_soup:
|
for html_href in self.html_href2html_body_soup:
|
||||||
if self.html_href2css_href.get(html_href):
|
if self.html_href2css_href.get(html_href):
|
||||||
css = ''
|
css = ''
|
||||||
@@ -179,6 +179,7 @@ class EpubConverter:
|
|||||||
|
|
||||||
return links
|
return links
|
||||||
|
|
||||||
|
# t_nodes = []
|
||||||
def build_adjacency_list_from_toc(self, element, lvl=0):
|
def build_adjacency_list_from_toc(self, element, lvl=0):
|
||||||
"""
|
"""
|
||||||
self.adjacency_list builds based on TOC nested structure, got from self.ebooklib.toc
|
self.adjacency_list builds based on TOC nested structure, got from self.ebooklib.toc
|
||||||
@@ -211,25 +212,31 @@ class EpubConverter:
|
|||||||
|
|
||||||
sub_nodes = []
|
sub_nodes = []
|
||||||
for i in second:
|
for i in second:
|
||||||
|
# if 'chapter' in (i.title.lower() if isinstance(i, Link) else i[0].title.lower()):
|
||||||
|
# self.t_nodes.append(self.build_adjacency_list_from_toc(i, lvl))
|
||||||
|
# else:
|
||||||
sub_nodes.append(
|
sub_nodes.append(
|
||||||
self.build_adjacency_list_from_toc(i, lvl + 1))
|
self.build_adjacency_list_from_toc(i, lvl + 1))
|
||||||
|
|
||||||
self.adjacency_list[nav_point] = sub_nodes
|
self.adjacency_list[nav_point] = sub_nodes
|
||||||
self.hrefs_added_to_toc.add(nav_point.href)
|
self.hrefs_added_to_toc.add(nav_point.href)
|
||||||
return nav_point
|
return nav_point
|
||||||
|
|
||||||
elif isinstance(element, list) and (lvl == 0):
|
elif isinstance(element, list) and (lvl == 0):
|
||||||
sub_nodes = []
|
nodes = []
|
||||||
for i in element:
|
for i in element:
|
||||||
sub_nodes.append(
|
nodes.append(
|
||||||
self.build_adjacency_list_from_toc(i, lvl + 1))
|
self.build_adjacency_list_from_toc(i, lvl + 1))
|
||||||
|
# for j in self.t_nodes:
|
||||||
self.adjacency_list[-1] = sub_nodes
|
# nodes.append(j)
|
||||||
|
# self.t_nodes = []
|
||||||
|
#
|
||||||
|
# self.adjacency_list[-1] = nodes
|
||||||
|
|
||||||
else:
|
else:
|
||||||
assert 0, f'Error. Element is not tuple/Link/list instance: {type(element)}'
|
assert 0, f'Error. Element is not tuple/Link/list instance: {type(element)}'
|
||||||
|
|
||||||
def is_toc_empty(self):
|
def is_toc_empty(self):
|
||||||
|
""" Function checks is toc empty """
|
||||||
# there is no toc in ebook or no top chapters
|
# there is no toc in ebook or no top chapters
|
||||||
if (self.ebooklib_book.toc is None) or (self.adjacency_list.get(-1) is None):
|
if (self.ebooklib_book.toc is None) or (self.adjacency_list.get(-1) is None):
|
||||||
return True
|
return True
|
||||||
@@ -247,6 +254,7 @@ class EpubConverter:
|
|||||||
self.hrefs_added_to_toc.add(nav_point.href)
|
self.hrefs_added_to_toc.add(nav_point.href)
|
||||||
|
|
||||||
def add_not_added_files_to_adjacency_list(self, not_added):
|
def add_not_added_files_to_adjacency_list(self, not_added):
|
||||||
|
""" Function add files that not added to adjacency list """
|
||||||
for i, file in enumerate(not_added):
|
for i, file in enumerate(not_added):
|
||||||
nav_point = NavPoint(
|
nav_point = NavPoint(
|
||||||
Section(f'To check #{i}, filename: {file}', file))
|
Section(f'To check #{i}, filename: {file}', file))
|
||||||
@@ -315,6 +323,11 @@ class EpubConverter:
|
|||||||
return full_path[0]
|
return full_path[0]
|
||||||
|
|
||||||
def process_internal_links(self):
|
def process_internal_links(self):
|
||||||
|
"""
|
||||||
|
Function
|
||||||
|
- processing internal links in a book
|
||||||
|
- make ids unique
|
||||||
|
"""
|
||||||
# 1. rebuild ids to be unique in all documents
|
# 1. rebuild ids to be unique in all documents
|
||||||
for toc_href in self.hrefs_added_to_toc:
|
for toc_href in self.hrefs_added_to_toc:
|
||||||
for tag in self.html_href2html_body_soup[toc_href].find_all(attrs={'id': re.compile(r'.+')}):
|
for tag in self.html_href2html_body_soup[toc_href].find_all(attrs={'id': re.compile(r'.+')}):
|
||||||
@@ -429,6 +442,7 @@ class EpubConverter:
|
|||||||
self.build_one_chapter(sub_node)
|
self.build_one_chapter(sub_node)
|
||||||
|
|
||||||
def define_chapters_content(self):
|
def define_chapters_content(self):
|
||||||
|
""" Function build chapters content starts from top level chapters """
|
||||||
top_level_nav_points = self.adjacency_list[-1]
|
top_level_nav_points = self.adjacency_list[-1]
|
||||||
if self.id_anchor_exist_in_nav_points:
|
if self.id_anchor_exist_in_nav_points:
|
||||||
for point in top_level_nav_points:
|
for point in top_level_nav_points:
|
||||||
@@ -441,12 +455,12 @@ class EpubConverter:
|
|||||||
nav_point.href, nav_point.id)]
|
nav_point.href, nav_point.id)]
|
||||||
else:
|
else:
|
||||||
content: BeautifulSoup = self.html_href2html_body_soup[nav_point.href]
|
content: BeautifulSoup = self.html_href2html_body_soup[nav_point.href]
|
||||||
self.old_image_path2aws_path = update_src_links_in_images(content,
|
self.book_image_src_path2aws_path = update_images_src_links(content,
|
||||||
self.img_href2img_bytes,
|
self.img_href2img_bytes,
|
||||||
path_to_html=nav_point.href,
|
path_to_html=nav_point.href,
|
||||||
access=self.access,
|
access=self.access,
|
||||||
path2aws_path=self.old_image_path2aws_path,
|
path2aws_path=self.book_image_src_path2aws_path,
|
||||||
book_id=lambda x: self.file.stem if hasattr(self.file, self.file.stem) else 'book_id')
|
book_id=self.file.stem if hasattr(self.file, self.file.stem) else 'book_id')
|
||||||
|
|
||||||
is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
|
is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
|
||||||
title_preprocessed = prepare_title(title)
|
title_preprocessed = prepare_title(title)
|
||||||
@@ -466,6 +480,7 @@ class EpubConverter:
|
|||||||
return ChapterItem(title_preprocessed, content_preprocessed, sub_nodes)
|
return ChapterItem(title_preprocessed, content_preprocessed, sub_nodes)
|
||||||
|
|
||||||
def convert_to_dict(self):
|
def convert_to_dict(self):
|
||||||
|
""" Function which convert list of html nodes to appropriate json structure. """
|
||||||
top_level_nav_points = self.adjacency_list[-1]
|
top_level_nav_points = self.adjacency_list[-1]
|
||||||
top_level_chapters = []
|
top_level_chapters = []
|
||||||
|
|
||||||
@@ -491,7 +506,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
|
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
|
||||||
|
|
||||||
json_converter = EpubConverter('../../epub/9781641051217.epub',
|
json_converter = EpubConverter('../../epub/9781614382263.epub',
|
||||||
logger=logger_object)
|
logger=logger_object)
|
||||||
tmp = json_converter.convert_to_dict()
|
tmp = json_converter.convert_to_dict()
|
||||||
|
|
||||||
|
|||||||
@@ -2,12 +2,17 @@ from src.book_solver import BookSolver
|
|||||||
from src.epub_converter.epub_converter import EpubConverter
|
from src.epub_converter.epub_converter import EpubConverter
|
||||||
|
|
||||||
class EpubBook(BookSolver):
|
class EpubBook(BookSolver):
|
||||||
|
""" Class of .epub type book - child of BookSolver """
|
||||||
|
|
||||||
def __init__(self, book_id=0, access=None, main_logger=None):
|
def __init__(self, book_id=0, access=None, main_logger=None):
|
||||||
super().__init__(book_id, access, main_logger)
|
super().__init__(book_id, access, main_logger)
|
||||||
self.book_type = 'epub'
|
self.book_type = 'epub'
|
||||||
|
|
||||||
def get_converted_book(self):
|
def get_converted_book(self):
|
||||||
|
"""
|
||||||
|
1. Convert epub to html
|
||||||
|
2. Parse from line structure to nested structure
|
||||||
|
"""
|
||||||
json_converter = EpubConverter(self.file_path, access=self.access, logger=self.logger_object)
|
json_converter = EpubConverter(self.file_path, access=self.access, logger=self.logger_object)
|
||||||
content_dict = json_converter.convert_to_dict()
|
content_dict = json_converter.convert_to_dict()
|
||||||
self.status_wrapper.set_generating()
|
self.status_wrapper.set_generating()
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ from src.livecarta_config import LiveCartaConfig
|
|||||||
|
|
||||||
|
|
||||||
def save_image_locally(img_file_path, img_content, book_id):
|
def save_image_locally(img_file_path, img_content, book_id):
|
||||||
|
""" Function saves all images locally """
|
||||||
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
new_path = pathlib.Path(os.path.join(
|
new_path = pathlib.Path(os.path.join(
|
||||||
folder_path, f'../json/img_{book_id}/'))
|
folder_path, f'../json/img_{book_id}/'))
|
||||||
@@ -24,17 +25,19 @@ def save_image_locally(img_file_path, img_content, book_id):
|
|||||||
|
|
||||||
|
|
||||||
def save_image_to_aws(access: Access, img_file_path, img_content: bytes, book_id):
|
def save_image_to_aws(access: Access, img_file_path, img_content: bytes, book_id):
|
||||||
link = access.send_image(
|
""" Function saves all images to Amazon web service """
|
||||||
|
link_path = access.send_image(
|
||||||
img_file_path, doc_id=book_id, img_content=img_content)
|
img_file_path, doc_id=book_id, img_content=img_content)
|
||||||
return link
|
return link_path
|
||||||
|
|
||||||
|
|
||||||
def update_src_links_in_images(body_tag: Tag,
|
def update_images_src_links(body_tag: Tag,
|
||||||
href2img_content: dict,
|
href2img_content: dict,
|
||||||
path_to_html,
|
path_to_html,
|
||||||
access=None,
|
access=None,
|
||||||
path2aws_path=None,
|
path2aws_path=None,
|
||||||
book_id=None):
|
book_id=None):
|
||||||
|
""" Function makes dictionary image_src_path -> Amazon web service_path """
|
||||||
img_tags = body_tag.find_all('img')
|
img_tags = body_tag.find_all('img')
|
||||||
|
|
||||||
for img in img_tags:
|
for img in img_tags:
|
||||||
@@ -65,16 +68,16 @@ def update_src_links_in_images(body_tag: Tag,
|
|||||||
del img.attrs['height']
|
del img.attrs['height']
|
||||||
if img.attrs.get('style'):
|
if img.attrs.get('style'):
|
||||||
del img.attrs['style']
|
del img.attrs['style']
|
||||||
|
|
||||||
return path2aws_path
|
return path2aws_path
|
||||||
|
|
||||||
|
|
||||||
def preprocess_table(body_tag: BeautifulSoup):
|
def preprocess_table(body_tag: BeautifulSoup):
|
||||||
|
""" Function to preprocess tables and tags(td|th|tr): style """
|
||||||
tables = body_tag.find_all("table")
|
tables = body_tag.find_all("table")
|
||||||
for table in tables:
|
for table in tables:
|
||||||
tds = table.find_all(re.compile("td|th|tr"))
|
ts = table.find_all(re.compile("td|th|tr"))
|
||||||
for td in tds:
|
for t_tag in ts:
|
||||||
style = td.get('style')
|
style = t_tag.get('style')
|
||||||
width = ''
|
width = ''
|
||||||
if style:
|
if style:
|
||||||
width_match = re.search(
|
width_match = re.search(
|
||||||
@@ -84,13 +87,13 @@ def preprocess_table(body_tag: BeautifulSoup):
|
|||||||
units = width_match.group(2)
|
units = width_match.group(2)
|
||||||
width = size+'px'
|
width = size+'px'
|
||||||
|
|
||||||
td.attrs['width'] = td.get('width') or width
|
t_tag.attrs['width'] = t_tag.get('width') or width
|
||||||
|
|
||||||
if td.attrs.get('style'):
|
if t_tag.attrs.get('style'):
|
||||||
td.attrs['style'] = td.attrs['style'].replace('border:0;', '')
|
t_tag.attrs['style'] = t_tag.attrs['style'].replace('border:0;', '')
|
||||||
|
|
||||||
if td.attrs.get('style') == '':
|
elif t_tag.attrs.get('style') == '':
|
||||||
del td.attrs['style']
|
del t_tag.attrs['style']
|
||||||
|
|
||||||
if not table.attrs.get('border') or table.attrs.get('border') in ['0', '0px']:
|
if not table.attrs.get('border') or table.attrs.get('border') in ['0', '0px']:
|
||||||
table.attrs['border'] = '1'
|
table.attrs['border'] = '1'
|
||||||
@@ -110,6 +113,7 @@ def process_lists(body_tag):
|
|||||||
|
|
||||||
|
|
||||||
def insert_span_with_attrs_before_tag(main_tag, tag, id_, class_):
|
def insert_span_with_attrs_before_tag(main_tag, tag, id_, class_):
|
||||||
|
""" Function inserts span before tag to be removed(aren't supported by livecarta) """
|
||||||
new_tag = main_tag.new_tag("span")
|
new_tag = main_tag.new_tag("span")
|
||||||
new_tag.attrs['id'] = id_ or ''
|
new_tag.attrs['id'] = id_ or ''
|
||||||
new_tag.attrs['class'] = class_ or ''
|
new_tag.attrs['class'] = class_ or ''
|
||||||
@@ -153,9 +157,7 @@ def clean_headings_content(content: Tag, title: str):
|
|||||||
|
|
||||||
|
|
||||||
def heading_tag_to_p_tag(body_tag):
|
def heading_tag_to_p_tag(body_tag):
|
||||||
"""
|
""" Function to convert all lower level headings to p tags """
|
||||||
Function to convert all lower level headings to p tags
|
|
||||||
"""
|
|
||||||
pattern = f'^h[{LiveCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
|
pattern = f'^h[{LiveCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
|
||||||
header_tags = body_tag.find_all(re.compile(pattern))
|
header_tags = body_tag.find_all(re.compile(pattern))
|
||||||
for tag in header_tags:
|
for tag in header_tags:
|
||||||
@@ -163,17 +165,16 @@ def heading_tag_to_p_tag(body_tag):
|
|||||||
|
|
||||||
|
|
||||||
def clean_title_from_numbering(title: str):
|
def clean_title_from_numbering(title: str):
|
||||||
"""
|
""" Function removes numbering from titles """
|
||||||
Function to remove digits from headers.
|
|
||||||
"""
|
|
||||||
title = re.sub(r'^(\s+)+', '', title)
|
title = re.sub(r'^(\s+)+', '', title)
|
||||||
title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title)
|
title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title)
|
||||||
# title = re.sub(r'^(?:\.?[MDCLXVIclxvi]+\.? ?)+ ', '', title) # delete chapter numbering from the title
|
# title = re.sub(r'^(?:\.?[MDCLXVIclxvi]+\.? ?)+ ', '', title) # delete chapter numbering from the title
|
||||||
title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title)
|
title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title) # delete chapter I, (ABC) from the title
|
||||||
return title
|
return title
|
||||||
|
|
||||||
|
|
||||||
def replace_with_livecarta_anchor_tag(anchor, i):
|
def replace_with_livecarta_anchor_tag(anchor, i):
|
||||||
|
""" Function replace noteref_tag(anchor) with new livecarta tag """
|
||||||
new_tag = BeautifulSoup(features='lxml').new_tag('sup')
|
new_tag = BeautifulSoup(features='lxml').new_tag('sup')
|
||||||
new_tag['class'] = 'footnote-element'
|
new_tag['class'] = 'footnote-element'
|
||||||
new_tag['data-id'] = i + 1
|
new_tag['data-id'] = i + 1
|
||||||
@@ -188,11 +189,11 @@ def replace_with_livecarta_anchor_tag(anchor, i):
|
|||||||
def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, noteref_attr_name='epub:type') \
|
def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, noteref_attr_name='epub:type') \
|
||||||
-> Tuple[list, list, list]:
|
-> Tuple[list, list, list]:
|
||||||
"""
|
"""
|
||||||
|
This function preprocessing footnotes
|
||||||
This function should be earlier that adding fonts in pipeline.
|
This function should be earlier that adding fonts in pipeline.
|
||||||
|
|
||||||
<p>Here is an example footnote<sup><a epub:type="noteref" href="#n1">1</a></sup></p>
|
<p>Here is an example footnote<sup><a epub:type="noteref" href="#n1">1</a></sup></p>
|
||||||
<aside epub:type="footnote" id="n1"><p>With a footnote here.</p></aside>
|
<aside epub:type="footnote" id="n1"><p>With a footnote here.</p></aside>
|
||||||
|
|
||||||
"""
|
"""
|
||||||
footnotes = []
|
footnotes = []
|
||||||
noterefs_tags = source_html_tag.find_all(
|
noterefs_tags = source_html_tag.find_all(
|
||||||
@@ -205,12 +206,14 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
|
|||||||
new_footnotes_tags = []
|
new_footnotes_tags = []
|
||||||
[tag.decompose() for tag in bad_noterefs_tags]
|
[tag.decompose() for tag in bad_noterefs_tags]
|
||||||
|
|
||||||
def parse_a_tag_href(s: str):
|
def parse_a_tag_href(s: str) -> Tuple[str, str]:
|
||||||
|
""" Returns name of file & id of an anchor """
|
||||||
assert '#' in s, f'Error. Unexpected href: {s} in a tag. Href must contain an id.'
|
assert '#' in s, f'Error. Unexpected href: {s} in a tag. Href must contain an id.'
|
||||||
f, id_ = s.split('#')
|
f, id_ = s.split('#')
|
||||||
return f, id_
|
return f, id_
|
||||||
|
|
||||||
def verify_footnote_tag(tags: list):
|
def verify_footnote_tag(tags: list):
|
||||||
|
""" Function verifies is tag - footnote """
|
||||||
assert len(tags) <= 1, f'Error, Multiple id: {href}.\n{tags}'
|
assert len(tags) <= 1, f'Error, Multiple id: {href}.\n{tags}'
|
||||||
if len(tags) == 0:
|
if len(tags) == 0:
|
||||||
anchored_tags = list(target_html_tag.find_all(id=element_id))
|
anchored_tags = list(target_html_tag.find_all(id=element_id))
|
||||||
@@ -275,7 +278,7 @@ def unwrap_structural_tags(body_tag):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def _preserve_class_in_aside_tag(tag_):
|
def _preserve_class_in_aside_tag(tag_):
|
||||||
# to save css style inherited from class, copy class to aside tag (which is parent to tag_)
|
""" to save css style inherited from class, copy class to aside tag (which is parent to tag_) """
|
||||||
# this is for Wiley books with boxes
|
# this is for Wiley books with boxes
|
||||||
tag_class = tag_.attrs['class'] if not isinstance(
|
tag_class = tag_.attrs['class'] if not isinstance(
|
||||||
tag_.attrs['class'], list) else tag_.attrs['class'][0]
|
tag_.attrs['class'], list) else tag_.attrs['class'][0]
|
||||||
@@ -284,10 +287,11 @@ def unwrap_structural_tags(body_tag):
|
|||||||
tag_.parent.attrs['class'] = tag_class
|
tag_.parent.attrs['class'] = tag_class
|
||||||
|
|
||||||
def preserve_class_in_section_tag(tag_) -> bool:
|
def preserve_class_in_section_tag(tag_) -> bool:
|
||||||
# to save css style inherited from class, copy class to child <p>
|
"""
|
||||||
|
to save css style inherited from class, copy class to child <p>
|
||||||
|
returns True, if <section> could be unwrapped
|
||||||
|
"""
|
||||||
# this is for Wiley books with boxes
|
# this is for Wiley books with boxes
|
||||||
# returns True, if <section> could be unwrapped
|
|
||||||
|
|
||||||
tag_class = tag_.attrs['class'] if not isinstance(
|
tag_class = tag_.attrs['class'] if not isinstance(
|
||||||
tag_.attrs['class'], list) else tag_.attrs['class'][0]
|
tag_.attrs['class'], list) else tag_.attrs['class'][0]
|
||||||
if 'feature' not in tag_class:
|
if 'feature' not in tag_class:
|
||||||
@@ -312,6 +316,10 @@ def unwrap_structural_tags(body_tag):
|
|||||||
class_=tag_to_be_removed.attrs.get('class'))
|
class_=tag_to_be_removed.attrs.get('class'))
|
||||||
|
|
||||||
def replace_div_tag_with_table():
|
def replace_div_tag_with_table():
|
||||||
|
"""Function replace <div> with <table>:
|
||||||
|
1. Convert div with certain classes to tables
|
||||||
|
2. Add background color to div with background-color
|
||||||
|
"""
|
||||||
for div in body_tag.find_all("div"):
|
for div in body_tag.find_all("div"):
|
||||||
if div.attrs.get('class'):
|
if div.attrs.get('class'):
|
||||||
div_class = div.attrs['class'] if not isinstance(
|
div_class = div.attrs['class'] if not isinstance(
|
||||||
@@ -348,12 +356,12 @@ def unwrap_structural_tags(body_tag):
|
|||||||
continue
|
continue
|
||||||
add_span_to_save_ids_for_links(div)
|
add_span_to_save_ids_for_links(div)
|
||||||
div.unwrap()
|
div.unwrap()
|
||||||
|
|
||||||
# comments removal
|
# comments removal
|
||||||
for tag in body_tag.find_all():
|
for tag in body_tag.find_all():
|
||||||
for element in tag(text=lambda text: isinstance(text, Comment)):
|
for element in tag(text=lambda text: isinstance(text, Comment)):
|
||||||
element.extract()
|
element.extract()
|
||||||
|
|
||||||
|
|
||||||
replace_div_tag_with_table()
|
replace_div_tag_with_table()
|
||||||
|
|
||||||
for s in body_tag.find_all("section"):
|
for s in body_tag.find_all("section"):
|
||||||
@@ -458,23 +466,8 @@ def get_tags_between_chapter_marks(first_id, href, html_soup):
|
|||||||
return tags
|
return tags
|
||||||
|
|
||||||
|
|
||||||
def wrap_preformatted_span_with_table(main_tag, old_tag):
|
|
||||||
table = main_tag.new_tag("table")
|
|
||||||
table.attrs['border'] = '1px #ccc;'
|
|
||||||
table.attrs['style'] = 'width:100%;'
|
|
||||||
tbody = main_tag.new_tag("tbody")
|
|
||||||
tr = main_tag.new_tag("tr")
|
|
||||||
td = main_tag.new_tag("td")
|
|
||||||
td.attrs['bgcolor'] = '#f5f5f5'
|
|
||||||
# td.attrs['border-radius'] = '4px'
|
|
||||||
old_tag.wrap(td)
|
|
||||||
td.wrap(tr)
|
|
||||||
tr.wrap(tbody)
|
|
||||||
tbody.wrap(table)
|
|
||||||
return table
|
|
||||||
|
|
||||||
|
|
||||||
def wrap_block_tag_with_table(main_tag, old_tag, width='95', border='1px', bg_color=None):
|
def wrap_block_tag_with_table(main_tag, old_tag, width='95', border='1px', bg_color=None):
|
||||||
|
""" Function wraps <block> with <table> """
|
||||||
table = main_tag.new_tag("table")
|
table = main_tag.new_tag("table")
|
||||||
table.attrs['border'] = border
|
table.attrs['border'] = border
|
||||||
table.attrs['align'] = 'center'
|
table.attrs['align'] = 'center'
|
||||||
@@ -497,7 +490,6 @@ def clean_wiley_block(block):
|
|||||||
hrs = block.find_all("p", attrs={"class": re.compile(".+ hr")})
|
hrs = block.find_all("p", attrs={"class": re.compile(".+ hr")})
|
||||||
for hr in hrs:
|
for hr in hrs:
|
||||||
hr.extract()
|
hr.extract()
|
||||||
print(hr)
|
|
||||||
h = block.find(re.compile("h[1-9]"))
|
h = block.find(re.compile("h[1-9]"))
|
||||||
if h:
|
if h:
|
||||||
h.name = "p"
|
h.name = "p"
|
||||||
@@ -505,6 +497,7 @@ def clean_wiley_block(block):
|
|||||||
|
|
||||||
|
|
||||||
def preprocess_block_tags(chapter_tag):
|
def preprocess_block_tags(chapter_tag):
|
||||||
|
""" Function preprocessing <block> tags """
|
||||||
for block in chapter_tag.find_all("blockquote"):
|
for block in chapter_tag.find_all("blockquote"):
|
||||||
if block.attrs.get('class') in ['feature1', 'feature2', 'feature3', 'feature4']:
|
if block.attrs.get('class') in ['feature1', 'feature2', 'feature3', 'feature4']:
|
||||||
clean_wiley_block(block)
|
clean_wiley_block(block)
|
||||||
@@ -527,7 +520,7 @@ def preprocess_block_tags(chapter_tag):
|
|||||||
|
|
||||||
|
|
||||||
def prepare_formatted(text):
|
def prepare_formatted(text):
|
||||||
# replace <,> to save them as is in html code
|
""" Function replaces special symbols with their Unicode representation """
|
||||||
text = text.replace("<", "\x3C")
|
text = text.replace("<", "\x3C")
|
||||||
text = text.replace(">", "\x3E")
|
text = text.replace(">", "\x3E")
|
||||||
text = text.replace('\t', "\xa0 \xa0 ") #
|
text = text.replace('\t', "\xa0 \xa0 ") #
|
||||||
@@ -536,7 +529,25 @@ def prepare_formatted(text):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
def wrap_preformatted_span_with_table(main_tag, old_tag):
|
||||||
|
""" Function wraps <span> with <table> """
|
||||||
|
table = main_tag.new_tag("table")
|
||||||
|
table.attrs['border'] = '1px #ccc;'
|
||||||
|
table.attrs['style'] = 'width:100%;'
|
||||||
|
tbody = main_tag.new_tag("tbody")
|
||||||
|
tr = main_tag.new_tag("tr")
|
||||||
|
td = main_tag.new_tag("td")
|
||||||
|
td.attrs['bgcolor'] = '#f5f5f5'
|
||||||
|
# td.attrs['border-radius'] = '4px'
|
||||||
|
old_tag.wrap(td)
|
||||||
|
td.wrap(tr)
|
||||||
|
tr.wrap(tbody)
|
||||||
|
tbody.wrap(table)
|
||||||
|
return table
|
||||||
|
|
||||||
|
|
||||||
def preprocess_pre_tags(chapter_tag):
|
def preprocess_pre_tags(chapter_tag):
|
||||||
|
""" Function preprocessing <pre> tags """
|
||||||
for pre in chapter_tag.find_all("pre"):
|
for pre in chapter_tag.find_all("pre"):
|
||||||
new_tag = BeautifulSoup(features='lxml').new_tag("span")
|
new_tag = BeautifulSoup(features='lxml').new_tag("span")
|
||||||
new_tag.attrs = pre.attrs.copy()
|
new_tag.attrs = pre.attrs.copy()
|
||||||
@@ -575,7 +586,7 @@ def preprocess_pre_tags(chapter_tag):
|
|||||||
|
|
||||||
|
|
||||||
def preprocess_code_tags(chapter_tag):
|
def preprocess_code_tags(chapter_tag):
|
||||||
# function that emulates style of <code>, <kdb>, <var>
|
""" Function that emulates style of <code>, <kdb>, <var> """
|
||||||
for code in chapter_tag.find_all(re.compile("code|kdb|var")):
|
for code in chapter_tag.find_all(re.compile("code|kdb|var")):
|
||||||
code.name = 'span'
|
code.name = 'span'
|
||||||
if code.parent.name == "pre":
|
if code.parent.name == "pre":
|
||||||
@@ -584,9 +595,7 @@ def preprocess_code_tags(chapter_tag):
|
|||||||
|
|
||||||
|
|
||||||
def prepare_title(title_of_chapter: str) -> str:
|
def prepare_title(title_of_chapter: str) -> str:
|
||||||
"""
|
""" Function finalise processing/cleaning title """
|
||||||
Final processing/cleaning function.
|
|
||||||
"""
|
|
||||||
title_str = BeautifulSoup(title_of_chapter, features='lxml').string
|
title_str = BeautifulSoup(title_of_chapter, features='lxml').string
|
||||||
title_str = re.sub(r'([\n\t\xa0])', ' ', title_str)
|
title_str = re.sub(r'([\n\t\xa0])', ' ', title_str)
|
||||||
title_str = re.sub(r' +', ' ', title_str).rstrip()
|
title_str = re.sub(r' +', ' ', title_str).rstrip()
|
||||||
@@ -596,7 +605,11 @@ def prepare_title(title_of_chapter: str) -> str:
|
|||||||
|
|
||||||
def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_from_chapter: bool) -> str:
|
def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_from_chapter: bool) -> str:
|
||||||
"""
|
"""
|
||||||
Final processing/cleaning function.
|
Function finalise processing/cleaning content
|
||||||
|
1. cleaning \n
|
||||||
|
2. heading removal
|
||||||
|
3. processing tags
|
||||||
|
4. class removal
|
||||||
"""
|
"""
|
||||||
# 0. cleaning \n
|
# 0. cleaning \n
|
||||||
to_remove = []
|
to_remove = []
|
||||||
@@ -609,13 +622,15 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
|
|||||||
# 1. heading removal
|
# 1. heading removal
|
||||||
if remove_title_from_chapter:
|
if remove_title_from_chapter:
|
||||||
clean_headings_content(content_tag, title_str)
|
clean_headings_content(content_tag, title_str)
|
||||||
|
|
||||||
|
# 2. processing tags (<li>, <table>, <code>, <pre>, <block>)
|
||||||
process_lists(content_tag)
|
process_lists(content_tag)
|
||||||
preprocess_table(content_tag)
|
preprocess_table(content_tag)
|
||||||
preprocess_code_tags(content_tag)
|
preprocess_code_tags(content_tag)
|
||||||
preprocess_pre_tags(content_tag)
|
preprocess_pre_tags(content_tag)
|
||||||
preprocess_block_tags(content_tag)
|
preprocess_block_tags(content_tag)
|
||||||
|
|
||||||
# 2. class removal
|
# 3. class removal
|
||||||
for tag in content_tag.find_all(recursive=True):
|
for tag in content_tag.find_all(recursive=True):
|
||||||
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
|
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
|
||||||
'footnote-element']):
|
'footnote-element']):
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
class LiveCartaConfig:
|
class LiveCartaConfig:
|
||||||
|
"""Class of values that LiveCarta platform using and supports"""
|
||||||
SUPPORTED_LEVELS = 5
|
SUPPORTED_LEVELS = 5
|
||||||
SUPPORTED_HEADERS = {"h1", "h2", "h3", "h4", "h5"}
|
SUPPORTED_HEADERS = {"h1", "h2", "h3", "h4", "h5"}
|
||||||
HEADERS_LEVELS = {"h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "h9"}
|
HEADERS_LEVELS = {"h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "h9"}
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from webcolors import html4_hex_to_names, hex_to_rgb, rgb_to_name, rgb_percent_t
|
|||||||
|
|
||||||
|
|
||||||
def closest_colour_rgb(requested_color):
|
def closest_colour_rgb(requested_color):
|
||||||
|
""" Function finds closes colour rgb """
|
||||||
min_colours = {}
|
min_colours = {}
|
||||||
for key, name in html4_hex_to_names.items():
|
for key, name in html4_hex_to_names.items():
|
||||||
r_c, g_c, b_c = hex_to_rgb(key)
|
r_c, g_c, b_c = hex_to_rgb(key)
|
||||||
@@ -18,6 +19,7 @@ def closest_colour_rgb(requested_color):
|
|||||||
|
|
||||||
|
|
||||||
def rgb2color_name(color):
|
def rgb2color_name(color):
|
||||||
|
""" Transform rgb -> color name """
|
||||||
try:
|
try:
|
||||||
closest_name = actual_name = rgb_to_name(color, 'html4')
|
closest_name = actual_name = rgb_to_name(color, 'html4')
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -30,6 +32,7 @@ def rgb2color_name(color):
|
|||||||
|
|
||||||
|
|
||||||
def hex2color_name(color):
|
def hex2color_name(color):
|
||||||
|
""" Transform hex -> color name """
|
||||||
try:
|
try:
|
||||||
color = hex_to_rgb(color)
|
color = hex_to_rgb(color)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -47,6 +50,7 @@ def hex2color_name(color):
|
|||||||
|
|
||||||
|
|
||||||
def str2closest_html_color_name(s: str):
|
def str2closest_html_color_name(s: str):
|
||||||
|
""" Transform str -> closest color name """
|
||||||
if 'rgb' in s:
|
if 'rgb' in s:
|
||||||
rgb_str = 'rgba' if ('rgba' in s) else 'rgb'
|
rgb_str = 'rgba' if ('rgba' in s) else 'rgb'
|
||||||
s = s.replace(rgb_str, '').replace('(', '').replace(')', '')
|
s = s.replace(rgb_str, '').replace('(', '').replace(')', '')
|
||||||
@@ -80,6 +84,7 @@ def str2closest_html_color_name(s: str):
|
|||||||
|
|
||||||
|
|
||||||
def rgba2rgb(r, g, b, alpha):
|
def rgba2rgb(r, g, b, alpha):
|
||||||
|
""" Transform rgba -> rgb """
|
||||||
r_background, g_background, b_background = 255, 255, 255
|
r_background, g_background, b_background = 255, 255, 255
|
||||||
r_new = int((1 - alpha) * r_background + alpha * r)
|
r_new = int((1 - alpha) * r_background + alpha * r)
|
||||||
g_new = int((1 - alpha) * g_background + alpha * g)
|
g_new = int((1 - alpha) * g_background + alpha * g)
|
||||||
@@ -88,6 +93,7 @@ def rgba2rgb(r, g, b, alpha):
|
|||||||
|
|
||||||
|
|
||||||
def str2hex(s: str):
|
def str2hex(s: str):
|
||||||
|
""" Transform str -> hex """
|
||||||
if '#' in s and (len(s) <= 7):
|
if '#' in s and (len(s) <= 7):
|
||||||
return s.lower()
|
return s.lower()
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import logging
|
|||||||
|
|
||||||
|
|
||||||
class ColoredFormatter(logging.Formatter):
|
class ColoredFormatter(logging.Formatter):
|
||||||
|
""" Class to prettify logger and command line output """
|
||||||
MAPPING = {
|
MAPPING = {
|
||||||
'DEBUG': 37, # white
|
'DEBUG': 37, # white
|
||||||
'INFO': 36, # cyan
|
'INFO': 36, # cyan
|
||||||
@@ -61,9 +62,7 @@ class BookLogger:
|
|||||||
self.logger.log(msg=message, level=logging_level, stacklevel=2)
|
self.logger.log(msg=message, level=logging_level, stacklevel=2)
|
||||||
|
|
||||||
def log_error_to_main_log(self, message=''):
|
def log_error_to_main_log(self, message=''):
|
||||||
"""
|
""" Method for logging error to main log file. """
|
||||||
Method for logging error to main log file.
|
|
||||||
"""
|
|
||||||
if self.main_logger:
|
if self.main_logger:
|
||||||
if not message:
|
if not message:
|
||||||
message = f'Error in book conversion. Check log file.'
|
message = f'Error in book conversion. Check log file.'
|
||||||
@@ -71,6 +70,8 @@ class BookLogger:
|
|||||||
|
|
||||||
|
|
||||||
class BookStatusWrapper:
|
class BookStatusWrapper:
|
||||||
|
"""Class sets/updates statuses of Converter on Platform"""
|
||||||
|
|
||||||
def __init__(self, access, logger_object, book_id=0):
|
def __init__(self, access, logger_object, book_id=0):
|
||||||
self.access = access
|
self.access = access
|
||||||
self.logger_object = logger_object
|
self.logger_object = logger_object
|
||||||
|
|||||||
Reference in New Issue
Block a user