forked from LiveCarta/BookConverter
quick fix
This commit is contained in:
@@ -32,7 +32,7 @@ class Book:
|
||||
main_logger=main_logger)
|
||||
self.book_api_wrapper = BookApiWrapper(access, self.logger_object, book_id)
|
||||
|
||||
assert BookConfig.SUPPORTED_LEVELS == len(BookConfig.SUPPORTED_HEADERS), \
|
||||
assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \
|
||||
"Length of headers doesn't match allowed levels."
|
||||
|
||||
def save_docx(self, content):
|
||||
|
||||
@@ -5,7 +5,7 @@ import re
|
||||
from shutil import copyfile
|
||||
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
from config import BookConfig, BookLogger, BookApiWrapper
|
||||
from config import LawCartaConfig, BookLogger, BookApiWrapper
|
||||
|
||||
|
||||
class HTMLPreprocessor:
|
||||
@@ -49,8 +49,8 @@ class HTMLPreprocessor:
|
||||
@classmethod
|
||||
def convert_pt_to_px(cls, value):
|
||||
value = int(value)
|
||||
if value == BookConfig.WORD_DEFAULT_FONT_SIZE:
|
||||
return BookConfig.LAWCARTA_DEFAULT_FONT_SIZE
|
||||
if value == LawCartaConfig.WORD_DEFAULT_FONT_SIZE:
|
||||
return LawCartaConfig.LAWCARTA_DEFAULT_FONT_SIZE
|
||||
else:
|
||||
return value
|
||||
|
||||
@@ -70,7 +70,7 @@ class HTMLPreprocessor:
|
||||
size = size.group(1)
|
||||
new_size = cls.convert_pt_to_px(size)
|
||||
|
||||
if new_size == BookConfig.LAWCARTA_DEFAULT_FONT_SIZE:
|
||||
if new_size == LawCartaConfig.LAWCARTA_DEFAULT_FONT_SIZE:
|
||||
return ""
|
||||
|
||||
return re.sub(size + "pt", str(new_size) + "px", style)
|
||||
@@ -178,7 +178,7 @@ class HTMLPreprocessor:
|
||||
p.attrs = {}
|
||||
style = ''
|
||||
|
||||
if align is not None and align != BookConfig.DEFAULT_ALIGN_STYLE:
|
||||
if align is not None and align != LawCartaConfig.DEFAULT_ALIGN_STYLE:
|
||||
style += f'text-align: {align};'
|
||||
|
||||
if indent is not None:
|
||||
@@ -280,10 +280,6 @@ class HTMLPreprocessor:
|
||||
tag.string = tag.text.replace('\u200c', '')
|
||||
tag['href'] = tag.attrs.get('href').replace('%E2%80%8C', '')
|
||||
|
||||
# %E2%80%8C
|
||||
for tag in a_tags_with_href:
|
||||
print(tag)
|
||||
|
||||
@staticmethod
|
||||
def _clean_footnote_content(content):
|
||||
content = content.strip()
|
||||
@@ -433,7 +429,7 @@ class HTMLPreprocessor:
|
||||
"""
|
||||
Function to convert all lower level headings to p tags
|
||||
"""
|
||||
pattern = f'^h[{BookConfig.SUPPORTED_LEVELS + 1}-9]$'
|
||||
pattern = f'^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
|
||||
header_tags = self.body_tag.find_all(re.compile(pattern))
|
||||
for tag in header_tags:
|
||||
tag.name = 'p'
|
||||
@@ -521,8 +517,8 @@ class HTMLPreprocessor:
|
||||
if title == "":
|
||||
tag.unwrap()
|
||||
else:
|
||||
assert tag.name in BookConfig.SUPPORTED_HEADERS, \
|
||||
f'Preprocessing went wrong, there is still h{BookConfig.SUPPORTED_LEVELS + 1}-h9 headings.'
|
||||
assert tag.name in LawCartaConfig.SUPPORTED_HEADERS, \
|
||||
f'Preprocessing went wrong, there is still h{LawCartaConfig.SUPPORTED_LEVELS + 1}-h9 headings.'
|
||||
# if tag.name in ["h4", "h5", "h6"]:
|
||||
# tag.name = "h3" # All the lower level headings will be transformed to h3 headings
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import codecs
|
||||
import json
|
||||
|
||||
from copy import copy
|
||||
from config import BookConfig
|
||||
from config import LawCartaConfig
|
||||
|
||||
|
||||
class JSONConverter:
|
||||
@@ -34,7 +34,7 @@ class JSONConverter:
|
||||
|
||||
:param ind: Index of header in content list.
|
||||
"""
|
||||
if self.content[ind].name in BookConfig.SUPPORTED_HEADERS:
|
||||
if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS:
|
||||
title = self.content[ind].text
|
||||
curr_outline = int(re.sub(r"^h", "", self.content[ind].name)) # extract outline from tag
|
||||
result = {
|
||||
@@ -47,7 +47,7 @@ class JSONConverter:
|
||||
|
||||
while ind < len(self.content):
|
||||
# 1. next tag is a header
|
||||
if self.content[ind].name in BookConfig.SUPPORTED_HEADERS:
|
||||
if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS:
|
||||
outline = int(re.sub(r"^h", "", self.content[ind].name))
|
||||
# - recursion step until h_i > h_initial
|
||||
if outline > curr_outline:
|
||||
@@ -100,13 +100,13 @@ class JSONConverter:
|
||||
while ind < len(self.content):
|
||||
res = {}
|
||||
|
||||
if self.content[ind].name in BookConfig.SUPPORTED_HEADERS:
|
||||
if self.content[ind].name in LawCartaConfig.SUPPORTED_HEADERS:
|
||||
res, ind = self.header_to_json(ind)
|
||||
|
||||
else:
|
||||
chapter_title = f'Untitled chapter {ch_num}'
|
||||
chapter = []
|
||||
while ind < len(self.content) and self.content[ind].name not in BookConfig.SUPPORTED_HEADERS:
|
||||
while ind < len(self.content) and self.content[ind].name not in LawCartaConfig.SUPPORTED_HEADERS:
|
||||
if not self._is_empty_p_tag(self.content[ind]):
|
||||
chapter.append(self.format_html(str(self.content[ind])))
|
||||
ind += 1
|
||||
|
||||
Reference in New Issue
Block a user