def clean_title_from_numbering(title: str, *, strip_numeric_prefix: bool = False) -> str:
    """Normalise the beginning of a heading title.

    By default only leading whitespace is removed and any chapter numbering
    is kept as part of the title.  Removal of a leading numeric prefix
    (``1.``, ``1.2.3``, ``.4``) is available on request through
    ``strip_numeric_prefix``.

    Args:
        title: Heading text to clean.
        strip_numeric_prefix: When true, also drop a leading run of
            dot-separated digit groups, including the single space that may
            follow each group.  Off by default, so plain calls leave the
            numbering untouched.

    Returns:
        The cleaned title; trailing whitespace is not modified.
    """
    # Without arguments, str.lstrip() removes the same leading Unicode
    # whitespace that the pattern r'^(\s+)+' matched, minus the regex overhead.
    title = title.lstrip()

    if strip_numeric_prefix:
        # Imported locally so the function is usable on its own.
        import re

        # Caution: this also removes digits that open real title text,
        # e.g. "2020 Vision" becomes "Vision".
        title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title)

    # Roman-numeral ("IV.") and single-letter ("A.") prefixes are not handled:
    # those patterns are disabled in the source and would also match ordinary
    # leading words or initials.
    return title