epub converter: add files

2021-04-14 14:29:19 +03:00
parent 4eb30bd80c
commit 880b045de0
3 changed files with 371 additions and 0 deletions
--- a/src/data_objects.py
+++ b/src/data_objects.py
@@ -0,0 +1,62 @@
+import re
+from typing import Union
+
+from ebooklib.epub import Section, Link
+
+
+"""
+These data structures map NCX navigation entries to Python objects.
+"""
+
+
+class NavPoint:
+    def __init__(self, obj: Union[Link, Section]=None, ):
+        self.href, self.id = self.parse_href_id(obj)
+        self.title = obj.title
+
+    @staticmethod
+    def parse_href_id(item: Union[Link, Section]):
+        reg = '(.+\..+\#)(.+)'
+        match = re.search(reg, item.href)
+        href, div_id = None, None
+        if match:
+            div_id = match.group(2)
+            if match.group(1):
+                href = match.group(1)[:-1]
+        else:
+            reg2 = '(.+\..+)'
+            match2 = re.search(reg2, item.href)
+            if match2 and match2.group(1):
+                href = match2.group(1)
+
+        return href, div_id
+
+    def __str__(self):
+        return '<NavPoint: %s, %s>' % (self.href, self.id)
+
+
+"""
+These data structures map to the livecarta JSON structure.
+"""
+
+
+class ChapterItem:
+    def __init__(self, title, content, sub_items):
+        self.title = title
+        self.content = content
+        self.sub_items = sub_items
+
+    def to_dict(self):
+        tmp = []
+        if self.sub_items:
+            for i in self.sub_items:
+                tmp.append(i.to_dict())
+
+        return {
+            "title": self.title,
+            "contents": [self.content],
+            "sub_items": tmp
+        }
+
+    def __str__(self):
+        return '<Chapter: %s>' % self.title