Coverage for domain / converters / epub_converter.py: 100.00%
16 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-07 00:07 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-07 00:07 +0000
1"""EPUB document converter."""
2from bs4 import BeautifulSoup
3from typing import Optional, Callable, List, Any
4from domain.core.base_converter import BaseConverter
5from .reader_protocols import _EPubReader
6from .epub_reader import EbookLibReader
class EPubConverter(BaseConverter):
    """Converter for EPUB documents.

    Opens the EPUB through an injectable reader and extracts plain text
    from each readable content item by parsing its markup with
    BeautifulSoup.
    """

    # Item type code selected by ``_load_items``. Type 9 denotes readable
    # content; NOTE(review): presumably this mirrors ebooklib's
    # ``ITEM_DOCUMENT`` constant -- confirm against the reader in use.
    _READABLE_ITEM_TYPE = 9

    def __init__(self, source_path, reader: Optional[_EPubReader] = None):
        """Initialise the converter.

        Args:
            source_path: Location of the EPUB document; forwarded to
                ``BaseConverter.__init__`` and later passed to the
                reader's ``open``.
            reader: Reader used to open the book. A new
                ``EbookLibReader`` is created when this is ``None``.
        """
        super().__init__(source_path)
        # Compare against None explicitly so that a caller-supplied reader
        # which happens to be falsy is not silently replaced by the default.
        self._reader: _EPubReader = (
            reader if reader is not None else EbookLibReader()
        )
        # Set by _load_items(); stays None until the book has been opened.
        self._book = None

    def _load_items(self) -> List[Any]:
        """Load all chapters from the EPUB document.

        Opens the book via the configured reader and keeps it on
        ``self._book`` as a side effect.

        Returns:
            List of EPUB items whose ``get_type()`` equals
            ``_READABLE_ITEM_TYPE`` (content type 9 = readable content).
        """
        self._book = self._reader.open(self.source_path)
        return [
            item
            for item in self._book.get_items()
            if item.get_type() == self._READABLE_ITEM_TYPE
        ]

    def _extract_from_item(self, item: Any) -> str:
        """Extract text from a single EPUB chapter.

        Args:
            item: EPUB item object exposing ``get_content()``.

        Returns:
            Text of the item's content with the markup removed, as
            produced by BeautifulSoup's ``get_text()``.
        """
        soup = BeautifulSoup(item.get_content(), 'html.parser')
        return soup.get_text()