Coverage for domain / converters / epub_converter.py: 100.00%

16 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-07 00:07 +0000

1"""EPUB document converter.""" 

2from bs4 import BeautifulSoup 

3from typing import Optional, Callable, List, Any 

4from domain.core.base_converter import BaseConverter 

5from .reader_protocols import _EPubReader 

6from .epub_reader import EbookLibReader 

7 

8 

class EPubConverter(BaseConverter):
    """Converter that extracts plain text from EPUB documents.

    Opening the file is delegated to a reader object, which can be injected
    through the constructor (for example, to substitute a stand-in in tests).
    """

    # Item type code that marks readable document content. The value
    # presumably mirrors ``ebooklib.ITEM_DOCUMENT`` -- confirm against the
    # reader implementation in use. Kept as a class attribute so the filter in
    # ``_load_items`` is not a bare magic number and subclasses may override it.
    _DOCUMENT_ITEM_TYPE = 9

    def __init__(self, source_path, reader: Optional[_EPubReader] = None):
        """Initialise the converter.

        Args:
            source_path: Location of the EPUB file; forwarded unchanged to
                ``BaseConverter.__init__``.
            reader: Object used to open the EPUB. When omitted (or falsy), a
                new ``EbookLibReader`` is created.
        """
        super().__init__(source_path)
        self._reader: _EPubReader = reader or EbookLibReader()
        # Populated by _load_items(); stays None until the book is opened.
        self._book = None

    def _load_items(self) -> List[Any]:
        """Open the EPUB and collect its readable content items.

        The opened book is kept on ``self._book`` as a side effect.

        Returns:
            The items yielded by the book's ``get_items()`` whose
            ``get_type()`` equals ``_DOCUMENT_ITEM_TYPE``, in iteration order.
        """
        # NOTE(review): relies on ``self.source_path`` being set by
        # ``BaseConverter.__init__`` -- confirm against the base class.
        self._book = self._reader.open(self.source_path)
        return [
            item
            for item in self._book.get_items()
            if item.get_type() == self._DOCUMENT_ITEM_TYPE
        ]

    def _extract_from_item(self, item: Any) -> str:
        """Extract the text of a single EPUB item.

        Args:
            item: EPUB item exposing ``get_content()``; the content is parsed
                as HTML.

        Returns:
            The text of the parsed markup, as produced by
            ``BeautifulSoup.get_text()``.
        """
        soup = BeautifulSoup(item.get_content(), 'html.parser')
        return soup.get_text()