| 123456789101112 |
- # SpacySentenceSplitter.py
- from typing import List
- from .Splitter import Splitter
- import spacy
class SpacySentenceSplitter(Splitter):
    """Sentence splitter that delegates segmentation to a spaCy pipeline.

    The pipeline is loaded once in the constructor and reused by every
    call to ``split``.
    """

    def __init__(self, model: str = "en_core_web_sm"):
        """Load the spaCy pipeline used for sentence segmentation.

        Args:
            model: Pipeline identifier handed to ``spacy.load``.
        """
        self.nlp = spacy.load(model)

    def split(self, text: str) -> List[str]:
        """Split ``text`` into sentences.

        Args:
            text: The text to segment.

        Returns:
            The sentences in document order, each with leading and trailing
            whitespace removed. Segments that are empty after stripping are
            omitted, so the result never contains ``""``.
        """
        doc = self.nlp(text)
        stripped = (str(sent).strip() for sent in doc.sents)
        # A span made only of whitespace strips down to ""; skip it so
        # callers never receive an empty "sentence".
        return [sentence for sentence in stripped if sentence]
|