SpacySentenceSplitter.py 359 B

123456789101112
  1. # SpacySentenceSplitter.py
  2. from typing import List
  3. from .Splitter import Splitter
  4. import spacy
  5. class SpacySentenceSplitter(Splitter):
  6. def __init__(self, model: str = "en_core_web_sm"):
  7. self.nlp = spacy.load(model)
  8. def split(self, text: str) -> List[str]:
  9. doc = self.nlp(text)
  10. return [str(sent).strip() for sent in doc.sents]