SpacySentenceSplitter.py 316 B

123456789101112131415
  1. from typing import List
  2. from .Splitter import Splitter
  3. import spacy
  4. class SpacySentenceSplitter(Splitter):
  5. def __init__(self):
  6. self.nlp = spacy.load("en_core_web_sm")
  7. def split(self, text: str) -> List[str]:
  8. doc = self.nlp(text)
  9. return [str(sent).strip() for sent in doc.sents]