| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566 |
- import pytest
- from utls import load_testdata
- from semantic_split import SentenceTransformersSimilarity, \
- SimilarSentenceSplitter, SpacySentenceSplitter
# Shared splitter instance used by the tests in this module; it is assigned
# by the autouse fixture below before any test runs.
splitter = None


# Loading spaCy and the SentenceTransformer model takes time, so we do it
# once for all tests in this module.
@pytest.fixture(scope="module", autouse=True)
def run_before_and_after_tests():
    """Build the shared ``SimilarSentenceSplitter`` once per test module.

    The fixture is module-scoped so the similarity model and the sentence
    splitter are constructed a single time instead of once per test (the
    default function scope would rebuild them for every test). It takes no
    ``tmpdir`` argument: that fixture is function-scoped and cannot be
    requested from a module-scoped fixture.

    Yields:
        None. The constructed splitter is published through the module-level
        ``splitter`` global.
    """
    global splitter
    model = SentenceTransformersSimilarity()
    sentence_splitter = SpacySentenceSplitter()
    splitter = SimilarSentenceSplitter(
        similarity_model=model,
        sentence_splitter=sentence_splitter,
    )
    yield  # this is where the testing happens
def test_two_similar_sentences():
    """Two closely related sentences end up together in a single group."""
    expected_groups = [["I love dogs.", "I love cats."]]
    groups = splitter.split("""I love dogs. I love cats.""")
    assert groups == expected_groups
def test_similar_sentences():
    """Four sentences are expected to come back as two groups of two."""
    text = """I dogs are amazing.
Cats must be the easiest pets around.
Robots are advanced now with AI.
Flying in space can only be done by Artificial intelligence."""
    first_group = [
        "I dogs are amazing.",
        "Cats must be the easiest pets around.",
    ]
    second_group = [
        "Robots are advanced now with AI.",
        "Flying in space can only be done by Artificial intelligence.",
    ]
    groups = splitter.split(text)
    assert groups == [first_group, second_group]
def test_different_sentences():
    """Two unrelated sentences are expected to land in separate groups."""
    text = """I love dogs. He has flowers at home."""
    res = splitter.split(text)
    # A single equality check covers every element. Indexing into ``res``
    # first would raise IndexError when fewer groups than expected come back,
    # hiding pytest's comparison diff.
    assert res == [['I love dogs.'], ['He has flowers at home.']]
def test_5th_sentences():
    """Splitting the ``sentences.txt`` fixture yields a result of length 5."""
    groups = splitter.split(load_testdata('sentences.txt'))
    group_count = len(groups)
    assert group_count == 5
def test_max_group_sentences():
    """With ``group_max_sentences=1`` the ``sentences.txt`` result has length 20."""
    document = load_testdata('sentences.txt')
    groups = splitter.split(document, group_max_sentences=1)
    group_count = len(groups)
    assert group_count == 20
|