SentenceSimilarity.py 721 B

1234567891011121314151617181920
  1. from typing import List
  2. from sentence_transformers import SentenceTransformer, util
  3. class SentenceTransformersSimilarity():
  4. def __init__(self, model='all-MiniLM-L6-v2', similarity_threshold=0.2):
  5. self.model = SentenceTransformer(model)
  6. self.similarity_threshold = similarity_threshold
  7. def similarities(self, sentences: List[str]):
  8. # Encode all sentences
  9. embeddings = self.model.encode(sentences)
  10. # Calculate cosine similarities for neighboring sentences
  11. similarities = []
  12. for i in range(1, len(embeddings)):
  13. sim = util.pytorch_cos_sim(embeddings[i-1], embeddings[i]).item()
  14. similarities.append(sim)
  15. return similarities