SentenceSimilarity.py 772 B

1234567891011121314151617181920
# SentenceSimilarity.py
from typing import List

from sentence_transformers import SentenceTransformer, util
  4. class SentenceTransformersSimilarity:
  5. def __init__(self, model: str = 'all-MiniLM-L6-v2', similarity_threshold: float = 0.2):
  6. self.model = SentenceTransformer(model)
  7. self.similarity_threshold = similarity_threshold
  8. def similarities(self, sentences: List[str]) -> List[float]:
  9. # Encode all sentences
  10. embeddings = self.model.encode(sentences)
  11. # Calculate cosine similarities for neighboring sentences
  12. similarities = []
  13. for i in range(1, len(embeddings)):
  14. sim = util.pytorch_cos_sim(embeddings[i-1], embeddings[i]).item()
  15. similarities.append(sim)
  16. return similarities