File size: 1,665 Bytes
a983ce0
fb0495b
a983ce0
fb0495b
 
a983ce0
fb0495b
 
 
a983ce0
 
 
 
 
fb0495b
a983ce0
fb0495b
a983ce0
 
 
fb0495b
a983ce0
 
 
 
 
fb0495b
a983ce0
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from pinecone import Pinecone
from sklearn.metrics.pairwise import cosine_similarity
import os


from text_embedder_encoder import encoder_model_name


class Reranker:
    """Rerank previously retrieved answers by cosine similarity to a query.

    The stored answer embeddings and answer texts are fetched from a Pinecone
    index, scored against the query vector, and the best-scoring answer texts
    are returned.
    """

    def __init__(self, pinecone_api_key=None, answer_index_name=None):
        """Create the Pinecone client and remember which index to query.

        Args:
            pinecone_api_key: Pinecone API key. When ``None``, the value of
                the ``pinecone_api_key`` environment variable is used.
            answer_index_name: Name of the index holding the answers. When
                ``None``, the name is derived from ``encoder_model_name``.

        Raises:
            KeyError: If no key is passed and the ``pinecone_api_key``
                environment variable is not set.
        """
        # Defaults are resolved here rather than in the signature: a default
        # expression is evaluated once at definition time, which would make
        # merely importing this module fail when the variable is unset.
        if pinecone_api_key is None:
            pinecone_api_key = os.environ["pinecone_api_key"]
        if answer_index_name is None:
            answer_index_name = (
                f"hebrew-dentist-answers-{encoder_model_name.replace('/', '-')}".lower()
            )

        self.pc = Pinecone(api_key=pinecone_api_key)
        self.answer_index_name = answer_index_name

    def rerank(self, query_vector, retrieved_answers_ids, top_n=5, min_score=0.7):
        """Return the answers most similar to ``query_vector``.

        Args:
            query_vector: Embedding of the query, compared against the stored
                answer embeddings.
            retrieved_answers_ids: IDs of the candidate answers to fetch from
                the index.
            top_n: Maximum number of best-scoring candidates to consider.
            min_score: Minimum cosine similarity a candidate needs in order to
                be returned.

        Returns:
            A list of answer texts ordered from most to least similar. The
            list is empty when there are no candidates, when none reaches
            ``min_score``, or when fetching/scoring fails (the error is
            printed, not raised).
        """
        # Nothing to rank: skip the network round trip entirely.
        if len(retrieved_answers_ids) == 0:
            return []

        try:
            index = self.pc.Index(self.answer_index_name)
            fetch_response = index.fetch(ids=retrieved_answers_ids)
            vectors = fetch_response['vectors']

            doc_embeddings = []
            answers = []
            for answer_id in retrieved_answers_ids:
                # An ID absent from the response is skipped so that a single
                # missing record does not discard every other candidate.
                if answer_id not in vectors:
                    continue
                record = vectors[answer_id]
                doc_embeddings.append(record['values'])
                answers.append(record['metadata']['answer'])

            if not doc_embeddings:
                return []

            similarity_scores = cosine_similarity([query_vector], doc_embeddings)[0]
            return self._select_answers(similarity_scores, answers, top_n, min_score)
        except Exception as e:
            # Deliberate best-effort: reranking failures yield no answers
            # instead of propagating to the caller.
            print(f"Error performing rerank: {e}")
            return []

    @staticmethod
    def _select_answers(similarity_scores, answers, top_n, min_score):
        """Pick the ``top_n`` best-scoring answers that reach ``min_score``."""
        # Sorting (score, position) pairs in reverse puts the highest score
        # first; equal scores are ordered by the later position first.
        ranked = sorted(zip(similarity_scores, range(len(answers))), reverse=True)
        return [answers[idx] for score, idx in ranked[:top_n] if score >= min_score]