hebrew-dentsit / reranker.py
borodache's picture
Split retrieval and reranking into a two-step search over two different indexes, which is expected to lower latency considerably.
a983ce0 verified
from pinecone import Pinecone
from sklearn.metrics.pairwise import cosine_similarity
import os
from text_embedder_encoder import encoder_model_name
class Reranker:
    """Rerank previously retrieved answers by cosine similarity to a query.

    The stored answer embeddings are fetched by ID from a Pinecone index and
    compared against an already-computed query vector; no encoding happens
    in this class.
    """

    def __init__(self, pinecone_api_key=None, answer_index_name=None):
        """Create the Pinecone client and remember the answers index name.

        Args:
            pinecone_api_key: Pinecone API key. When omitted, the value of the
                ``pinecone_api_key`` environment variable is used.
            answer_index_name: Name of the index holding the answer vectors.
                When omitted, it is derived from ``encoder_model_name``.

        Raises:
            KeyError: If no key is passed and the ``pinecone_api_key``
                environment variable is not set.
        """
        # Resolve defaults here rather than in the signature: signature
        # defaults are evaluated once at class-definition time, which made a
        # plain import of this module fail whenever the variable was unset.
        if pinecone_api_key is None:
            pinecone_api_key = os.environ["pinecone_api_key"]
        if answer_index_name is None:
            answer_index_name = (
                f"hebrew-dentist-answers-{encoder_model_name.replace('/', '-')}".lower()
            )

        self.pc = Pinecone(api_key=pinecone_api_key)
        self.answer_index_name = answer_index_name

    def rerank(self, query_vector, retrieved_answers_ids, top_n=5, *, min_score=0.7):
        """Return the best-matching answers for ``query_vector``.

        Args:
            query_vector: Embedding of the query; it is passed to
                ``cosine_similarity`` as a single row.
            retrieved_answers_ids: IDs of the candidate answers to fetch from
                the answers index.
            top_n: Maximum number of answers to return.
            min_score: Minimum cosine similarity an answer needs in order to
                be returned.

        Returns:
            The ``answer`` metadata values of at most ``top_n`` candidates
            whose similarity is at least ``min_score``, ordered from most to
            least similar. An empty list is returned when there is nothing to
            rank or when any error occurs (the error is printed).
        """
        # Nothing to rank: skip the network round trip and the error that
        # cosine_similarity raises for an empty document matrix.
        if len(retrieved_answers_ids) == 0 or top_n <= 0:
            return []

        try:
            index = self.pc.Index(self.answer_index_name)
            fetch_response = index.fetch(ids=retrieved_answers_ids)
            vectors = fetch_response['vectors']

            doc_embeddings = []
            answers = []
            for answer_id in retrieved_answers_ids:
                # NOTE(review): assumes the fetch response simply omits IDs
                # that are not in the index; skip those instead of letting one
                # missing ID discard every candidate.
                if answer_id not in vectors:
                    continue
                record = vectors[answer_id]
                doc_embeddings.append(record['values'])
                answers.append(record['metadata']['answer'])

            if not doc_embeddings:
                return []

            similarity_scores = cosine_similarity([query_vector], doc_embeddings)[0]

            # Stable sort on the score alone, so equally scored candidates
            # keep the order in which they were retrieved.
            ranked_positions = sorted(
                range(len(answers)),
                key=lambda position: similarity_scores[position],
                reverse=True,
            )
            return [
                answers[position]
                for position in ranked_positions[:top_n]
                if similarity_scores[position] >= min_score
            ]
        except Exception as e:
            # Deliberate best-effort: a failed rerank yields no answers
            # instead of propagating the error to the caller.
            print(f"Error performing rerank: {e}")
            return []