import os

from pinecone import Pinecone

from text_embedder_encoder import encoder_model_name


class Retriever:
    """Look up answer record ids by vector similarity in a Pinecone index."""

    def __init__(
        self,
        pinecone_api_key=None,
        question_index_name=f"hebrew-dentist-questions-{encoder_model_name.replace('/', '-')}".lower(),
    ):
        """Create the Pinecone client and remember which index to query.

        Args:
            pinecone_api_key: API key handed to the Pinecone client. When
                omitted (``None``), the ``pinecone_api_key`` environment
                variable is read at construction time.
            question_index_name: Name of the Pinecone index to query. The
                default is derived from ``encoder_model_name`` with ``/``
                replaced by ``-`` and lower-cased.

        Raises:
            KeyError: If no key is passed and the ``pinecone_api_key``
                environment variable is not set.
        """
        # Resolve the environment variable here rather than in the signature:
        # a default argument is evaluated once when the module is imported,
        # which would make the import itself fail whenever the variable is
        # unset, even for callers that pass a key explicitly.
        if pinecone_api_key is None:
            pinecone_api_key = os.environ["pinecone_api_key"]

        # Initialize Pinecone connection
        self.pc = Pinecone(api_key=pinecone_api_key)
        self.question_index_name = question_index_name

    def search_similar(self, query_vector, top_k: int = 50) -> list:
        """Search for similar content using vector similarity in Pinecone.

        Args:
            query_vector: Vector passed as ``vector`` to the index query.
            top_k: Maximum number of matches requested from the index.

        Returns:
            One string per match, in the order the matches are returned.
            Each string is the match id with its last ``:``-separated segment
            replaced by the integer form of the match's ``answer_id``
            metadata value. An empty list is returned if anything fails; the
            error is printed rather than raised.
        """
        try:
            # Get Pinecone index
            index = self.pc.Index(self.question_index_name)

            # Execute search
            results = index.query(
                vector=query_vector,
                top_k=top_k,
                include_metadata=True,
            )

            # rpartition(':')[0] keeps everything before the final ':' (and is
            # '' when the id contains no ':'). int() normalizes the metadata
            # value before formatting -- presumably it is stored as a float;
            # verify against the code that populates the index.
            return [
                f"{match['id'].rpartition(':')[0]}:{int(match['metadata']['answer_id'])}"
                for match in results['matches']
            ]
        except Exception as e:
            # Deliberate best-effort: report the failure and return no results
            # instead of propagating the error to the caller.
            print(f"Error performing retriever: {e}")
            return []