File size: 1,406 Bytes
fb0495b
d0eb2f2
fb0495b
 
 
 
 
 
c9580eb
fb0495b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from pinecone import Pinecone
import os

from text_embedder_encoder import TextEmbedder, encoder_model_name


class Retriever:
    """Look up stored answers in a Pinecone index by vector similarity.

    A query string is embedded with ``TextEmbedder`` and sent to the
    configured Pinecone index; the ``answer`` metadata field of each match
    is returned.
    """

    def __init__(self, pinecone_api_key=None, index_name=None):
        """Create the Pinecone client and the text embedder.

        Args:
            pinecone_api_key: Pinecone API key. When ``None`` (the default),
                it is read from the ``pinecone_api_key`` environment
                variable.
            index_name: Name of the Pinecone index to query. When ``None``
                (the default), it is derived from ``encoder_model_name`` as
                ``hebrew-dentist-qa-<model name with '/' replaced by '-'>``,
                lower-cased.

        Raises:
            KeyError: If no API key is passed and the ``pinecone_api_key``
                environment variable is not set.
        """
        # Defaults are resolved here rather than in the signature: a default
        # expression runs once at import time, which would make importing
        # this module fail whenever the environment variable is missing,
        # even for callers that pass the key explicitly.
        if pinecone_api_key is None:
            pinecone_api_key = os.environ["pinecone_api_key"]
        if index_name is None:
            index_name = (
                f"hebrew-dentist-qa-{encoder_model_name.replace('/', '-')}".lower()
            )

        # Initialize Pinecone connection
        self.pc = Pinecone(api_key=pinecone_api_key)
        self.index_name = index_name
        self.text_embedder = TextEmbedder()
        # Not read anywhere in this class; presumably the embedding
        # dimension of the encoder -- TODO confirm against TextEmbedder.
        self.vector_dim = 768

    def search_similar(self, query_text, top_k=50):
        """Search for similar content using vector similarity in Pinecone.

        Args:
            query_text: Text to embed and use as the query.
            top_k: Maximum number of matches requested from the index.

        Returns:
            A list with the ``answer`` metadata value of every match, in the
            order the index returned them. Any exception raised while
            embedding, querying, or reading the matches is reported with
            ``print`` and an empty list is returned instead.
        """
        try:
            # Generate embedding for query
            query_vector = self.text_embedder.encode(query_text)

            # Get Pinecone index
            index = self.pc.Index(self.index_name)

            # Execute search
            results = index.query(
                vector=query_vector,
                top_k=top_k,
                include_metadata=True,
            )

            return [match['metadata']['answer'] for match in results['matches']]
        except Exception as e:
            # Deliberate best-effort: callers get an empty result rather
            # than an exception when the lookup fails.
            print(f"Error performing similarity search: {e}")
            return []