File size: 1,175 Bytes
268c7f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import numpy as np


class SimilarityScorer:
    """Computes similarity scores between a query and a corpus of vectors."""

    def cosine_similarity(
        self,
        query_vector: np.ndarray,
        corpus_vectors: np.ndarray,
    ) -> np.ndarray:
        """Calculate cosine similarity between a query and every corpus row.

        Args:
            query_vector: Vectorized prompt query. Either a single vector of
                shape (D,) or (1, D), or a stack of M queries of shape (M, D).
            corpus_vectors: Vectorized prompt corpus of shape (N, D).

        Returns:
            The cosine similarities, with values in the range [-1, 1] (up to
            floating-point rounding), where 1 means the two vectors point in
            the same direction. The shape follows the query: (N,) for a
            (D,) query, (N, 1) for a (1, D) query and (N, M) for an (M, D)
            query, where entry [i, j] compares corpus row i with query row j.

        Raises:
            ValueError: If any query vector or any corpus vector has zero
                norm, or if the query and corpus dimensions do not match.
        """
        query = np.asarray(query_vector)
        corpus = np.asarray(corpus_vectors)

        # Normalize each query row on its own. A single norm over the whole
        # array would mis-scale stacked queries and could hide a zero row.
        query_norms = np.linalg.norm(query, axis=-1, keepdims=True)
        if np.any(query_norms == 0):
            raise ValueError("The query vector cannot be zero.")
        unit_query = query / query_norms

        # Normalize the corpus vectors row by row; keepdims lets the (N, 1)
        # norms broadcast against the (N, D) corpus.
        corpus_norms = np.linalg.norm(corpus, axis=1, keepdims=True)
        if np.any(corpus_norms == 0):
            raise ValueError("The corpus contains zero vectors.")
        unit_corpus = corpus / corpus_norms

        # With unit-length rows the dot product is the cosine similarity.
        return np.dot(unit_corpus, unit_query.T)