Spaces:
Running
Running
import numpy as np | |
from langflow.custom import Component | |
from langflow.io import DataInput, DropdownInput, Output | |
from langflow.schema import Data | |
class EmbeddingSimilarityComponent(Component):
    """Component that scores how alike two embedding vectors are.

    Takes a list of exactly two ``Data`` objects, each carrying its vector
    under ``data["embeddings"]``, and a metric chosen from the dropdown. The
    result is a ``Data`` object holding both input embeddings and a
    ``similarity_score`` dict keyed by the metric that was computed.
    """

    display_name: str = "Embedding Similarity"
    description: str = "Compute selected form of similarity between two embedding vectors."
    icon = "equal"

    inputs = [
        DataInput(
            name="embedding_vectors",
            display_name="Embedding Vectors",
            info="A list containing exactly two data objects with embedding vectors to compare.",
            is_list=True,
        ),
        DropdownInput(
            name="similarity_metric",
            display_name="Similarity Metric",
            info="Select the similarity metric to use.",
            options=["Cosine Similarity", "Euclidean Distance", "Manhattan Distance"],
            value="Cosine Similarity",
        ),
    ]

    outputs = [
        Output(display_name="Similarity Data", name="similarity_data", method="compute_similarity"),
    ]

    def compute_similarity(self) -> Data:
        """Compute the selected metric between the two supplied embeddings.

        Returns:
            A ``Data`` object whose ``data`` contains ``embedding_1``,
            ``embedding_2`` and ``similarity_score``. ``similarity_score`` is
            a one-entry dict: ``cosine_similarity``, ``euclidean_distance`` or
            ``manhattan_distance`` mapped to the score, or ``error`` mapped to
            a message when the two embeddings differ in shape.

        Raises:
            ValueError: If the input list does not hold exactly two items, or
                if the selected metric is not one of the supported options.
        """
        embedding_vectors: list[Data] = self.embedding_vectors

        # The comparison is strictly pairwise; reject any other count up front.
        if len(embedding_vectors) != 2:  # noqa: PLR2004
            msg = "Exactly two embedding vectors are required."
            raise ValueError(msg)

        embedding_1 = np.array(embedding_vectors[0].data["embeddings"])
        embedding_2 = np.array(embedding_vectors[1].data["embeddings"])

        if embedding_1.shape != embedding_2.shape:
            # A shape mismatch is reported in the result rather than raised.
            similarity_score = {"error": "Embeddings must have the same dimensions."}
        else:
            similarity_metric = self.similarity_metric

            if similarity_metric == "Cosine Similarity":
                # NOTE: a zero-magnitude embedding makes the denominator 0, so
                # NumPy yields NaN here (with a RuntimeWarning), not an error.
                score = np.dot(embedding_1, embedding_2) / (np.linalg.norm(embedding_1) * np.linalg.norm(embedding_2))
                similarity_score = {"cosine_similarity": score}
            elif similarity_metric == "Euclidean Distance":
                score = np.linalg.norm(embedding_1 - embedding_2)
                similarity_score = {"euclidean_distance": score}
            elif similarity_metric == "Manhattan Distance":
                score = np.sum(np.abs(embedding_1 - embedding_2))
                similarity_score = {"manhattan_distance": score}
            else:
                # Without this branch an unrecognised metric would leave
                # `similarity_score` unbound and crash below with an opaque
                # UnboundLocalError.
                msg = f"Unsupported similarity metric: {similarity_metric!r}."
                raise ValueError(msg)

        # Create a Data object to encapsulate the similarity score and additional information
        similarity_data = Data(
            data={
                "embedding_1": embedding_vectors[0].data["embeddings"],
                "embedding_2": embedding_vectors[1].data["embeddings"],
                "similarity_score": similarity_score,
            },
            text_key="similarity_score",
        )

        self.status = similarity_data
        return similarity_data