Spaces:
Running
on
Zero
Running
on
Zero
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import spaces
|
3 |
+
import torch
|
4 |
+
from datasets import load_dataset, concatenate_datasets
|
5 |
+
from sentence_transformers import SentenceTransformer
|
6 |
+
from sentence_transformers.evaluation import InformationRetrievalEvaluator, SequentialEvaluator
|
7 |
+
from sentence_transformers.util import cos_sim
|
8 |
+
import pandas as pd
|
9 |
+
|
10 |
+
# Select the compute device: prefer a CUDA GPU, fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Probe tensor: confirms tensors can be placed on the chosen device
# and reports where they actually land.
zero = torch.Tensor([0]).to(device)
print(f"Device being used: {zero.device}")
|
14 |
+
|
15 |
+
@spaces.GPU  # Enable GPU support for Gradio Spaces (ZeroGPU allocates on call)
def evaluate_model(model_id):
    """Evaluate a Sentence Transformer model with Matryoshka-dimension IR metrics.

    For each configured dataset, a small sample is turned into an
    information-retrieval task where each query's only relevant document is
    its own paired context, and cosine NDCG@10 is measured at several
    truncated embedding dimensions.

    Args:
        model_id: Hugging Face model ID of a Sentence Transformer model.

    Returns:
        pandas.DataFrame with columns "Dataset", "Dimension", "Score"
        (Score is None when the evaluator did not report the metric).
    """
    # Load the model on the appropriate device (module-level `device`).
    model = SentenceTransformer(model_id, device=device)

    # Embedding sizes to truncate to for Matryoshka evaluation.
    matryoshka_dimensions = [768, 512, 256, 128, 64]

    # Datasets to evaluate on. "subset" (when present) is a load_dataset
    # config name, e.g. "MLQA.ar.ar" for google/xtreme.
    datasets_info = [
        {
            "name": "Arabic-financial",
            "dataset_id": "Omartificial-Intelligence-Space/Arabic-finanical-rag-embedding-dataset",
            "split": "train",
            "size": 7000,
            "columns": ("question", "context"),
            "sample_size": 100
        },
        {
            "name": "MLQA Arabic",
            "dataset_id": "google/xtreme",
            "split": "validation",
            "subset": "MLQA.ar.ar",
            "size": 500,
            "columns": ("question", "context"),
            "sample_size": 100
        },
        {
            "name": "Custom",
            "dataset_id": "philschmid/finanical-rag-embedding-dataset",
            "split": "train",
            "size": None,
            "columns": ("question", "context"),
            "sample_size": 100
        }
    ]

    evaluation_results = []

    for dataset_info in datasets_info:
        # BUG FIX: a "subset" is a dataset *config* (second positional
        # argument of load_dataset), not a column value. The previous code
        # filtered on a non-existent "subset" column, which either raised
        # (load without a required config) or produced an empty dataset.
        subset = dataset_info.get("subset")
        if subset:
            dataset = load_dataset(dataset_info["dataset_id"], subset,
                                   split=dataset_info["split"])
        else:
            dataset = load_dataset(dataset_info["dataset_id"],
                                   split=dataset_info["split"])

        # Take a small deterministic sample for a quick evaluation.
        dataset = dataset.select(range(min(dataset_info["sample_size"], len(dataset))))

        # Normalize column names and attach stable integer ids.
        dataset = dataset.rename_column(dataset_info["columns"][0], "anchor")
        dataset = dataset.rename_column(dataset_info["columns"][1], "positive")
        dataset = dataset.add_column("id", list(range(len(dataset))))

        # Queries are the anchors; the corpus is the paired contexts.
        corpus = dict(zip(dataset["id"], dataset["positive"]))
        queries = dict(zip(dataset["id"], dataset["anchor"]))

        # Each query has exactly one relevant document: the one sharing its id.
        relevant_docs = {q_id: [q_id] for q_id in queries}

        # One IR evaluator per Matryoshka dimension.
        matryoshka_evaluators = []
        for dim in matryoshka_dimensions:
            ir_evaluator = InformationRetrievalEvaluator(
                queries=queries,
                corpus=corpus,
                relevant_docs=relevant_docs,
                name=f"dim_{dim}",
                truncate_dim=dim,  # truncate embeddings to this size
                score_functions={"cosine": cos_sim},
            )
            matryoshka_evaluators.append(ir_evaluator)

        # Run all per-dimension evaluations in one pass over the model.
        evaluator = SequentialEvaluator(matryoshka_evaluators)
        results = evaluator(model)

        # Collect cosine NDCG@10 per dimension (None when the key is absent).
        for dim in matryoshka_dimensions:
            evaluation_results.append({
                "Dataset": dataset_info["name"],
                "Dimension": dim,
                "Score": results.get(f"dim_{dim}_cosine_ndcg@10")
            })

    # Convert results to DataFrame for display.
    return pd.DataFrame(evaluation_results)
|
107 |
+
|
108 |
+
# Gradio callback wired into the interface below.
def display_results(model_name):
    """Run the evaluation for *model_name* and hand back the results table."""
    return evaluate_model(model_name)
|
113 |
+
|
114 |
+
# Build the Gradio UI: a single model-ID textbox in, a results table out.
demo = gr.Interface(
    fn=display_results,
    inputs=gr.Textbox(label="Model ID"),
    outputs="dataframe",
    title="Model Evaluation with GPU Support",
    description="Enter a Hugging Face Sentence Transformer model ID to evaluate it across datasets, leveraging GPU if available.",
)

# Start the app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch(debug=True)
|