Omartificial-Intelligence-Space committed on
Commit
16b7567
·
verified ·
1 Parent(s): 9832c5a

update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -35
app.py CHANGED
@@ -1,25 +1,23 @@
1
  import gradio as gr
2
  import spaces
3
  import torch
 
 
4
  from datasets import load_dataset, concatenate_datasets
5
  from sentence_transformers import SentenceTransformer
6
  from sentence_transformers.evaluation import InformationRetrievalEvaluator, SequentialEvaluator
7
  from sentence_transformers.util import cos_sim
8
- import pandas as pd
9
 
10
  # Check for GPU support and configure appropriately
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
- zero = torch.Tensor([0]).to(device) # Ensure that tensor operates on the appropriate device
13
  print(f"Device being used: {zero.device}")
14
 
15
- @spaces.GPU # Enable GPU support for Gradio Spaces
16
  def evaluate_model(model_id):
17
- # Load the model on the appropriate device
18
  model = SentenceTransformer(model_id, device=device)
19
-
20
- # Define the evaluation parameters
21
  matryoshka_dimensions = [768, 512, 256, 128, 64]
22
-
23
  # Prepare datasets
24
  datasets_info = [
25
  {
@@ -28,38 +26,43 @@ def evaluate_model(model_id):
28
  "split": "train",
29
  "size": 7000,
30
  "columns": ("question", "context"),
31
- "sample_size": 100
32
  },
33
  {
34
- "name": "MLQA Arabic",
35
  "dataset_id": "google/xtreme",
36
- "subset": "MLQA.ar.ar", # Specify the correct subset
37
  "split": "validation",
38
  "size": 500,
39
  "columns": ("question", "context"),
40
- "sample_size": 100
41
  },
42
  {
43
- "name": "Custom",
44
- "dataset_id": "philschmid/finanical-rag-embedding-dataset",
45
  "split": "train",
46
  "size": None,
47
  "columns": ("question", "context"),
48
- "sample_size": 100
 
49
  }
50
  ]
51
 
52
  evaluation_results = []
53
-
 
54
  for dataset_info in datasets_info:
55
  # Load the dataset with subset if available
56
  if "subset" in dataset_info:
57
  dataset = load_dataset(dataset_info["dataset_id"], dataset_info["subset"], split=dataset_info["split"])
58
  else:
59
  dataset = load_dataset(dataset_info["dataset_id"], split=dataset_info["split"])
60
-
61
- # Sample the dataset
62
- dataset = dataset.select(range(min(dataset_info["sample_size"], len(dataset))))
 
 
 
63
 
64
  # Rename columns
65
  dataset = dataset.rename_column(dataset_info["columns"][0], "anchor")
@@ -77,7 +80,6 @@ def evaluate_model(model_id):
77
  relevant_docs = {q_id: [q_id] for q_id in queries}
78
 
79
  matryoshka_evaluators = []
80
- # Iterate over the different dimensions
81
  for dim in matryoshka_dimensions:
82
  ir_evaluator = InformationRetrievalEvaluator(
83
  queries=queries,
@@ -88,14 +90,11 @@ def evaluate_model(model_id):
88
  score_functions={"cosine": cos_sim},
89
  )
90
  matryoshka_evaluators.append(ir_evaluator)
91
-
92
- # Create a sequential evaluator
93
  evaluator = SequentialEvaluator(matryoshka_evaluators)
94
-
95
- # Evaluate the model
96
  results = evaluator(model)
97
 
98
- # Collect results for each dataset
99
  for dim in matryoshka_dimensions:
100
  key = f"dim_{dim}_cosine_ndcg@10"
101
  score = results[key] if key in results else None
@@ -104,26 +103,52 @@ def evaluate_model(model_id):
104
  "Dimension": dim,
105
  "Score": score
106
  })
107
-
 
 
 
 
108
  # Convert results to DataFrame for display
109
  result_df = pd.DataFrame(evaluation_results)
110
- return result_df
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  # Define the Gradio interface
113
  def display_results(model_name):
114
- # Evaluate model and return results
115
- result_df = evaluate_model(model_name)
116
- return result_df
117
 
118
- # Create the Gradio interface
119
  demo = gr.Interface(
120
  fn=display_results,
121
- inputs=gr.Textbox(label="Model ID"),
122
- outputs="dataframe",
123
- title="Model Evaluation with GPU Support",
124
- description="Enter a Hugging Face Sentence Transformer model ID to evaluate it across datasets, leveraging GPU if available."
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  )
126
 
127
- # Launch the Gradio app
128
  if __name__ == "__main__":
129
  demo.launch()
 
1
  import gradio as gr
2
  import spaces
3
  import torch
4
+ import pandas as pd
5
+ import matplotlib.pyplot as plt
6
  from datasets import load_dataset, concatenate_datasets
7
  from sentence_transformers import SentenceTransformer
8
  from sentence_transformers.evaluation import InformationRetrievalEvaluator, SequentialEvaluator
9
  from sentence_transformers.util import cos_sim
 
10
 
11
# Check for GPU support and configure appropriately.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Small probe tensor used only to confirm which device is active.
# torch.tensor(..., device=...) is the modern constructor and places the
# tensor directly on the target device, unlike the legacy
# torch.Tensor([0]).to(device), which allocates on CPU first and then copies.
zero = torch.tensor([0.0], device=device)
print(f"Device being used: {zero.device}")
15
 
16
+ @spaces.GPU
17
  def evaluate_model(model_id):
 
18
  model = SentenceTransformer(model_id, device=device)
 
 
19
  matryoshka_dimensions = [768, 512, 256, 128, 64]
20
+
21
  # Prepare datasets
22
  datasets_info = [
23
  {
 
26
  "split": "train",
27
  "size": 7000,
28
  "columns": ("question", "context"),
29
+ "sample_size": 500
30
  },
31
  {
32
+ "name": "MLQA Arabic (Long Context)",
33
  "dataset_id": "google/xtreme",
34
+ "subset": "MLQA.ar.ar",
35
  "split": "validation",
36
  "size": 500,
37
  "columns": ("question", "context"),
38
+ "sample_size": 500
39
  },
40
  {
41
+ "name": "ARCD (Short Context)",
42
+ "dataset_id": "hsseinmz/arcd",
43
  "split": "train",
44
  "size": None,
45
  "columns": ("question", "context"),
46
+ "sample_size": 500,
47
+ "last_rows": True # Flag to take the last 500 rows
48
  }
49
  ]
50
 
51
  evaluation_results = []
52
+ scores_by_dataset = {}
53
+
54
  for dataset_info in datasets_info:
55
  # Load the dataset with subset if available
56
  if "subset" in dataset_info:
57
  dataset = load_dataset(dataset_info["dataset_id"], dataset_info["subset"], split=dataset_info["split"])
58
  else:
59
  dataset = load_dataset(dataset_info["dataset_id"], split=dataset_info["split"])
60
+
61
+ # Take last 500 rows if specified
62
+ if dataset_info.get("last_rows"):
63
+ dataset = dataset.select(range(len(dataset) - dataset_info["sample_size"], len(dataset)))
64
+ else:
65
+ dataset = dataset.select(range(min(dataset_info["sample_size"], len(dataset))))
66
 
67
  # Rename columns
68
  dataset = dataset.rename_column(dataset_info["columns"][0], "anchor")
 
80
  relevant_docs = {q_id: [q_id] for q_id in queries}
81
 
82
  matryoshka_evaluators = []
 
83
  for dim in matryoshka_dimensions:
84
  ir_evaluator = InformationRetrievalEvaluator(
85
  queries=queries,
 
90
  score_functions={"cosine": cos_sim},
91
  )
92
  matryoshka_evaluators.append(ir_evaluator)
93
+
 
94
  evaluator = SequentialEvaluator(matryoshka_evaluators)
 
 
95
  results = evaluator(model)
96
 
97
+ scores = []
98
  for dim in matryoshka_dimensions:
99
  key = f"dim_{dim}_cosine_ndcg@10"
100
  score = results[key] if key in results else None
 
103
  "Dimension": dim,
104
  "Score": score
105
  })
106
+ scores.append(score)
107
+
108
+ # Store scores by dataset for bar chart creation
109
+ scores_by_dataset[dataset_info["name"]] = scores
110
+
111
  # Convert results to DataFrame for display
112
  result_df = pd.DataFrame(evaluation_results)
113
+
114
+ # Generate bar charts for each dataset
115
+ charts = []
116
+ for dataset_name, scores in scores_by_dataset.items():
117
+ fig, ax = plt.subplots()
118
+ ax.bar([str(dim) for dim in matryoshka_dimensions], scores)
119
+ ax.set_title(f"{dataset_name} Evaluation Scores", fontsize=16, color='darkblue')
120
+ ax.set_xlabel("Embedding Dimension", fontsize=12)
121
+ ax.set_ylabel("NDCG@10 Score", fontsize=12)
122
+ plt.tight_layout()
123
+ charts.append(fig)
124
+
125
+ return result_df, charts
126
 
127
  # Define the Gradio interface
128
  def display_results(model_name):
129
+ result_df, charts = evaluate_model(model_name)
130
+ return result_df, charts[0], charts[1], charts[2]
 
131
 
 
132
# Gradio UI definition: a single model-ID textbox in, a results table plus
# one bar chart per evaluated dataset out.
_model_input = gr.Textbox(
    label="Enter a Hugging Face Model ID",
    placeholder="e.g., sentence-transformers/all-MiniLM-L6-v2",
)

_result_outputs = [
    gr.Dataframe(label="Evaluation Results"),
    gr.Plot(label="Arabic Financial Dataset (Financial Evaluation)"),
    gr.Plot(label="MLQA Arabic (Long Context Evaluation)"),
    gr.Plot(label="ARCD (Short Context Evaluation)"),
]

_description = (
    "Evaluate your Sentence Transformer model across three datasets:\n"
    "- **Arabic Financial Dataset** for financial data evaluation.\n"
    "- **MLQA Arabic** for long context question-answer evaluation.\n"
    "- **ARCD** for short context question-answer evaluation.\n"
    "Results are displayed in a table and visualized with bar charts for each dataset."
)

demo = gr.Interface(
    fn=display_results,
    inputs=_model_input,
    outputs=_result_outputs,
    title="Sentence Transformer Evaluation Dashboard",
    description=_description,
    theme="default",
    live=False,
)

if __name__ == "__main__":
    demo.launch()