Jatin Mehra committed on
Commit
0d7f003
·
1 Parent(s): 8d0a63e

Add Jupyter notebook for loading and analyzing RAG scores from CSV

Browse files
Files changed (1) hide show
  1. test_RAG.ipynb +295 -0
test_RAG.ipynb ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "81bb23ad",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/html": [
12
+ "<div>\n",
13
+ "<style scoped>\n",
14
+ " .dataframe tbody tr th:only-of-type {\n",
15
+ " vertical-align: middle;\n",
16
+ " }\n",
17
+ "\n",
18
+ " .dataframe tbody tr th {\n",
19
+ " vertical-align: top;\n",
20
+ " }\n",
21
+ "\n",
22
+ " .dataframe thead th {\n",
23
+ " text-align: right;\n",
24
+ " }\n",
25
+ "</style>\n",
26
+ "<table border=\"1\" class=\"dataframe\">\n",
27
+ " <thead>\n",
28
+ " <tr style=\"text-align: right;\">\n",
29
+ " <th></th>\n",
30
+ " <th>query</th>\n",
31
+ " <th>semantic_similarity</th>\n",
32
+ " <th>rougeL_f1</th>\n",
33
+ " <th>status</th>\n",
34
+ " </tr>\n",
35
+ " </thead>\n",
36
+ " <tbody>\n",
37
+ " <tr>\n",
38
+ " <th>0</th>\n",
39
+ " <td>What is the Berry Export Summary 2028 and what...</td>\n",
40
+ " <td>0.8763</td>\n",
41
+ " <td>0.3206</td>\n",
42
+ " <td>PASS</td>\n",
43
+ " </tr>\n",
44
+ " <tr>\n",
45
+ " <th>1</th>\n",
46
+ " <td>What are some of the benefits reported from ha...</td>\n",
47
+ " <td>0.9655</td>\n",
48
+ " <td>0.6016</td>\n",
49
+ " <td>PASS</td>\n",
50
+ " </tr>\n",
51
+ " <tr>\n",
52
+ " <th>2</th>\n",
53
+ " <td>What are the unique features of the Coolands f...</td>\n",
54
+ " <td>0.7942</td>\n",
55
+ " <td>0.2519</td>\n",
56
+ " <td>PASS</td>\n",
57
+ " </tr>\n",
58
+ " <tr>\n",
59
+ " <th>3</th>\n",
60
+ " <td>What is the main difference between the Nation...</td>\n",
61
+ " <td>0.9024</td>\n",
62
+ " <td>0.2597</td>\n",
63
+ " <td>PASS</td>\n",
64
+ " </tr>\n",
65
+ " <tr>\n",
66
+ " <th>4</th>\n",
67
+ " <td>How did Gunnar Nelson win the fight against Za...</td>\n",
68
+ " <td>0.8510</td>\n",
69
+ " <td>0.3101</td>\n",
70
+ " <td>PASS</td>\n",
71
+ " </tr>\n",
72
+ " <tr>\n",
73
+ " <th>5</th>\n",
74
+ " <td>What are some of the features of Fabiana Filip...</td>\n",
75
+ " <td>0.9099</td>\n",
76
+ " <td>0.2963</td>\n",
77
+ " <td>PASS</td>\n",
78
+ " </tr>\n",
79
+ " <tr>\n",
80
+ " <th>6</th>\n",
81
+ " <td>How did Dan Foley feel about his portrayal on ...</td>\n",
82
+ " <td>0.9170</td>\n",
83
+ " <td>0.4444</td>\n",
84
+ " <td>PASS</td>\n",
85
+ " </tr>\n",
86
+ " <tr>\n",
87
+ " <th>7</th>\n",
88
+ " <td>What is the reason for the closure of the comm...</td>\n",
89
+ " <td>0.8298</td>\n",
90
+ " <td>0.3636</td>\n",
91
+ " <td>PASS</td>\n",
92
+ " </tr>\n",
93
+ " <tr>\n",
94
+ " <th>8</th>\n",
95
+ " <td>What are the five love and relationship podcas...</td>\n",
96
+ " <td>0.7104</td>\n",
97
+ " <td>0.1860</td>\n",
98
+ " <td>FAIL</td>\n",
99
+ " </tr>\n",
100
+ " <tr>\n",
101
+ " <th>9</th>\n",
102
+ " <td>Which two teams dropped out of the Primal Ques...</td>\n",
103
+ " <td>0.9701</td>\n",
104
+ " <td>0.4258</td>\n",
105
+ " <td>PASS</td>\n",
106
+ " </tr>\n",
107
+ " </tbody>\n",
108
+ "</table>\n",
109
+ "</div>"
110
+ ],
111
+ "text/plain": [
112
+ " query semantic_similarity \\\n",
113
+ "0 What is the Berry Export Summary 2028 and what... 0.8763 \n",
114
+ "1 What are some of the benefits reported from ha... 0.9655 \n",
115
+ "2 What are the unique features of the Coolands f... 0.7942 \n",
116
+ "3 What is the main difference between the Nation... 0.9024 \n",
117
+ "4 How did Gunnar Nelson win the fight against Za... 0.8510 \n",
118
+ "5 What are some of the features of Fabiana Filip... 0.9099 \n",
119
+ "6 How did Dan Foley feel about his portrayal on ... 0.9170 \n",
120
+ "7 What is the reason for the closure of the comm... 0.8298 \n",
121
+ "8 What are the five love and relationship podcas... 0.7104 \n",
122
+ "9 Which two teams dropped out of the Primal Ques... 0.9701 \n",
123
+ "\n",
124
+ " rougeL_f1 status \n",
125
+ "0 0.3206 PASS \n",
126
+ "1 0.6016 PASS \n",
127
+ "2 0.2519 PASS \n",
128
+ "3 0.2597 PASS \n",
129
+ "4 0.3101 PASS \n",
130
+ "5 0.2963 PASS \n",
131
+ "6 0.4444 PASS \n",
132
+ "7 0.3636 PASS \n",
133
+ "8 0.1860 FAIL \n",
134
+ "9 0.4258 PASS "
135
+ ]
136
+ },
137
+ "execution_count": 1,
138
+ "metadata": {},
139
+ "output_type": "execute_result"
140
+ }
141
+ ],
142
+ "source": [
143
+ "# Load scores from CSV\n",
144
+ "\n",
145
+ "import pandas as pd\n",
146
+ "\n",
147
+ "df = pd.read_csv(\"rag_scores.csv\")\n",
148
+ "\n",
149
+ "df.head(10)"
150
+ ]
151
+ },
152
+ {
153
+ "cell_type": "code",
154
+ "execution_count": 2,
155
+ "id": "58e0482f",
156
+ "metadata": {},
157
+ "outputs": [
158
+ {
159
+ "data": {
160
+ "text/html": [
161
+ "<div>\n",
162
+ "<style scoped>\n",
163
+ " .dataframe tbody tr th:only-of-type {\n",
164
+ " vertical-align: middle;\n",
165
+ " }\n",
166
+ "\n",
167
+ " .dataframe tbody tr th {\n",
168
+ " vertical-align: top;\n",
169
+ " }\n",
170
+ "\n",
171
+ " .dataframe thead th {\n",
172
+ " text-align: right;\n",
173
+ " }\n",
174
+ "</style>\n",
175
+ "<table border=\"1\" class=\"dataframe\">\n",
176
+ " <thead>\n",
177
+ " <tr style=\"text-align: right;\">\n",
178
+ " <th></th>\n",
179
+ " <th>semantic_similarity</th>\n",
180
+ " <th>rougeL_f1</th>\n",
181
+ " </tr>\n",
182
+ " </thead>\n",
183
+ " <tbody>\n",
184
+ " <tr>\n",
185
+ " <th>count</th>\n",
186
+ " <td>75.000000</td>\n",
187
+ " <td>75.000000</td>\n",
188
+ " </tr>\n",
189
+ " <tr>\n",
190
+ " <th>mean</th>\n",
191
+ " <td>0.852692</td>\n",
192
+ " <td>0.395061</td>\n",
193
+ " </tr>\n",
194
+ " <tr>\n",
195
+ " <th>std</th>\n",
196
+ " <td>0.088759</td>\n",
197
+ " <td>0.216511</td>\n",
198
+ " </tr>\n",
199
+ " <tr>\n",
200
+ " <th>min</th>\n",
201
+ " <td>0.591500</td>\n",
202
+ " <td>0.098600</td>\n",
203
+ " </tr>\n",
204
+ " <tr>\n",
205
+ " <th>25%</th>\n",
206
+ " <td>0.794600</td>\n",
207
+ " <td>0.251600</td>\n",
208
+ " </tr>\n",
209
+ " <tr>\n",
210
+ " <th>50%</th>\n",
211
+ " <td>0.873200</td>\n",
212
+ " <td>0.325600</td>\n",
213
+ " </tr>\n",
214
+ " <tr>\n",
215
+ " <th>75%</th>\n",
216
+ " <td>0.918150</td>\n",
217
+ " <td>0.495100</td>\n",
218
+ " </tr>\n",
219
+ " <tr>\n",
220
+ " <th>max</th>\n",
221
+ " <td>1.000000</td>\n",
222
+ " <td>1.000000</td>\n",
223
+ " </tr>\n",
224
+ " </tbody>\n",
225
+ "</table>\n",
226
+ "</div>"
227
+ ],
228
+ "text/plain": [
229
+ " semantic_similarity rougeL_f1\n",
230
+ "count 75.000000 75.000000\n",
231
+ "mean 0.852692 0.395061\n",
232
+ "std 0.088759 0.216511\n",
233
+ "min 0.591500 0.098600\n",
234
+ "25% 0.794600 0.251600\n",
235
+ "50% 0.873200 0.325600\n",
236
+ "75% 0.918150 0.495100\n",
237
+ "max 1.000000 1.000000"
238
+ ]
239
+ },
240
+ "execution_count": 2,
241
+ "metadata": {},
242
+ "output_type": "execute_result"
243
+ }
244
+ ],
245
+ "source": [
246
+ "df.describe()"
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "code",
251
+ "execution_count": 3,
252
+ "id": "a9073d52",
253
+ "metadata": {},
254
+ "outputs": [
255
+ {
256
+ "data": {
257
+ "text/plain": [
258
+ "status\n",
259
+ "PASS 64\n",
260
+ "FAIL 11\n",
261
+ "Name: count, dtype: int64"
262
+ ]
263
+ },
264
+ "execution_count": 3,
265
+ "metadata": {},
266
+ "output_type": "execute_result"
267
+ }
268
+ ],
269
+ "source": [
270
+ "df['status'].value_counts()"
271
+ ]
272
+ }
273
+ ],
274
+ "metadata": {
275
+ "kernelspec": {
276
+ "display_name": ".venv",
277
+ "language": "python",
278
+ "name": "python3"
279
+ },
280
+ "language_info": {
281
+ "codemirror_mode": {
282
+ "name": "ipython",
283
+ "version": 3
284
+ },
285
+ "file_extension": ".py",
286
+ "mimetype": "text/x-python",
287
+ "name": "python",
288
+ "nbconvert_exporter": "python",
289
+ "pygments_lexer": "ipython3",
290
+ "version": "3.12.1"
291
+ }
292
+ },
293
+ "nbformat": 4,
294
+ "nbformat_minor": 5
295
+ }