iwashuman0405 committed on
Commit
464151b
·
verified ·
1 Parent(s): 8b8bcac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +262 -262
app.py CHANGED
@@ -1,262 +1,262 @@
1
- import torch
2
- import pandas as pd
3
- import numpy as np
4
- import gradio as gr
5
- from sklearn.metrics.pairwise import cosine_similarity
6
- from sentence_transformers import util, SentenceTransformer
7
- import ast
8
- import json
9
- import re
10
-
11
- # Load embeddings and data
12
- embeddings = torch.load("embeddings.pth") # shape: [377, 768]
13
- data_df = pd.read_csv("data.csv")
14
-
15
- # Load model once
16
- # model = SentenceTransformer("all-MiniLM-L6-v2")
17
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1",trust_remote_code=True)
18
-
19
- def extract_duration(text):
20
- match = re.search(r"\d+", str(text)) # look for the first number
21
- return int(match.group()) if match else 0
22
-
23
- type_mapping = {
24
- "A": "Ability & Aptitude",
25
- "B": "Biodata & Situational Judgement",
26
- "C": "Competencies",
27
- "D": "Development & 360",
28
- "E": "Assessment Exercises",
29
- "K": "Knowledge & Skills",
30
- "P": "Personality & Behavior",
31
- "S": "Simulations"
32
- }
33
-
34
- def decode_test_types(test_type_raw):
35
- try:
36
- test_type_list = ast.literal_eval(test_type_raw)
37
- return [type_mapping.get(code.strip(), code.strip()) for code in test_type_list]
38
- except Exception:
39
- return []
40
-
41
-
42
- def clean_query_text(text):
43
- replacements = {
44
- "Java Script": "JavaScript",
45
- "java script": "JavaScript",
46
- "Java script": "JavaScript"
47
- }
48
- for wrong, correct in replacements.items():
49
- text = text.replace(wrong, correct)
50
- return text
51
-
52
- def prepare_input(query):
53
- cleaned_query = clean_query_text(query)
54
- input_text = f"{cleaned_query}"
55
- return input_text.strip()
56
-
57
- def find_top_k(query: str, k: int = 5):
58
- query_str = prepare_input(query)
59
- query_vec = model.encode([query_str], normalize_embeddings=True)
60
-
61
- scores = util.cos_sim(query_vec, embeddings)[0].numpy()
62
- ranked_indices = np.argsort(-scores)
63
-
64
- results = []
65
- for idx in ranked_indices[:k]:
66
- item = data_df.iloc[idx]
67
- test_type_raw = item["test_types"]
68
- test_type_decoded = decode_test_types(test_type_raw)
69
-
70
- results.append({
71
- "url": item["url"],
72
- "adaptive_support": item["adaptive"],
73
- "description": item["description"],
74
- "duration": extract_duration(item["assessment_length"]),
75
- "remote_support": item["remote"],
76
- "test_type": test_type_decoded
77
- })
78
-
79
- # result = {
80
- # "name": item["name"],
81
- # "url": item["url"],
82
- # "duration": item["assessment_length"],
83
- # "remote": item["remote"],
84
- # "adaptive": item["adaptive"]
85
- # }
86
- # results.append(result)
87
-
88
- return results
89
-
90
- def health():
91
- return gr.JSON({"status": "healthy"})
92
-
93
- def recommend(query):
94
- recommended = find_top_k(query)
95
- return gr.JSON({"recommended_assessments": recommended})
96
-
97
- recommend_api = gr.Interface(fn=recommend, inputs=gr.Textbox(), outputs="json")
98
- health_api = gr.Interface(fn=health, inputs=[], outputs="json")
99
-
100
- # Gradio app with multiple endpoints
101
- demo = gr.TabbedInterface(
102
- interface_list=[recommend_api, health_api],
103
- tab_names=["recommend", "health"]
104
- )
105
-
106
- if __name__ == "__main__":
107
- demo.launch()
108
- # Gradio Interface
109
-
110
- # app = gr.Interface(
111
- # fn=recommend,
112
- # inputs=gr.Textbox(label="Job Description or Query"),
113
- # outputs="json",
114
- # examples=["Looking for java developer assessment", "Communication skills test"]
115
- # )
116
-
117
- # # Add `/health` route manually using FastAPI inside Gradio
118
- # app.launch(server_name="0.0.0.0", server_port=7860, share=False, show_error=True, inline=False)
119
- # with gr.Blocks() as demo:
120
- # gr.Markdown("### SHL Assessment Recommender")
121
-
122
- # query_input = gr.Textbox(label="Job Query", placeholder="e.g. JavaScript Developer")
123
- # duration_input = gr.Textbox(label="Assessment Duration (minutes)", placeholder="e.g. 30")
124
- # topk_input = gr.Slider(label="Top K Results", minimum=1, maximum=10, step=1, value=5)
125
- # output = gr.JSON(label="Top Matches")
126
-
127
- # submit_btn = gr.Button("Submit")
128
-
129
- # def process(query, duration, top_k):
130
- # return find_top_k(query, duration, top_k)
131
-
132
- # submit_btn.click(fn=process, inputs=[query_input, duration_input, topk_input], outputs=[output])
133
-
134
- # demo.launch()
135
-
136
- # def find_top_k(query_json, k=5):
137
- # query_str = prepare_input(query_json)
138
-
139
- # # Convert query to vector
140
- # query_vec = model.encode([query_str], normalize_embeddings=True)
141
-
142
- # # Cosine similarity with precomputed normalized embeddings
143
- # scores = util.cos_sim(query_vec, embeddings)[0].numpy()
144
- # ranked_indices = np.argsort(-scores)
145
-
146
- # results = []
147
- # for idx in ranked_indices[:k]:
148
- # item = data_df.iloc[idx]
149
- # result = {
150
- # "name": item["name"],
151
- # "url": item["url"],
152
- # "remote_testing": item["remote"],
153
- # "adaptive": item["adaptive"],
154
- # "duration": item["assessment_length"],
155
- # "test_type": item["test_types"],
156
- # }
157
- # results.append(result)
158
-
159
- # return results
160
-
161
- # # Gradio Interface
162
- # with gr.Blocks() as demo:
163
- # gr.Markdown("### RAG Gradio Demo with JSON Query")
164
-
165
- # json_input = gr.Textbox(label="JSON Query (as JSON string)")
166
- # output = gr.JSON(label="Top Matches from Data")
167
-
168
- # def process(json_input_str):
169
- # try:
170
- # query_json = json.loads(json_input_str)
171
- # results = find_top_k(query_json)
172
- # return results
173
- # except Exception as e:
174
- # return {"error": str(e)}
175
-
176
- # submit_btn = gr.Button("Submit")
177
- # submit_btn.click(fn=process, inputs=[json_input], outputs=[output])
178
-
179
- # demo.launch()
180
-
181
- # import torch
182
- # import pandas as pd
183
- # import numpy as np
184
- # import gradio as gr
185
- # from sklearn.metrics.pairwise import cosine_similarity
186
- # from sentence_transformers import util,SentenceTransformer
187
-
188
- # # Load embeddings and data
189
- # embeddings = torch.load("embeddings.pth") # shape: [377, 768]
190
- # data_df = pd.read_csv("data.csv")
191
-
192
- # def clean_query_text(text):
193
- # replacements = {
194
- # "Java Script": "JavaScript",
195
- # "java script": "JavaScript",
196
- # "Java script": "JavaScript"
197
- # }
198
- # for wrong, correct in replacements.items():
199
- # text = text.replace(wrong, correct)
200
- # return text
201
-
202
- # def prepare_input(data):
203
- # cleaned_query = clean_query_text(data.query)
204
- # input_text = f"{cleaned_query}. Candidate should complete assessment in {data.duration} minutes."
205
- # return input_text.strip()
206
-
207
- # def find_top_k(query_json, k=5):
208
- # query_str = prepare_input(query_json)
209
- # # Convert query to vector
210
- # from sentence_transformers import SentenceTransformer
211
- # model = SentenceTransformer("all-MiniLM-L6-v2")
212
- # query_vec = model.encode([query_str], normalize_embeddings=True)
213
- # scores = util.cos_sim(query_vec, embeddings)[0].numpy()
214
- # ranked_indices = np.argsort(-scores)
215
-
216
- # results = []
217
- # for idx in ranked_indices:
218
- # item = data_df.iloc[idx]
219
- # print(f"Matched: {item['name']} with duration {item['assessment_length']}")
220
-
221
- # result = {
222
- # "name": item["name"],
223
- # "url": item["url"],
224
- # "remote_testing": item["remote"],
225
- # "adaptive": item["adaptive"],
226
- # "duration": item['assessment_length'],
227
- # "test_type": item["test_types"],
228
- # }
229
- # results.append(result)
230
-
231
- # if len(results) >= top_k:
232
- # break
233
-
234
- # return results
235
-
236
-
237
- # # Compute similarity
238
- # # similarities = cosine_similarity(query_vec, embeddings.numpy())[0]
239
- # # top_indices = similarities.argsort()[-k:][::-1]
240
-
241
- # # results = data_df.iloc[top_indices].to_dict(orient="records")
242
- # # return results
243
-
244
- # with gr.Blocks() as demo:
245
- # gr.Markdown("### RAG Gradio Demo with JSON Query")
246
-
247
- # json_input = gr.Textbox(label="JSON Query (as string)")
248
- # output = gr.JSON(label="Top Matches from Data")
249
-
250
- # def process(json_input_str):
251
- # try:
252
- # import json
253
- # query_json = json.loads(json_input_str)
254
- # results = find_top_k(query_json)
255
- # return results
256
- # except Exception as e:
257
- # return {"error": str(e)}
258
-
259
- # submit_btn = gr.Button("Submit")
260
- # submit_btn.click(fn=process, inputs=[json_input], outputs=[output])
261
-
262
- # demo.launch()
 
1
+ import torch
2
+ import pandas as pd
3
+ import numpy as np
4
+ import gradio as gr
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ from sentence_transformers import util, SentenceTransformer
7
+ import ast
8
+ import json
9
+ import re
10
+
11
# Load the precomputed corpus embeddings and the catalogue table used for lookups.
# SECURITY: weights_only=False lets torch.load unpickle arbitrary Python objects,
# which can execute code embedded in the file. Only load an embeddings.pth that
# was produced by a trusted source; if the file holds a plain tensor,
# weights_only=True is enough and is the safer choice.
embeddings = torch.load("embeddings.pth", weights_only=False)  # shape: [377, 768]
data_df = pd.read_csv("data.csv")

# Load model once, at import time, so every request reuses the same instance.
# NOTE(review): trust_remote_code=True executes model code fetched from the Hub
# repository - confirm the repository (and ideally a pinned revision) is trusted.
# model = SentenceTransformer("all-MiniLM-L6-v2")
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
18
+
19
def extract_duration(text):
    """Return the first run of digits in ``text`` as an int, or 0 when there is none.

    ``text`` is passed through ``str()`` first, so non-string values are accepted.
    """
    first_number = re.search(r"\d+", str(text))
    if first_number is None:
        return 0
    return int(first_number.group())
22
+
23
# Single-letter test-type codes and the display names they expand to.
type_mapping = {
    "A": "Ability & Aptitude",
    "B": "Biodata & Situational Judgement",
    "C": "Competencies",
    "D": "Development & 360",
    "E": "Assessment Exercises",
    "K": "Knowledge & Skills",
    "P": "Personality & Behavior",
    "S": "Simulations",
}


def decode_test_types(test_type_raw):
    """Expand a collection of test-type codes into their display names.

    Args:
        test_type_raw: Either the text of a Python literal (for example
            ``"['K', 'P']"``), which is parsed with ``ast.literal_eval``, or an
            already-parsed iterable of codes.

    Returns:
        A list with one entry per string code, in input order. Codes are
        stripped of surrounding whitespace and looked up in ``type_mapping``;
        codes missing from the mapping are returned unchanged. An empty list is
        returned when the input cannot be parsed or is not iterable (for
        example a NaN cell).
    """
    parsed = test_type_raw
    if isinstance(test_type_raw, (str, bytes)):
        try:
            parsed = ast.literal_eval(test_type_raw)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # These are the errors literal_eval raises for malformed input; anything
            # else is a real bug and should surface instead of being hidden.
            return []

    try:
        codes = list(parsed)
    except TypeError:
        # Parsed value is a scalar (number, None, NaN, ...), so there are no codes.
        return []

    decoded = []
    for code in codes:
        if not isinstance(code, str):
            # Skip stray non-string entries rather than discarding the whole row.
            continue
        code = code.strip()
        decoded.append(type_mapping.get(code, code))
    return decoded
40
+
41
+
42
def clean_query_text(text):
    """Normalize the two-word spelling "java script" to "JavaScript".

    The match is case-insensitive, so every casing ("Java Script",
    "JAVA SCRIPT", "java Script", ...) is rewritten, not just a fixed list of
    variants. All other text is returned unchanged.

    Args:
        text: The query string to clean.

    Returns:
        ``text`` with each "java script" occurrence replaced by "JavaScript".
    """
    return re.sub(r"java script", "JavaScript", text, flags=re.IGNORECASE)
51
+
52
def prepare_input(query):
    """Clean ``query`` with ``clean_query_text`` and trim surrounding whitespace."""
    return f"{clean_query_text(query)}".strip()
56
+
57
def find_top_k(query: str, k: int = 5):
    """Return the ``k`` catalogue rows whose embeddings are most similar to ``query``.

    The query is cleaned with ``prepare_input``, encoded with the module-level
    ``model``, and compared by cosine similarity against the module-level
    ``embeddings``. Rows are read from ``data_df`` by position, so the
    embeddings are assumed to be in the same order as the rows of ``data_df``
    (TODO confirm against the script that produced ``embeddings.pth``).

    Args:
        query: Free-text query or job description.
        k: Maximum number of results to return. Values of zero or less yield
            an empty list.

    Returns:
        A list of dicts, best match first, with the keys ``url``,
        ``adaptive_support``, ``description``, ``duration``, ``remote_support``
        and ``test_type``.
    """
    if k <= 0:
        # A negative k would slice from the end (ranked[:-1] drops only the last
        # row) and return almost the whole catalogue instead of nothing.
        return []

    query_str = prepare_input(query)
    query_vec = model.encode([query_str], normalize_embeddings=True)

    # Row 0 holds the similarities for the single query. .cpu() is a no-op for a
    # CPU tensor and keeps .numpy() working if the result lives on a GPU.
    scores = util.cos_sim(query_vec, embeddings)[0].cpu().numpy()
    ranked_indices = np.argsort(-scores)  # negate so the highest score sorts first

    results = []
    for idx in ranked_indices[:k]:
        item = data_df.iloc[idx]
        results.append({
            "url": item["url"],
            "adaptive_support": item["adaptive"],
            "description": item["description"],
            "duration": extract_duration(item["assessment_length"]),
            "remote_support": item["remote"],
            "test_type": decode_test_types(item["test_types"]),
        })

    return results
89
+
90
def health():
    """Return a Gradio JSON component carrying the fixed health-check payload."""
    status_payload = {"status": "healthy"}
    return gr.JSON(status_payload)
92
+
93
def recommend(query):
    """Wrap the ``find_top_k`` matches for ``query`` in a Gradio JSON component.

    The matches are placed under the ``recommended_assessments`` key.
    """
    response = {"recommended_assessments": find_top_k(query)}
    return gr.JSON(response)
96
+
97
# One Interface per endpoint; both declare a "json" output.
recommend_api = gr.Interface(
    fn=recommend,
    inputs=gr.Textbox(),
    outputs="json",
)
health_api = gr.Interface(
    fn=health,
    inputs=[],
    outputs="json",
)

# Gradio app with multiple endpoints, one tab per interface.
demo = gr.TabbedInterface(
    interface_list=[recommend_api, health_api],
    tab_names=["recommend", "health"],
)

# Start the server only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
108
+ # Gradio Interface
109
+
110
+ # app = gr.Interface(
111
+ # fn=recommend,
112
+ # inputs=gr.Textbox(label="Job Description or Query"),
113
+ # outputs="json",
114
+ # examples=["Looking for java developer assessment", "Communication skills test"]
115
+ # )
116
+
117
+ # # Add `/health` route manually using FastAPI inside Gradio
118
+ # app.launch(server_name="0.0.0.0", server_port=7860, share=False, show_error=True, inline=False)
119
+ # with gr.Blocks() as demo:
120
+ # gr.Markdown("### SHL Assessment Recommender")
121
+
122
+ # query_input = gr.Textbox(label="Job Query", placeholder="e.g. JavaScript Developer")
123
+ # duration_input = gr.Textbox(label="Assessment Duration (minutes)", placeholder="e.g. 30")
124
+ # topk_input = gr.Slider(label="Top K Results", minimum=1, maximum=10, step=1, value=5)
125
+ # output = gr.JSON(label="Top Matches")
126
+
127
+ # submit_btn = gr.Button("Submit")
128
+
129
+ # def process(query, duration, top_k):
130
+ # return find_top_k(query, duration, top_k)
131
+
132
+ # submit_btn.click(fn=process, inputs=[query_input, duration_input, topk_input], outputs=[output])
133
+
134
+ # demo.launch()
135
+
136
+ # def find_top_k(query_json, k=5):
137
+ # query_str = prepare_input(query_json)
138
+
139
+ # # Convert query to vector
140
+ # query_vec = model.encode([query_str], normalize_embeddings=True)
141
+
142
+ # # Cosine similarity with precomputed normalized embeddings
143
+ # scores = util.cos_sim(query_vec, embeddings)[0].numpy()
144
+ # ranked_indices = np.argsort(-scores)
145
+
146
+ # results = []
147
+ # for idx in ranked_indices[:k]:
148
+ # item = data_df.iloc[idx]
149
+ # result = {
150
+ # "name": item["name"],
151
+ # "url": item["url"],
152
+ # "remote_testing": item["remote"],
153
+ # "adaptive": item["adaptive"],
154
+ # "duration": item["assessment_length"],
155
+ # "test_type": item["test_types"],
156
+ # }
157
+ # results.append(result)
158
+
159
+ # return results
160
+
161
+ # # Gradio Interface
162
+ # with gr.Blocks() as demo:
163
+ # gr.Markdown("### RAG Gradio Demo with JSON Query")
164
+
165
+ # json_input = gr.Textbox(label="JSON Query (as JSON string)")
166
+ # output = gr.JSON(label="Top Matches from Data")
167
+
168
+ # def process(json_input_str):
169
+ # try:
170
+ # query_json = json.loads(json_input_str)
171
+ # results = find_top_k(query_json)
172
+ # return results
173
+ # except Exception as e:
174
+ # return {"error": str(e)}
175
+
176
+ # submit_btn = gr.Button("Submit")
177
+ # submit_btn.click(fn=process, inputs=[json_input], outputs=[output])
178
+
179
+ # demo.launch()
180
+
181
+ # import torch
182
+ # import pandas as pd
183
+ # import numpy as np
184
+ # import gradio as gr
185
+ # from sklearn.metrics.pairwise import cosine_similarity
186
+ # from sentence_transformers import util,SentenceTransformer
187
+
188
+ # # Load embeddings and data
189
+ # embeddings = torch.load("embeddings.pth") # shape: [377, 768]
190
+ # data_df = pd.read_csv("data.csv")
191
+
192
+ # def clean_query_text(text):
193
+ # replacements = {
194
+ # "Java Script": "JavaScript",
195
+ # "java script": "JavaScript",
196
+ # "Java script": "JavaScript"
197
+ # }
198
+ # for wrong, correct in replacements.items():
199
+ # text = text.replace(wrong, correct)
200
+ # return text
201
+
202
+ # def prepare_input(data):
203
+ # cleaned_query = clean_query_text(data.query)
204
+ # input_text = f"{cleaned_query}. Candidate should complete assessment in {data.duration} minutes."
205
+ # return input_text.strip()
206
+
207
+ # def find_top_k(query_json, k=5):
208
+ # query_str = prepare_input(query_json)
209
+ # # Convert query to vector
210
+ # from sentence_transformers import SentenceTransformer
211
+ # model = SentenceTransformer("all-MiniLM-L6-v2")
212
+ # query_vec = model.encode([query_str], normalize_embeddings=True)
213
+ # scores = util.cos_sim(query_vec, embeddings)[0].numpy()
214
+ # ranked_indices = np.argsort(-scores)
215
+
216
+ # results = []
217
+ # for idx in ranked_indices:
218
+ # item = data_df.iloc[idx]
219
+ # print(f"Matched: {item['name']} with duration {item['assessment_length']}")
220
+
221
+ # result = {
222
+ # "name": item["name"],
223
+ # "url": item["url"],
224
+ # "remote_testing": item["remote"],
225
+ # "adaptive": item["adaptive"],
226
+ # "duration": item['assessment_length'],
227
+ # "test_type": item["test_types"],
228
+ # }
229
+ # results.append(result)
230
+
231
+ # if len(results) >= top_k:
232
+ # break
233
+
234
+ # return results
235
+
236
+
237
+ # # Compute similarity
238
+ # # similarities = cosine_similarity(query_vec, embeddings.numpy())[0]
239
+ # # top_indices = similarities.argsort()[-k:][::-1]
240
+
241
+ # # results = data_df.iloc[top_indices].to_dict(orient="records")
242
+ # # return results
243
+
244
+ # with gr.Blocks() as demo:
245
+ # gr.Markdown("### RAG Gradio Demo with JSON Query")
246
+
247
+ # json_input = gr.Textbox(label="JSON Query (as string)")
248
+ # output = gr.JSON(label="Top Matches from Data")
249
+
250
+ # def process(json_input_str):
251
+ # try:
252
+ # import json
253
+ # query_json = json.loads(json_input_str)
254
+ # results = find_top_k(query_json)
255
+ # return results
256
+ # except Exception as e:
257
+ # return {"error": str(e)}
258
+
259
+ # submit_btn = gr.Button("Submit")
260
+ # submit_btn.click(fn=process, inputs=[json_input], outputs=[output])
261
+
262
+ # demo.launch()