iwashuman0405 committed on
Commit
9548de1
·
verified ·
1 Parent(s): cad92ed

Update recommendation_engine.py

Browse files
Files changed (1) hide show
  1. recommendation_engine.py +68 -68
recommendation_engine.py CHANGED
@@ -1,69 +1,69 @@
1
- # recommendation_engine.py
2
- import requests
3
- from bs4 import BeautifulSoup
4
- import pandas as pd
5
- from sentence_transformers import SentenceTransformer, util
6
- import torch
7
- import numpy as np
8
- from langchain.callbacks.tracers import ConsoleCallbackHandler
9
- from langsmith import traceable
10
-
11
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1",trust_remote_code=True)
12
-
13
- catalog = pd.read_csv("data.csv")
14
- embeddings = torch.load("embeddings.pth")
15
-
16
- handler = ConsoleCallbackHandler()
17
-
18
- def scrape_url(url):
19
- try:
20
- page = requests.get(url)
21
- soup = BeautifulSoup(page.text, "html.parser")
22
- return soup.get_text(separator=' ')
23
- except Exception as e:
24
- return ""
25
-
26
- def clean_query_text(text):
27
- replacements = {
28
- "Java Script": "JavaScript",
29
- "java script": "JavaScript",
30
- "Java script": "JavaScript"
31
- }
32
- for wrong, correct in replacements.items():
33
- text = text.replace(wrong, correct)
34
- return text
35
-
36
- def prepare_input(query, duration, jd_text=""):
37
- cleaned_query = clean_query_text(query)
38
- input_text = f"{cleaned_query}. Candidate should complete assessment in {duration} minutes. {jd_text}"
39
- return input_text.strip()
40
-
41
- def get_recommendations(query_text, top_k=10,max_duration = None):
42
- query_embedding = model.encode(query_text)
43
- scores = util.cos_sim(query_embedding, embeddings)[0].numpy()
44
- ranked_indices = np.argsort(-scores)
45
-
46
- results = []
47
- for idx in ranked_indices:
48
- item = catalog.iloc[idx]
49
- print(f"Matched: {item['name']} with duration {item['assessment_length']}")
50
-
51
- result = {
52
- "name": item["name"],
53
- "url": item["url"],
54
- "remote_testing": item["remote"],
55
- "adaptive": item["adaptive"],
56
- "duration": item['assessment_length'],
57
- "test_type": item["test_types"],
58
- }
59
- results.append(result)
60
-
61
- if len(results) >= top_k:
62
- break
63
-
64
- return results
65
-
66
- @traceable(name="SHL Recommendation Trace")
67
- def traced_get_recommendations(query_text, top_k=10, max_duration=None):
68
- return get_recommendations(query_text, top_k, max_duration)
69
 
 
1
+ # recommendation_engine.py
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import pandas as pd
5
+ from sentence_transformers import SentenceTransformer, util
6
+ import torch
7
+ import numpy as np
8
+ from langchain.callbacks.tracers import ConsoleCallbackHandler
9
+ from langsmith import traceable
10
+
11
# Sentence-embedding model used by get_recommendations to encode the query text.
# NOTE(review): trust_remote_code=True allows the model repository to run its
# own Python code at load time — audit the repo or pin a revision before
# deploying this to a shared environment.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1",trust_remote_code=True)

# Assessment catalog. get_recommendations reads the columns "name", "url",
# "remote", "adaptive", "assessment_length" and "test_types" from it.
catalog = pd.read_csv("data.csv")
# Precomputed embeddings that queries are compared against. Ranked positions
# from these embeddings are used directly with catalog.iloc, so this
# presumably holds one row per catalog row in the same order — TODO confirm.
# NOTE(review): weights_only=False makes torch.load unpickle arbitrary Python
# objects; that is only safe while "embeddings.pth" is a trusted, locally
# produced file. Prefer weights_only=True if the file holds a plain tensor.
embeddings = torch.load("embeddings.pth",weights_only=False)

# Console tracer handler; it is not referenced by any function visible here.
handler = ConsoleCallbackHandler()
17
+
18
def scrape_url(url, timeout=10):
    """Fetch *url* and return the text content of the page.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``; text nodes are joined with single spaces.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The extracted page text, or an empty string if the page could not be
        fetched or parsed. Scraping is best-effort, so failures never raise.
    """
    # Local import: the module does not import logging at top level.
    import logging

    try:
        page = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(page.text, "html.parser")
        return soup.get_text(separator=' ')
    except Exception as exc:
        # Keep the best-effort contract (return ""), but leave a trace so a
        # failed scrape is distinguishable from a genuinely empty page.
        logging.getLogger(__name__).warning("Could not scrape %s: %s", url, exc)
        return ""
25
+
26
def clean_query_text(text):
    """Normalise split spellings of "JavaScript" in *text*.

    Any casing of "java" followed by one or more spaces or tabs and "script"
    (for example "Java Script", "java script", "JAVA SCRIPT", "java  Script")
    is rewritten to "JavaScript". All other text is returned unchanged.

    Args:
        text: Raw query string.

    Returns:
        The query with split "Java Script" spellings collapsed.
    """
    # Local import: the module does not import re at top level. The re module
    # caches compiled patterns, so repeated calls stay cheap.
    import re

    # One case-insensitive pattern replaces the hand-written table, which only
    # covered three casings and missed variants such as "JAVA SCRIPT".
    return re.sub(r"java[ \t]+script", "JavaScript", text, flags=re.IGNORECASE)
35
+
36
def prepare_input(query, duration, jd_text=""):
    """Build the text that is embedded for a recommendation request.

    The query is passed through ``clean_query_text`` and terminated with a
    period, followed by a sentence stating the time limit and then the job
    description text, all separated by single spaces.

    Args:
        query: Free-text query describing the role or skills.
        duration: Time limit in minutes. When it is ``None`` or an empty
            string the time-limit sentence is omitted, so the literal text
            "None minutes" never reaches the embedding model.
        jd_text: Optional job-description text appended at the end. ``None``
            is treated like an empty string.

    Returns:
        The combined text with leading and trailing whitespace removed.
    """
    parts = [f"{clean_query_text(query)}."]
    if duration is not None and duration != "":
        parts.append(f"Candidate should complete assessment in {duration} minutes.")
    if jd_text:
        parts.append(jd_text)
    return " ".join(parts).strip()
40
+
41
def get_recommendations(query_text, top_k=10, max_duration=None):
    """Return the catalog entries most similar to *query_text*.

    The query is encoded with the module-level ``model``, compared against
    the module-level ``embeddings`` by cosine similarity, and catalog rows
    are visited from the highest score to the lowest.

    Args:
        query_text: Text to match against the catalog.
        top_k: Maximum number of results. A non-positive value yields an
            empty list.
        max_duration: Optional upper bound for ``assessment_length``. Rows
            whose length parses as a number greater than this bound are
            skipped. ``None`` (or a non-numeric value) disables the filter.

    Returns:
        A list of at most ``top_k`` dicts with the keys ``name``, ``url``,
        ``remote_testing``, ``adaptive``, ``duration`` and ``test_type``,
        ordered by descending similarity.
    """
    # The original appended before checking the limit, so top_k <= 0 still
    # produced one result.
    if top_k <= 0:
        return []

    # Normalise the bound once; an unparseable bound disables filtering
    # rather than raising, matching the previous never-filter behaviour.
    limit = None
    if max_duration is not None:
        limit = pd.to_numeric(max_duration, errors="coerce")
        if pd.isna(limit):
            limit = None

    query_embedding = model.encode(query_text)
    scores = util.cos_sim(query_embedding, embeddings)[0].numpy()
    ranked_indices = np.argsort(-scores)

    results = []
    for idx in ranked_indices:
        item = catalog.iloc[idx]

        if limit is not None:
            # NOTE(review): assumes assessment_length is numeric or a numeric
            # string — confirm against data.csv. Rows whose length cannot be
            # parsed are kept, since they cannot be shown to exceed the bound.
            length = pd.to_numeric(item["assessment_length"], errors="coerce")
            if pd.notna(length) and length > limit:
                continue

        print(f"Matched: {item['name']} with duration {item['assessment_length']}")

        results.append({
            "name": item["name"],
            "url": item["url"],
            "remote_testing": item["remote"],
            "adaptive": item["adaptive"],
            "duration": item['assessment_length'],
            "test_type": item["test_types"],
        })

        if len(results) >= top_k:
            break

    return results
65
+
66
@traceable(name="SHL Recommendation Trace")
def traced_get_recommendations(query_text, top_k=10, max_duration=None):
    """Run ``get_recommendations`` under the "SHL Recommendation Trace" trace.

    Arguments are forwarded unchanged and the result of
    ``get_recommendations`` is returned as-is.
    """
    recommendations = get_recommendations(
        query_text,
        top_k,
        max_duration,
    )
    return recommendations
69