iwashuman0405 committed
Commit 1a1949a · verified ·
1 Parent(s): e4ca753

Upload 9 files

Files changed (9)
  1. .gitattributes +2 -35
  2. README.md +1 -12
  3. api.py +22 -0
  4. app.py +40 -0
  5. data.csv +0 -0
  6. embeddings.pth +3 -0
  7. evaluate.py +51 -0
  8. recommendation_engine.py +69 -0
  9. requirements.txt +11 -0
.gitattributes CHANGED
@@ -1,35 +1,2 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ nomic_model/* filter=lfs diff=lfs merge=lfs -text
+ embeddings.pth filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1 @@
- ---
- title: Rag App
- emoji: 🐨
- colorFrom: blue
- colorTo: indigo
- sdk: streamlit
- sdk_version: 1.44.1
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # SHL Assessment Recommender
api.py ADDED
@@ -0,0 +1,22 @@
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from recommendation_engine import scrape_url, prepare_input, get_recommendations
+
+ app = FastAPI()
+
+ class QueryRequest(BaseModel):
+     query: str
+     duration: int
+     url: str | None = None
+
+ @app.get("/")
+ def root():
+     return {"message": "SHL Assessment Recommendation API is running."}
+
+
+ @app.post("/recommend")
+ def recommend(data: QueryRequest):
+     jd_text = scrape_url(data.url) if data.url else ""
+     input_text = prepare_input(data.query, data.duration, jd_text)
+     recommendations = get_recommendations(input_text, top_k=10, max_duration=data.duration)
+     return {"results": recommendations}
app.py ADDED
@@ -0,0 +1,40 @@
+ # app.py
+ import streamlit as st
+ from recommendation_engine import scrape_url, prepare_input, get_recommendations, traced_get_recommendations
+ from evaluate import evaluate
+ import json
+
+ st.title("SHL Assessment Recommender")
+
+ query = st.text_area("Enter job query")
+ duration = st.number_input("Max assessment duration (minutes)", min_value=5, max_value=120, value=40)
+ top_k = st.number_input("Number of results required", min_value=3, max_value=15, value=10)
+ url = st.text_input("Optional Job Description URL")
+
+ if st.button("Recommend Assessments"):
+     jd_text = scrape_url(url) if url else ""
+     query_text = prepare_input(query, duration, jd_text)
+     recommendations = traced_get_recommendations(query_text, top_k=top_k, max_duration=duration)
+     st.write("Query Input:", query_text)
+     st.subheader("Top Recommendations")
+     st.table(recommendations)
+
+ st.header("🔍 Evaluation")
+
+ eval_json = st.text_area("Enter test queries as JSON array", height=300, value="""[
+   {
+     "query": "I am hiring for Java developers who can also collaborate effectively with my business teams. Looking for an assessment(s) that can be completed in 40 minutes.",
+     "duration": 40,
+     "url": "",
+     "relevant_assessments": ["Java Programming Test", "Team Collaboration Test"]
+   }
+ ]""")
+
+ if st.button("Run Evaluation"):
+     try:
+         test_queries = json.loads(eval_json)
+         evaluate(test_queries, k=3)
+     except Exception as e:
+         st.error(f"Error parsing input or running evaluation: {e}")
+
+
data.csv ADDED
The diff for this file is too large to render. See raw diff
 
embeddings.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7b17237d1f2eb8b8fa8765c2dd87f8b18ed27ef4844067fb9898ce330bd8e5f5
+ size 1732204
evaluate.py ADDED
@@ -0,0 +1,51 @@
+ from recommendation_engine import prepare_input, scrape_url, get_recommendations
+ import numpy as np
+ import streamlit as st
+
+ def precision_at_k(preds, relevant, k):
+     preds_k = preds[:k]
+     return sum([1 for p in preds_k if p in relevant]) / k
+
+ def recall_at_k(preds, relevant, k):
+     preds_k = preds[:k]
+     return sum([1 for p in preds_k if p in relevant]) / len(relevant)
+
+ def average_precision(preds, relevant, k):
+     ap = 0
+     num_relevant = 0
+     for i in range(min(k, len(preds))):
+         if preds[i] in relevant:
+             num_relevant += 1
+             ap += num_relevant / (i + 1)
+     return ap / min(len(relevant), k) if relevant else 0
+
+ def clean_names(name):
+     return name.replace("Java Script", "JavaScript")
+
+ def evaluate(test_queries, k=3):
+     recalls, maps = [], []
+
+     for item in test_queries:
+         jd_text = scrape_url(item["url"]) if item["url"] else ""
+         input_text = prepare_input(item["query"], item["duration"], jd_text)
+         recommendations = get_recommendations(input_text, top_k=k)
+
+         pred_names = [clean_names(rec["name"]) for rec in recommendations]
+         gt = [clean_names(g) for g in item["relevant_assessments"]]
+
+
+         r = recall_at_k(pred_names, gt, k)
+         ap = average_precision(pred_names, gt, k)
+
+         recalls.append(r)
+         maps.append(ap)
+
+         st.markdown(f"""
+ **Query:** {item['query']}
+ **Recall@{k}:** {r:.3f}
+ **AP@{k}:** {ap:.3f}
+ ---
+ """)
+
+     st.success(f"📊 Mean Recall@{k}: {np.mean(recalls):.3f}")
+     st.success(f"📊 MAP@{k}: {np.mean(maps):.3f}")
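A quick worked example of the two metrics above, with toy values chosen purely for illustration (note that importing evaluate also imports recommendation_engine, which loads the model and catalog):

```python
# Toy check of the ranking metrics defined in evaluate.py.
from evaluate import recall_at_k, average_precision

preds = ["Java Programming Test", "SQL Test", "Team Collaboration Test"]
relevant = ["Java Programming Test", "Team Collaboration Test"]

# Both relevant items appear in the top 3 -> Recall@3 = 2/2 = 1.0
print(recall_at_k(preds, relevant, k=3))        # 1.0
# Hits at ranks 1 and 3 -> AP@3 = (1/1 + 2/3) / 2 ≈ 0.833
print(average_precision(preds, relevant, k=3))  # 0.833...
```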
recommendation_engine.py ADDED
@@ -0,0 +1,69 @@
+ # recommendation_engine.py
+ import requests
+ from bs4 import BeautifulSoup
+ import pandas as pd
+ from sentence_transformers import SentenceTransformer, util
+ import torch
+ import numpy as np
+ from langchain.callbacks.tracers import ConsoleCallbackHandler
+ from langsmith import traceable
+
+ model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
+
+ catalog = pd.read_csv("data.csv")
+ embeddings = torch.load("embeddings.pth")
+
+ handler = ConsoleCallbackHandler()
+
+ def scrape_url(url):
+     try:
+         page = requests.get(url)
+         soup = BeautifulSoup(page.text, "html.parser")
+         return soup.get_text(separator=' ')
+     except Exception:
+         return ""
+
+ def clean_query_text(text):
+     replacements = {
+         "Java Script": "JavaScript",
+         "java script": "JavaScript",
+         "Java script": "JavaScript"
+     }
+     for wrong, correct in replacements.items():
+         text = text.replace(wrong, correct)
+     return text
+
+ def prepare_input(query, duration, jd_text=""):
+     cleaned_query = clean_query_text(query)
+     input_text = f"{cleaned_query}. Candidate should complete assessment in {duration} minutes. {jd_text}"
+     return input_text.strip()
+
+ def get_recommendations(query_text, top_k=10, max_duration=None):  # max_duration is accepted but not applied as a filter here
+     query_embedding = model.encode(query_text)
+     scores = util.cos_sim(query_embedding, embeddings)[0].numpy()
+     ranked_indices = np.argsort(-scores)
+
+     results = []
+     for idx in ranked_indices:
+         item = catalog.iloc[idx]
+         print(f"Matched: {item['name']} with duration {item['assessment_length']}")
+
+         result = {
+             "name": item["name"],
+             "url": item["url"],
+             "remote_testing": item["remote"],
+             "adaptive": item["adaptive"],
+             "duration": item['assessment_length'],
+             "test_type": item["test_types"],
+         }
+         results.append(result)
+
+         if len(results) >= top_k:
+             break
+
+     return results
+
+ @traceable(name="SHL Recommendation Trace")
+ def traced_get_recommendations(query_text, top_k=10, max_duration=None):
+     return get_recommendations(query_text, top_k, max_duration)
+
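embeddings.pth is shipped via Git LFS, but no script to rebuild it is included in this commit. Below is a minimal sketch under stated assumptions: the same nomic encoder is used, and the catalog's `name` column is what gets embedded (the actual text field used is not shown here).

```python
# build_embeddings.py - hypothetical helper, not part of this commit.
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer

# Same encoder as recommendation_engine.py
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
catalog = pd.read_csv("data.csv")

# ASSUMPTION: the assessment name is the embedded text; adjust to the real field.
texts = catalog["name"].astype(str).tolist()
embeddings = model.encode(texts, convert_to_tensor=True)  # (n_rows, dim) tensor

torch.save(embeddings, "embeddings.pth")
```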
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ streamlit
+ pandas
+ numpy
+ sentence-transformers
+ torch
+ requests
+ beautifulsoup4
+ fastapi
+ langchain
+ langsmith
+ pydantic