# NOTE(review): this header replaces web-scrape residue (Hugging Face Spaces
# page chrome: "Runtime error" status, file size, commit hashes, and a
# line-number gutter) that was not valid Python and broke the module at parse
# time.
import streamlit as st
import pandas as pd
import numpy as np
import pymongo
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import os
from dotenv import load_dotenv
# Load environment variables from a local .env file, if one exists.
load_dotenv()

# SECURITY: the original code embedded a live MongoDB Atlas connection string
# (username + password) as the getenv fallback default, leaking credentials in
# source control. The URI must come from the environment; fall back to a
# local development instance only.
MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017")

# Connect to MongoDB and bind the collections this app reads and writes.
# (MongoClient connects lazily; errors surface on first operation.)
client = pymongo.MongoClient(MONGO_URI)
db = client['test']
users_collection = db['users']
jobs_collection = db['jobs']
courses_collection = db['courses']
# Load datasets
@st.cache_data
def load_datasets():
    """Read the question, course, and job CSV files into DataFrames.

    Cached by Streamlit so the files are parsed once per session, not on
    every rerun. Returns (questions_df, courses_df, jobs_df).
    """
    questions = pd.read_csv("Generated_Skill-Based_Questions.csv")
    courses = pd.read_csv("coursera_course_dataset_v2_no_null.csv")
    jobs = pd.read_csv("Updated_Job_Posting_Dataset.csv")
    return questions, courses, jobs


questions_df, courses_df, jobs_df = load_datasets()
# Load precomputed resources
@st.cache_resource
def load_resources():
    """Load the sentence encoder, pickled artifacts, and FAISS index.

    Cached by Streamlit so these heavyweight objects live for the whole
    process rather than being rebuilt on each rerun.
    """
    def _unpickle(path):
        # NOTE(review): pickle.load assumes these are trusted local
        # artifacts shipped with the app — never load untrusted pickles.
        with open(path, "rb") as fh:
            return pickle.load(fh)

    encoder = SentenceTransformer("all-MiniLM-L6-v2")
    vectorizer = _unpickle("tfidf_vectorizer.pkl")
    skill_vectors = _unpickle("skill_tfidf.pkl")
    qa_map = _unpickle("question_to_answer.pkl")
    index = faiss.read_index("faiss_index.index")
    embeddings = _unpickle("answer_embeddings.pkl")
    course_sim = _unpickle("course_similarity.pkl")
    job_sim = _unpickle("job_similarity.pkl")
    return encoder, vectorizer, skill_vectors, qa_map, index, embeddings, course_sim, job_sim


universal_model, tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, answer_embeddings, course_similarity, job_similarity = load_resources()
# Evaluate response
def evaluate_response(skill, user_answer, question_idx):
if not user_answer or user_answer.lower() == "skip":
return skill, 0.0
user_embedding = universal_model.encode([user_answer])[0]
expected_embedding = answer_embeddings[question_idx]
score = np.dot(user_embedding, expected_embedding) / (np.linalg.norm(user_embedding) * np.linalg.norm(expected_embedding) + 1e-10) * 100
user_tfidf = tfidf_vectorizer.transform([user_answer]).toarray()[0]
skill_vec = skill_tfidf.get(skill.lower(), np.zeros_like(user_tfidf))
relevance = np.dot(user_tfidf, skill_vec) / (np.linalg.norm(user_tfidf) * np.linalg.norm(skill_vec) + 1e-10)
return skill, max(0, score * max(0.5, min(1.0, relevance)))
# Recommend courses
def recommend_courses(skills_to_improve, user_level, upgrade=False):
    """Rank courses for the given skills and return up to three.

    Args:
        skills_to_improve: skill names; ones absent from the question
            dataset are ignored.
        user_level: course level to prefer (e.g. "Beginner").
        upgrade: if True, target 'Advanced' courses regardless of level.

    Returns:
        A list of up to three [course_title, Organization] pairs, or []
        when no input skill is known.
    """
    # PERF: build the skill -> row-index mapping once. The original rebuilt
    # list(questions_df['Skill'].unique()) and linearly scanned it with
    # .index() per skill — accidental O(n^2). dict lookup preserves the
    # exact same indices (unique() keeps first-seen order).
    skill_order = {s: i for i, s in enumerate(questions_df['Skill'].unique())}
    skill_indices = [skill_order[s] for s in skills_to_improve if s in skill_order]
    if not skill_indices:
        return []

    similarities = course_similarity[skill_indices]
    popularity = courses_df['popularity'].fillna(0.8).values
    completion_rate = courses_df['completion_rate'].fillna(0.7).values
    # Blend: 60% best skill match, 20% popularity, 20% completion rate.
    total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * popularity + 0.2 * completion_rate

    target_level = 'Advanced' if upgrade else user_level
    # Take the top five overall, then prefer the target level; fall back to
    # the unfiltered top candidates when none match.
    idx = np.argsort(-total_scores)[:5]
    candidates = courses_df.iloc[idx]
    filtered = candidates[candidates['level'].str.contains(target_level, case=False, na=False)]
    chosen = filtered if not filtered.empty else candidates
    return chosen[['course_title', 'Organization']].values.tolist()[:3]
# Recommend jobs
def recommend_jobs(user_skills, user_level):
    """Rank job postings for the user's skills and return up to five.

    Args:
        user_skills: skill names; ones absent from the question dataset
            are ignored.
        user_level: "Beginner" | "Intermediate" | "Advanced"; unknown
            values are treated as Intermediate.

    Returns:
        A list of up to five (job_title, company_name, location) tuples,
        or [] when there are no jobs or no input skill is known.
    """
    if jobs_df.empty:
        return []
    # PERF: build the skill -> row-index mapping once instead of the
    # original per-skill list(...).index(...) linear scans (O(n^2)).
    skill_order = {s: i for i, s in enumerate(questions_df['Skill'].unique())}
    skill_indices = [skill_order[s] for s in user_skills if s in skill_order]
    if not skill_indices:
        return []

    similarities = job_similarity[skill_indices]
    total_scores = 0.5 * np.max(similarities, axis=0)

    level_map = {'Beginner': 0, 'Intermediate': 1, 'Advanced': 2}
    user_level_num = level_map.get(user_level, 1)
    # Jobs at the user's level score 1.0, one level away 0.5, two away 0.0.
    level_scores = jobs_df['level'].apply(
        lambda x: 1 - abs(level_map.get(x, 1) - user_level_num) / 2
    ).fillna(0.5)
    # Mild preference for the two target cities over other/remote locations.
    location_pref = jobs_df['location'].apply(
        lambda x: 1.0 if x in ['Islamabad', 'Karachi'] else 0.7
    ).fillna(0.7)

    total_job_scores = total_scores + 0.2 * level_scores + 0.1 * location_pref
    top_job_indices = np.argsort(-total_job_scores)[:5]
    return [
        (jobs_df.iloc[i]['job_title'], jobs_df.iloc[i]['company_name'],
         jobs_df.iloc[i].get('location', 'Remote'))
        for i in top_job_indices
    ]
# Streamlit UI
st.title("Skill Assessment and Recommendations")

# Simulate user signup and skill extraction.
# Initialize per-session defaults on the first run of this browser session;
# st.session_state persists across Streamlit reruns.
if 'user_skills' not in st.session_state:
    st.session_state.user_skills = []
    # NOTE(review): user level is hardcoded here — confirm whether it should
    # be collected from the user instead.
    st.session_state.user_level = "Intermediate"

with st.form("signup_form"):
    name = st.text_input("Name")
    email = st.text_input("Email")
    skills_input = st.text_area("Enter your skills (comma-separated)")
    submit = st.form_submit_button("Sign Up")
    if submit and name and email and skills_input:
        # Parse the comma-separated skill list, dropping empty fragments.
        st.session_state.user_skills = [s.strip() for s in skills_input.split(",") if s.strip()]
        user_data = {
            "name": name,
            "email": email,
            "skills": st.session_state.user_skills,
            "createdAt": pd.Timestamp.now(),
            "lastLogin": pd.Timestamp.now()
        }
        # NOTE(review): no duplicate-email check — resubmitting the form
        # inserts a new user document each time.
        users_collection.insert_one(user_data)
        st.success("User registered successfully!")
# Skill Assessment — shown only once the user has registered skills.
if st.session_state.user_skills:
    st.write("### Skill Assessment")
    # Pick one random question per skill that exists in the question bank.
    # NOTE(review): sample(1) is unseeded, so each rerun can draw a different
    # question — verify this is intended with Streamlit's rerun model.
    user_questions = []
    for skill in st.session_state.user_skills:
        skill_questions = questions_df[questions_df['Skill'] == skill]
        if not skill_questions.empty:
            user_questions.append(skill_questions.sample(1).iloc[0])
    user_questions = pd.DataFrame(user_questions).reset_index(drop=True)

    # Collect one free-text answer per question inside a single form.
    answers = {}
    with st.form("assessment_form"):
        for idx, row in user_questions.iterrows():
            answers[row['Question']] = st.text_area(f"Question for {row['Skill']}: {row['Question']}", key=f"q_{idx}")
        submit_assessment = st.form_submit_button("Submit Assessment")

    if submit_assessment:
        # Score each answer; keep the best score per skill when a skill
        # appears more than once.
        scores = {}
        for idx, row in user_questions.iterrows():
            # Map the sampled question back to its row index in the full
            # question dataset (needed to look up the answer embedding).
            question_idx = questions_df.index[questions_df['Question'] == row['Question']][0]
            skill, score = evaluate_response(row['Skill'], answers.get(row['Question'], ""), question_idx)
            scores[skill] = max(scores.get(skill, 0), score)
        # Skills below the dynamic threshold (mean score, floored at 40)
        # are flagged as weak.
        mean_score = np.mean(list(scores.values())) if scores else 50
        dynamic_threshold = max(40, mean_score)
        weak_skills = [skill for skill, score in scores.items() if score < dynamic_threshold]
        # Persist results in the session so the results section can render
        # them on subsequent reruns.
        st.session_state.scores = scores
        st.session_state.weak_skills = weak_skills
        st.session_state.mean_score = mean_score
        # Update user scores in MongoDB — `email` comes from the signup form
        # widget above, which re-executes on every rerun.
        user = users_collection.find_one({"email": email})
        if user:
            users_collection.update_one(
                {"_id": user["_id"]},
                {"$set": {"skills_scores": scores}}
            )
# Results + recommendations — rendered whenever a past assessment exists in
# this session (st.session_state survives reruns).
if 'scores' in st.session_state:
    st.write("### Assessment Results")
    for skill, score in st.session_state.scores.items():
        st.write(f"{skill}: {score:.2f}%")
    st.write(f"Mean Score: {st.session_state.mean_score:.2f}%")
    st.write(f"Weak Skills: {', '.join(st.session_state.weak_skills)}")

    # Recommendations: target the weak skills, or all of the user's skills
    # when none were flagged weak.
    st.write("### Recommended Courses")
    courses = recommend_courses(st.session_state.weak_skills or st.session_state.user_skills, st.session_state.user_level)
    for course in courses:
        st.write(f"- {course[0]} by {course[1]}")

    st.write("### Recommended Jobs")
    jobs = recommend_jobs(st.session_state.user_skills, st.session_state.user_level)
    for job in jobs:
        st.write(f"- {job[0]} at {job[1]} ({job[2]})")
# Run the app. Streamlit executes the script with __name__ == "__main__"
# under `streamlit run`, so this guard is satisfied in normal use.
if __name__ == "__main__":
    # FIX: removed a stray trailing "|" (scrape artifact) that made the
    # original line a SyntaxError. Also removed the st.set_page_config call:
    # set_page_config must be the FIRST Streamlit command in the script, and
    # calling it here — after st.title() and other widgets have already run —
    # raises StreamlitAPIException on every run. Configure the page at the
    # very top of the file instead.
    pass