# Streamlit app: skill assessment with course and job recommendations.
import streamlit as st
import pandas as pd
import numpy as np
import pymongo
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import os
from dotenv import load_dotenv

# Load environment variables from a local .env file, if present.
load_dotenv()

# SECURITY: the connection string must come from the environment. The original
# code embedded a username/password fallback directly in source, which leaks
# credentials in version control — those credentials should be rotated.
MONGO_URI = os.getenv("MONGO_URI")
if not MONGO_URI:
    raise RuntimeError("MONGO_URI environment variable is not set")

# Connect to MongoDB (pymongo connects lazily; errors surface on first query).
client = pymongo.MongoClient(MONGO_URI)
db = client['test']
users_collection = db['users']
jobs_collection = db['jobs']
courses_collection = db['courses']
@st.cache_data
def load_datasets():
    """Read the three CSV datasets from disk.

    Returns:
        tuple: (questions_df, courses_df, jobs_df) as pandas DataFrames.

    ``st.cache_data`` keeps the parsed frames across Streamlit reruns, so the
    CSVs are not re-read on every widget interaction.
    """
    questions_df = pd.read_csv("Generated_Skill-Based_Questions.csv")
    courses_df = pd.read_csv("coursera_course_dataset_v2_no_null.csv")
    jobs_df = pd.read_csv("Updated_Job_Posting_Dataset.csv")
    return questions_df, courses_df, jobs_df


questions_df, courses_df, jobs_df = load_datasets()
@st.cache_resource
def load_resources():
    """Load the heavy, non-serializable ML artifacts.

    Returns:
        tuple: (universal_model, tfidf_vectorizer, skill_tfidf,
        question_to_answer, faiss_index, answer_embeddings,
        course_similarity, job_similarity).

    ``st.cache_resource`` loads the model, pickles, and FAISS index once per
    process instead of on every Streamlit rerun.

    NOTE(review): pickle.load is only safe because these artifact files ship
    with the app; never point these paths at untrusted input.
    """
    universal_model = SentenceTransformer("all-MiniLM-L6-v2")
    with open("tfidf_vectorizer.pkl", "rb") as f:
        tfidf_vectorizer = pickle.load(f)
    with open("skill_tfidf.pkl", "rb") as f:
        skill_tfidf = pickle.load(f)
    with open("question_to_answer.pkl", "rb") as f:
        question_to_answer = pickle.load(f)
    faiss_index = faiss.read_index("faiss_index.index")
    with open("answer_embeddings.pkl", "rb") as f:
        answer_embeddings = pickle.load(f)
    with open("course_similarity.pkl", "rb") as f:
        course_similarity = pickle.load(f)
    with open("job_similarity.pkl", "rb") as f:
        job_similarity = pickle.load(f)
    return universal_model, tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, answer_embeddings, course_similarity, job_similarity


universal_model, tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, answer_embeddings, course_similarity, job_similarity = load_resources()
def evaluate_response(skill, user_answer, question_idx):
    """Score a free-text answer to one skill question.

    Args:
        skill: Name of the skill the question belongs to.
        user_answer: The candidate's free-text answer; empty or "skip" scores 0.
        question_idx: Row index into ``answer_embeddings`` for the reference answer.

    Returns:
        (skill, score) where score is a 0-100 percentage.
    """
    # Skipped or empty answers score zero outright.
    if not user_answer or user_answer.lower() == "skip":
        return skill, 0.0

    def _cosine(a, b):
        # Cosine similarity; the epsilon keeps zero vectors from dividing by 0.
        return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-10)

    # Semantic similarity between the answer and the reference, as a percentage.
    answer_vec = universal_model.encode([user_answer])[0]
    reference_vec = answer_embeddings[question_idx]
    semantic_score = _cosine(answer_vec, reference_vec) * 100

    # TF-IDF relevance of the answer to the skill's vocabulary; unknown skills
    # fall back to a zero vector (relevance 0).
    answer_tfidf = tfidf_vectorizer.transform([user_answer]).toarray()[0]
    skill_vec = skill_tfidf.get(skill.lower(), np.zeros_like(answer_tfidf))
    relevance = _cosine(answer_tfidf, skill_vec)

    # Relevance is clamped to [0.5, 1.0] so an off-topic answer loses at most
    # half its semantic score; the outer max floors the result at zero.
    return skill, max(0, semantic_score * max(0.5, min(1.0, relevance)))
def recommend_courses(skills_to_improve, user_level, upgrade=False):
    """Recommend up to 3 courses for the given skills.

    Args:
        skills_to_improve: Skill names to find courses for.
        user_level: 'Beginner' / 'Intermediate' / 'Advanced'.
        upgrade: When True, target 'Advanced' courses regardless of user_level.

    Returns:
        List of [course_title, Organization] pairs (at most 3), or [] when no
        requested skill appears in the question bank.
    """
    # Hoist the unique-skill lookup out of the loop: the original rebuilt
    # list(unique()) and called .index() for every skill — O(skills * unique).
    skill_pos = {s: i for i, s in enumerate(questions_df['Skill'].unique())}
    skill_indices = [skill_pos[skill] for skill in skills_to_improve if skill in skill_pos]
    if not skill_indices:
        return []
    # Best similarity of each course to any requested skill, blended with
    # popularity and completion rate (missing values get neutral defaults).
    similarities = course_similarity[skill_indices]
    popularity = courses_df['popularity'].fillna(0.8).values
    completion_rate = courses_df['completion_rate'].fillna(0.7).values
    total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * popularity + 0.2 * completion_rate
    target_level = 'Advanced' if upgrade else user_level
    # Take the 5 best-scoring courses, then prefer those matching the target
    # level; fall back to the unfiltered top 5 if none match.
    idx = np.argsort(-total_scores)[:5]
    candidates = courses_df.iloc[idx]
    filtered = candidates[candidates['level'].str.contains(target_level, case=False, na=False)]
    chosen = filtered if not filtered.empty else candidates
    return chosen[['course_title', 'Organization']].values.tolist()[:3]
def recommend_jobs(user_skills, user_level):
    """Recommend up to 5 jobs matching the user's skills and level.

    Args:
        user_skills: Skill names the user claims.
        user_level: 'Beginner' / 'Intermediate' / 'Advanced'.

    Returns:
        List of (job_title, company_name, location) tuples (at most 5), or []
        when there are no jobs or no requested skill is in the question bank.
    """
    if jobs_df.empty:
        return []
    # Hoist the unique-skill lookup out of the loop: the original rebuilt
    # list(unique()) and called .index() for every skill — O(skills * unique).
    skill_pos = {s: i for i, s in enumerate(questions_df['Skill'].unique())}
    skill_indices = [skill_pos[skill] for skill in user_skills if skill in skill_pos]
    if not skill_indices:
        return []
    similarities = job_similarity[skill_indices]
    # Weighting: skill match 0.5, level proximity 0.2, location preference 0.1.
    total_scores = 0.5 * np.max(similarities, axis=0)
    level_map = {'Beginner': 0, 'Intermediate': 1, 'Advanced': 2}
    user_level_num = level_map.get(user_level, 1)
    # 1.0 for an exact level match, down to 0.0 two levels away; unknown level
    # strings fall back to Intermediate via .get. The .fillna is a no-op safety
    # net kept from the original.
    level_scores = jobs_df['level'].apply(lambda x: 1 - abs(level_map.get(x, 1) - user_level_num) / 2).fillna(0.5)
    # Mild preference for the two hard-coded local cities over anywhere else.
    location_pref = jobs_df['location'].apply(lambda x: 1.0 if x in ['Islamabad', 'Karachi'] else 0.7).fillna(0.7)
    total_job_scores = total_scores + 0.2 * level_scores + 0.1 * location_pref
    top_job_indices = np.argsort(-total_job_scores)[:5]
    return [
        (jobs_df.iloc[i]['job_title'], jobs_df.iloc[i]['company_name'], jobs_df.iloc[i].get('location', 'Remote'))
        for i in top_job_indices
    ]
# --- Streamlit UI ---
# st.set_page_config must be the FIRST Streamlit command in the script; the
# original called it at the very bottom, which raises StreamlitAPIException
# once st.title has run (the likely cause of the Space's "Runtime error").
st.set_page_config(layout="wide")
st.title("Skill Assessment and Recommendations")

# First run of a session: initialize signup state.
# NOTE(review): user_level is hard-coded to "Intermediate"; presumably a level
# picker is planned — confirm intended behavior.
if 'user_skills' not in st.session_state:
    st.session_state.user_skills = []
    st.session_state.user_level = "Intermediate"

with st.form("signup_form"):
    name = st.text_input("Name")
    email = st.text_input("Email")
    skills_input = st.text_area("Enter your skills (comma-separated)")
    submit = st.form_submit_button("Sign Up")
    if submit and name and email and skills_input:
        # Parse the comma-separated skill list, dropping empty fragments.
        st.session_state.user_skills = [s.strip() for s in skills_input.split(",") if s.strip()]
        user_data = {
            "name": name,
            "email": email,
            "skills": st.session_state.user_skills,
            "createdAt": pd.Timestamp.now(),
            "lastLogin": pd.Timestamp.now()
        }
        users_collection.insert_one(user_data)
        st.success("User registered successfully!")
# Skill Assessment: rendered only after signup has populated user_skills.
if st.session_state.user_skills:
    st.write("### Skill Assessment")
    # Draw one random question per claimed skill; skills with no questions in
    # the bank are silently skipped.
    user_questions = []
    for skill in st.session_state.user_skills:
        skill_questions = questions_df[questions_df['Skill'] == skill]
        if not skill_questions.empty:
            user_questions.append(skill_questions.sample(1).iloc[0])
    user_questions = pd.DataFrame(user_questions).reset_index(drop=True)
    answers = {}
    with st.form("assessment_form"):
        for idx, row in user_questions.iterrows():
            answers[row['Question']] = st.text_area(f"Question for {row['Skill']}: {row['Question']}", key=f"q_{idx}")
        submit_assessment = st.form_submit_button("Submit Assessment")
    if submit_assessment:
        scores = {}
        for idx, row in user_questions.iterrows():
            # Map the sampled question back to its row index in questions_df;
            # assumes question text is unique in the CSV — TODO confirm.
            question_idx = questions_df.index[questions_df['Question'] == row['Question']][0]
            skill, score = evaluate_response(row['Skill'], answers.get(row['Question'], ""), question_idx)
            # Keep the best score if a skill somehow maps to multiple questions.
            scores[skill] = max(scores.get(skill, 0), score)
        # A skill is "weak" when it scores below the mean, with the threshold
        # floored at 40%; an empty assessment defaults the mean to 50.
        mean_score = np.mean(list(scores.values())) if scores else 50
        dynamic_threshold = max(40, mean_score)
        weak_skills = [skill for skill, score in scores.items() if score < dynamic_threshold]
        # Persist results in session_state so they survive the rerun.
        st.session_state.scores = scores
        st.session_state.weak_skills = weak_skills
        st.session_state.mean_score = mean_score
        # Update user scores in MongoDB.
        # NOTE(review): `email` comes from the signup form widget above; this
        # relies on the widget retaining its value across reruns — if the
        # field is cleared, find_one matches nothing and the update is
        # silently skipped. Consider storing the email in session_state.
        user = users_collection.find_one({"email": email})
        if user:
            users_collection.update_one(
                {"_id": user["_id"]},
                {"$set": {"skills_scores": scores}}
            )
# Results and recommendations: shown once an assessment has been submitted
# ('scores' persists in session_state across reruns).
if 'scores' in st.session_state:
    st.write("### Assessment Results")
    for skill, score in st.session_state.scores.items():
        st.write(f"{skill}: {score:.2f}%")
    st.write(f"Mean Score: {st.session_state.mean_score:.2f}%")
    st.write(f"Weak Skills: {', '.join(st.session_state.weak_skills)}")
    # Recommendations target the weak skills, falling back to all of the
    # user's skills when none were flagged weak.
    st.write("### Recommended Courses")
    courses = recommend_courses(st.session_state.weak_skills or st.session_state.user_skills, st.session_state.user_level)
    for course in courses:
        st.write(f"- {course[0]} by {course[1]}")
    st.write("### Recommended Jobs")
    jobs = recommend_jobs(st.session_state.user_skills, st.session_state.user_level)
    for job in jobs:
        st.write(f"- {job[0]} at {job[1]} ({job[2]})")
# NOTE: the original file ended with
#     if __name__ == "__main__":
#         st.set_page_config(layout="wide")
# which is doubly wrong for Streamlit: `streamlit run` executes the script
# top-to-bottom with __name__ == "__main__" (so the guard still fires), and
# st.set_page_config raises StreamlitAPIException unless it is the *first*
# Streamlit command — by this point st.title and other calls have already run.
# That late call is the likely cause of the Space's "Runtime error"; page
# config belongs at the very top of the script, so the call is removed here.