import streamlit as st
import pandas as pd
import numpy as np
import pymongo
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import os
from dotenv import load_dotenv

# Load environment variables from a local .env file, if present.
load_dotenv()

# SECURITY: the MongoDB connection string (including any credentials) must
# come from the environment only. The previous hard-coded fallback embedded
# live Atlas credentials directly in source control; those credentials are
# compromised and must be rotated. The fallback below is a credential-free
# local development URI.
MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017")

# Connect to MongoDB and bind the collections this app reads/writes.
client = pymongo.MongoClient(MONGO_URI)
db = client['test']
users_collection = db['users']
jobs_collection = db['jobs']
courses_collection = db['courses']


# Load the CSV datasets once and cache them across reruns.
@st.cache_data
def load_datasets():
    """Return (questions_df, courses_df, jobs_df) read from local CSV files.

    Cached with st.cache_data so the files are parsed only once per
    session rather than on every Streamlit rerun.
    """
    questions_df = pd.read_csv("Generated_Skill-Based_Questions.csv")
    courses_df = pd.read_csv("coursera_course_dataset_v2_no_null.csv")
    jobs_df = pd.read_csv("Updated_Job_Posting_Dataset.csv")
    return questions_df, courses_df, jobs_df


questions_df, courses_df, jobs_df = load_datasets()


# Load heavyweight precomputed artifacts once per process and cache them.
@st.cache_resource
def load_resources():
    """Load the sentence encoder and all precomputed scoring artifacts.

    Returns a tuple of:
      universal_model    -- SentenceTransformer used to embed answers
      tfidf_vectorizer   -- fitted TF-IDF vectorizer for relevance scoring
      skill_tfidf        -- dict mapping lowercased skill -> TF-IDF vector
      question_to_answer -- mapping from question to reference answer
      faiss_index        -- FAISS index over answer embeddings
      answer_embeddings  -- precomputed reference-answer embeddings
      course_similarity  -- skill-by-course similarity matrix
      job_similarity     -- skill-by-job similarity matrix

    NOTE(review): the exact shapes/schemas of the pickled artifacts are not
    visible here -- they are assumed to match what the scoring functions
    below expect; verify against the preprocessing pipeline that wrote them.
    """
    universal_model = SentenceTransformer("all-MiniLM-L6-v2")
    with open("tfidf_vectorizer.pkl", "rb") as f:
        tfidf_vectorizer = pickle.load(f)
    with open("skill_tfidf.pkl", "rb") as f:
        skill_tfidf = pickle.load(f)
    with open("question_to_answer.pkl", "rb") as f:
        question_to_answer = pickle.load(f)
    faiss_index = faiss.read_index("faiss_index.index")
    with open("answer_embeddings.pkl", "rb") as f:
        answer_embeddings = pickle.load(f)
    with open("course_similarity.pkl", "rb") as f:
        course_similarity = pickle.load(f)
    with open("job_similarity.pkl", "rb") as f:
        job_similarity = pickle.load(f)
    return (universal_model, tfidf_vectorizer, skill_tfidf, question_to_answer,
            faiss_index, answer_embeddings, course_similarity, job_similarity)


(universal_model, tfidf_vectorizer, skill_tfidf, question_to_answer,
 faiss_index, answer_embeddings, course_similarity, job_similarity) = load_resources()
# ---------------------------------------------------------------------------
# Scoring and recommendation helpers
# ---------------------------------------------------------------------------

def evaluate_response(skill, user_answer, question_idx):
    """Score a free-text answer for *skill* against the reference answer.

    Parameters:
        skill        -- the skill the question tests
        user_answer  -- the user's free-text answer ("" or "skip" scores 0)
        question_idx -- row index into answer_embeddings for the reference

    Returns (skill, score) with score in [0, 100]. The semantic cosine
    similarity is damped by a TF-IDF relevance factor clamped to
    [0.5, 1.0], so fluent but off-topic answers cannot score full marks.
    """
    if not user_answer or user_answer.lower() == "skip":
        return skill, 0.0
    user_embedding = universal_model.encode([user_answer])[0]
    expected_embedding = answer_embeddings[question_idx]
    # Cosine similarity scaled to a percentage; epsilon guards zero norms.
    score = np.dot(user_embedding, expected_embedding) / (
        np.linalg.norm(user_embedding) * np.linalg.norm(expected_embedding) + 1e-10
    ) * 100
    user_tfidf = tfidf_vectorizer.transform([user_answer]).toarray()[0]
    skill_vec = skill_tfidf.get(skill.lower(), np.zeros_like(user_tfidf))
    relevance = np.dot(user_tfidf, skill_vec) / (
        np.linalg.norm(user_tfidf) * np.linalg.norm(skill_vec) + 1e-10
    )
    return skill, max(0, score * max(0.5, min(1.0, relevance)))


def recommend_courses(skills_to_improve, user_level, upgrade=False):
    """Return up to 3 [course_title, Organization] pairs for weak skills.

    Ranks courses by a blend of skill similarity (60%), popularity (20%)
    and completion rate (20%), then prefers courses matching the target
    level ('Advanced' when upgrade=True, else user_level). Falls back to
    the unfiltered top candidates when no course matches the level.
    """
    # Hoist the skill list so it is built once, not once per skill.
    known_skills = list(questions_df['Skill'].unique())
    skill_indices = [known_skills.index(s) for s in skills_to_improve
                     if s in known_skills]
    if not skill_indices:
        return []
    similarities = course_similarity[skill_indices]
    popularity = courses_df['popularity'].fillna(0.8).values
    completion_rate = courses_df['completion_rate'].fillna(0.7).values
    total_scores = (0.6 * np.max(similarities, axis=0)
                    + 0.2 * popularity + 0.2 * completion_rate)
    target_level = 'Advanced' if upgrade else user_level
    idx = np.argsort(-total_scores)[:5]
    candidates = courses_df.iloc[idx]
    filtered = candidates[candidates['level'].str.contains(target_level, case=False, na=False)]
    if not filtered.empty:
        return filtered[['course_title', 'Organization']].values.tolist()[:3]
    return candidates[['course_title', 'Organization']].values.tolist()[:3]


def recommend_jobs(user_skills, user_level):
    """Return up to 5 (job_title, company_name, location) tuples.

    Ranks jobs by skill similarity (50%), proximity of the job's level to
    the user's level (20%) and a location preference that favors
    Islamabad/Karachi (10%).
    """
    if jobs_df.empty:
        return []
    known_skills = list(questions_df['Skill'].unique())
    skill_indices = [known_skills.index(s) for s in user_skills
                     if s in known_skills]
    if not skill_indices:
        return []
    similarities = job_similarity[skill_indices]
    total_scores = 0.5 * np.max(similarities, axis=0)
    level_map = {'Beginner': 0, 'Intermediate': 1, 'Advanced': 2}
    user_level_num = level_map.get(user_level, 1)
    # 1.0 for an exact level match, 0.5 one level apart, 0.0 two apart.
    level_scores = jobs_df['level'].apply(
        lambda x: 1 - abs(level_map.get(x, 1) - user_level_num) / 2
    ).fillna(0.5)
    location_pref = jobs_df['location'].apply(
        lambda x: 1.0 if x in ['Islamabad', 'Karachi'] else 0.7
    ).fillna(0.7)
    total_job_scores = total_scores + 0.2 * level_scores + 0.1 * location_pref
    top_job_indices = np.argsort(-total_job_scores)[:5]
    return [(jobs_df.iloc[i]['job_title'],
             jobs_df.iloc[i]['company_name'],
             jobs_df.iloc[i].get('location', 'Remote'))
            for i in top_job_indices]


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
# BUGFIX: st.set_page_config must be the first page command in the script.
# The original called it at the very end (after st.title and the forms),
# which raises StreamlitAPIException at runtime.
st.set_page_config(layout="wide")

st.title("Skill Assessment and Recommendations")

# Initialize per-session state on first run.
if 'user_skills' not in st.session_state:
    st.session_state.user_skills = []
    st.session_state.user_level = "Intermediate"

# --- Signup form: collect name, email, and a comma-separated skill list ---
with st.form("signup_form"):
    name = st.text_input("Name")
    email = st.text_input("Email")
    skills_input = st.text_area("Enter your skills (comma-separated)")
    submit = st.form_submit_button("Sign Up")
    if submit and name and email and skills_input:
        st.session_state.user_skills = [s.strip() for s in skills_input.split(",") if s.strip()]
        # BUGFIX: persist the email so later reruns (e.g. the assessment
        # submit) can find this user even after the form field is cleared.
        st.session_state.user_email = email
        user_data = {
            "name": name,
            "email": email,
            "skills": st.session_state.user_skills,
            "createdAt": pd.Timestamp.now(),
            "lastLogin": pd.Timestamp.now()
        }
        users_collection.insert_one(user_data)
        st.success("User registered successfully!")

# --- Skill assessment: one random question per registered skill ---
if st.session_state.user_skills:
    st.write("### Skill Assessment")
    user_questions = []
    for skill in st.session_state.user_skills:
        skill_questions = questions_df[questions_df['Skill'] == skill]
        if not skill_questions.empty:
            user_questions.append(skill_questions.sample(1).iloc[0])
    user_questions = pd.DataFrame(user_questions).reset_index(drop=True)

    answers = {}
    with st.form("assessment_form"):
        for idx, row in user_questions.iterrows():
            answers[row['Question']] = st.text_area(
                f"Question for {row['Skill']}: {row['Question']}", key=f"q_{idx}")
        submit_assessment = st.form_submit_button("Submit Assessment")

        if submit_assessment:
            scores = {}
            for idx, row in user_questions.iterrows():
                question_idx = questions_df.index[questions_df['Question'] == row['Question']][0]
                skill, score = evaluate_response(
                    row['Skill'], answers.get(row['Question'], ""), question_idx)
                # Keep the best score seen per skill.
                scores[skill] = max(scores.get(skill, 0), score)
            mean_score = np.mean(list(scores.values())) if scores else 50
            # Skills scoring below max(40, mean) are flagged as weak.
            dynamic_threshold = max(40, mean_score)
            weak_skills = [skill for skill, score in scores.items()
                           if score < dynamic_threshold]
            st.session_state.scores = scores
            st.session_state.weak_skills = weak_skills
            st.session_state.mean_score = mean_score

            # Persist the scores for the registered user, looked up by the
            # email captured at signup (not the possibly-empty form field).
            lookup_email = st.session_state.get("user_email", email)
            user = users_collection.find_one({"email": lookup_email})
            if user:
                users_collection.update_one(
                    {"_id": user["_id"]},
                    {"$set": {"skills_scores": scores}}
                )

    # --- Results and recommendations (shown after a submitted assessment) ---
    if 'scores' in st.session_state:
        st.write("### Assessment Results")
        for skill, score in st.session_state.scores.items():
            st.write(f"{skill}: {score:.2f}%")
        st.write(f"Mean Score: {st.session_state.mean_score:.2f}%")
        st.write(f"Weak Skills: {', '.join(st.session_state.weak_skills)}")

        st.write("### Recommended Courses")
        courses = recommend_courses(
            st.session_state.weak_skills or st.session_state.user_skills,
            st.session_state.user_level)
        for course in courses:
            st.write(f"- {course[0]} by {course[1]}")

        st.write("### Recommended Jobs")
        jobs = recommend_jobs(st.session_state.user_skills, st.session_state.user_level)
        for job in jobs:
            st.write(f"- {job[0]} at {job[1]} ({job[2]})")