# app.py — Streamlit skill-assessment and recommendation app.
# (Header metadata from the Hugging Face Space export — author Muhammad541,
# revision a43664e, ~8.3 kB — converted to a comment so the file is valid Python.)
import streamlit as st
import pandas as pd
import numpy as np
import pymongo
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import os
from dotenv import load_dotenv
# --- Environment & database setup ---------------------------------------
# Load settings from a local .env file, if present.
load_dotenv()

# SECURITY FIX: the original code embedded a full MongoDB Atlas connection
# string (username + password) as the getenv() fallback default. Credentials
# must never live in source control; require the environment variable instead.
MONGO_URI = os.getenv("MONGO_URI")
if not MONGO_URI:
    raise RuntimeError("MONGO_URI environment variable is not set")

# Connect to MongoDB (pymongo connects lazily, on first operation).
client = pymongo.MongoClient(MONGO_URI)
db = client['test']
users_collection = db['users']        # registered users and their scores
jobs_collection = db['jobs']          # job postings collection
courses_collection = db['courses']    # course catalog collection
# --- Static datasets -----------------------------------------------------
@st.cache_data
def load_datasets():
    """Read the three CSV fixtures once per session: questions, courses, jobs."""
    frames = (
        pd.read_csv("Generated_Skill-Based_Questions.csv"),
        pd.read_csv("coursera_course_dataset_v2_no_null.csv"),
        pd.read_csv("Updated_Job_Posting_Dataset.csv"),
    )
    return frames


questions_df, courses_df, jobs_df = load_datasets()
# --- Precomputed model artefacts -----------------------------------------
@st.cache_resource
def load_resources():
    """Load the sentence encoder, FAISS index and all pickled artefacts once."""

    def _unpickle(path):
        # Read one pickle file. NOTE(review): pickle.load is unsafe on
        # untrusted data; these artefacts are assumed to ship with the app.
        with open(path, "rb") as fh:
            return pickle.load(fh)

    universal_model = SentenceTransformer("all-MiniLM-L6-v2")
    tfidf_vectorizer = _unpickle("tfidf_vectorizer.pkl")
    skill_tfidf = _unpickle("skill_tfidf.pkl")
    question_to_answer = _unpickle("question_to_answer.pkl")
    faiss_index = faiss.read_index("faiss_index.index")
    answer_embeddings = _unpickle("answer_embeddings.pkl")
    course_similarity = _unpickle("course_similarity.pkl")
    job_similarity = _unpickle("job_similarity.pkl")
    return (universal_model, tfidf_vectorizer, skill_tfidf, question_to_answer,
            faiss_index, answer_embeddings, course_similarity, job_similarity)


(universal_model, tfidf_vectorizer, skill_tfidf, question_to_answer,
 faiss_index, answer_embeddings, course_similarity, job_similarity) = load_resources()
# --- Answer scoring ------------------------------------------------------
def evaluate_response(skill, user_answer, question_idx):
    """Score a free-text answer for `skill` against its reference answer.

    Returns (skill, score) with score in [0, 100]. Empty answers and the
    literal "skip" score 0. The semantic cosine score is damped by a TF-IDF
    relevance factor clamped to [0.5, 1.0] so fluent but off-topic answers
    are penalised.
    """
    if not user_answer or user_answer.lower() == "skip":
        return skill, 0.0

    # Cosine similarity between the user's answer embedding and the
    # precomputed reference-answer embedding (epsilon avoids div-by-zero).
    ans_vec = universal_model.encode([user_answer])[0]
    ref_vec = answer_embeddings[question_idx]
    denom = np.linalg.norm(ans_vec) * np.linalg.norm(ref_vec) + 1e-10
    semantic_score = np.dot(ans_vec, ref_vec) / denom * 100

    # TF-IDF overlap with the skill's canonical term vector; unknown skills
    # fall back to a zero vector (relevance ~ 0, damping floor 0.5 applies).
    tfidf_vec = tfidf_vectorizer.transform([user_answer]).toarray()[0]
    skill_vec = skill_tfidf.get(skill.lower(), np.zeros_like(tfidf_vec))
    rel_denom = np.linalg.norm(tfidf_vec) * np.linalg.norm(skill_vec) + 1e-10
    relevance = np.dot(tfidf_vec, skill_vec) / rel_denom

    # Clamp relevance to [0.5, 1.0]; equivalent to max(0.5, min(1.0, r)).
    damping = min(1.0, max(0.5, relevance))
    return skill, max(0, semantic_score * damping)
# --- Course recommendations ----------------------------------------------
def recommend_courses(skills_to_improve, user_level, upgrade=False):
    """Recommend up to 3 courses covering the given skills.

    skills_to_improve: skill names matching entries in questions_df['Skill'].
    user_level: e.g. 'Beginner' / 'Intermediate' / 'Advanced'.
    upgrade: when True, target 'Advanced' courses regardless of user_level.
    Returns a list of [course_title, Organization] pairs (possibly empty).
    """
    # PERF FIX: the original recomputed questions_df['Skill'].unique() and
    # did a linear list .index() scan for every skill; build the position
    # map once instead.
    skill_pos = {s: i for i, s in enumerate(questions_df['Skill'].unique())}
    skill_indices = [skill_pos[s] for s in skills_to_improve if s in skill_pos]
    if not skill_indices:
        return []

    # Blend the best per-skill similarity with popularity and completion rate.
    similarities = course_similarity[skill_indices]
    popularity = courses_df['popularity'].fillna(0.8).values
    completion_rate = courses_df['completion_rate'].fillna(0.7).values
    total_scores = (0.6 * np.max(similarities, axis=0)
                    + 0.2 * popularity
                    + 0.2 * completion_rate)

    target_level = 'Advanced' if upgrade else user_level
    idx = np.argsort(-total_scores)[:5]
    candidates = courses_df.iloc[idx]
    # Prefer courses matching the target level; fall back to the raw top-5.
    filtered = candidates[candidates['level'].str.contains(target_level, case=False, na=False)]
    chosen = filtered if not filtered.empty else candidates
    return chosen[['course_title', 'Organization']].values.tolist()[:3]
# --- Job recommendations -------------------------------------------------
def recommend_jobs(user_skills, user_level):
    """Recommend up to 5 jobs as (job_title, company_name, location) tuples.

    user_skills: skill names matching entries in questions_df['Skill'].
    user_level: e.g. 'Beginner' / 'Intermediate' / 'Advanced'.
    Returns [] when there are no postings or no recognised skills.
    """
    if jobs_df.empty:
        return []

    # PERF FIX: the original recomputed questions_df['Skill'].unique() and a
    # linear .index() scan per skill; build the position map once.
    skill_pos = {s: i for i, s in enumerate(questions_df['Skill'].unique())}
    skill_indices = [skill_pos[s] for s in user_skills if s in skill_pos]
    if not skill_indices:
        return []

    similarities = job_similarity[skill_indices]
    total_scores = 0.5 * np.max(similarities, axis=0)

    # Closeness of the posting's level to the user's level (1.0 == exact match,
    # unknown levels default to Intermediate).
    level_map = {'Beginner': 0, 'Intermediate': 1, 'Advanced': 2}
    user_level_num = level_map.get(user_level, 1)
    level_scores = jobs_df['level'].apply(
        lambda x: 1 - abs(level_map.get(x, 1) - user_level_num) / 2).fillna(0.5)
    # Hard-coded city preference; everything else (incl. NaN) scores 0.7.
    location_pref = jobs_df['location'].apply(
        lambda x: 1.0 if x in ['Islamabad', 'Karachi'] else 0.7).fillna(0.7)

    total_job_scores = total_scores + 0.2 * level_scores + 0.1 * location_pref
    top_job_indices = np.argsort(-total_job_scores)[:5]
    return [(jobs_df.iloc[i]['job_title'],
             jobs_df.iloc[i]['company_name'],
             jobs_df.iloc[i].get('location', 'Remote'))
            for i in top_job_indices]
# --- Streamlit UI: title and signup form ---------------------------------
st.title("Skill Assessment and Recommendations")
# Simulate user signup and skill extraction
if 'user_skills' not in st.session_state:
    # First run in this browser session: no skills yet, default level.
    st.session_state.user_skills = []
    st.session_state.user_level = "Intermediate"
with st.form("signup_form"):
    name = st.text_input("Name")
    email = st.text_input("Email")
    skills_input = st.text_area("Enter your skills (comma-separated)")
    submit = st.form_submit_button("Sign Up")
# Handle submission on the rerun triggered by the form's submit button.
if submit and name and email and skills_input:
    # Parse the comma-separated skills, dropping empty entries.
    st.session_state.user_skills = [s.strip() for s in skills_input.split(",") if s.strip()]
    user_data = {
        "name": name,
        "email": email,
        "skills": st.session_state.user_skills,
        "createdAt": pd.Timestamp.now(),
        "lastLogin": pd.Timestamp.now()
    }
    # NOTE(review): no duplicate-email check before insert — confirm intended.
    users_collection.insert_one(user_data)
    st.success("User registered successfully!")
# --- Skill assessment: one sampled question per registered skill ---------
if st.session_state.user_skills:
    st.write("### Skill Assessment")
    user_questions = []
    for skill in st.session_state.user_skills:
        skill_questions = questions_df[questions_df['Skill'] == skill]
        if not skill_questions.empty:
            # Randomly sample one question for this skill.
            # NOTE(review): re-sampled on every Streamlit rerun — the shown
            # question can change between renders; confirm this is intended.
            user_questions.append(skill_questions.sample(1).iloc[0])
    user_questions = pd.DataFrame(user_questions).reset_index(drop=True)
    answers = {}
    with st.form("assessment_form"):
        for idx, row in user_questions.iterrows():
            answers[row['Question']] = st.text_area(f"Question for {row['Skill']}: {row['Question']}", key=f"q_{idx}")
        submit_assessment = st.form_submit_button("Submit Assessment")
    if submit_assessment:
        scores = {}
        for idx, row in user_questions.iterrows():
            # Map the sampled question back to its row index in questions_df
            # to find the matching reference-answer embedding.
            question_idx = questions_df.index[questions_df['Question'] == row['Question']][0]
            skill, score = evaluate_response(row['Skill'], answers.get(row['Question'], ""), question_idx)
            # Keep the best score if a skill appears more than once.
            scores[skill] = max(scores.get(skill, 0), score)
        mean_score = np.mean(list(scores.values())) if scores else 50
        # A skill is "weak" when it scores below max(40, mean of all scores).
        dynamic_threshold = max(40, mean_score)
        weak_skills = [skill for skill, score in scores.items() if score < dynamic_threshold]
        st.session_state.scores = scores
        st.session_state.weak_skills = weak_skills
        st.session_state.mean_score = mean_score
        # Update user scores in MongoDB
        # NOTE(review): `email` comes from the signup widget on this same
        # rerun; if that field is empty the lookup silently matches nothing.
        user = users_collection.find_one({"email": email})
        if user:
            users_collection.update_one(
                {"_id": user["_id"]},
                {"$set": {"skills_scores": scores}}
            )
# --- Results & recommendations (persist across reruns via session state) --
if 'scores' in st.session_state:
    st.write("### Assessment Results")
    for skill, score in st.session_state.scores.items():
        st.write(f"{skill}: {score:.2f}%")
    st.write(f"Mean Score: {st.session_state.mean_score:.2f}%")
    st.write(f"Weak Skills: {', '.join(st.session_state.weak_skills)}")
    # Recommendations
    st.write("### Recommended Courses")
    # Fall back to all user skills when nothing was flagged weak.
    courses = recommend_courses(st.session_state.weak_skills or st.session_state.user_skills, st.session_state.user_level)
    for course in courses:
        st.write(f"- {course[0]} by {course[1]}")
    st.write("### Recommended Jobs")
    jobs = recommend_jobs(st.session_state.user_skills, st.session_state.user_level)
    for job in jobs:
        st.write(f"- {job[0]} at {job[1]} ({job[2]})")
# Run the app
# BUG FIX: st.set_page_config() must be the FIRST Streamlit command executed
# in a script; calling it here — after st.title() and other st.* calls above —
# raises StreamlitAPIException on every run. Guarded so the misplaced call
# cannot crash the app; the call should properly be moved to the top of the
# file, before st.title().
if __name__ == "__main__":
    try:
        st.set_page_config(layout="wide")
    except Exception:
        # Page config was already implicitly fixed by earlier st.* calls;
        # best-effort only, nothing to recover.
        pass