File size: 8,303 Bytes
a43664e
db6e637
 
a43664e
 
 
d607da0
a43664e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d607da0
a43664e
 
aa352fb
a43664e
 
 
 
 
 
 
 
 
 
 
d607da0
a43664e
ceba453
a43664e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f417d6
a43664e
 
 
 
 
 
 
 
 
 
 
2f417d6
a43664e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import streamlit as st
import pandas as pd
import numpy as np
import pymongo
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
# SECURITY FIX: the previous fallback embedded live MongoDB Atlas credentials
# (username + password) directly in source control. Credentials must never be
# committed; the exposed password should be rotated immediately. The fallback
# is now a credential-free local instance so development still works, while
# production must supply MONGO_URI via the environment / .env file.
MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017")

# Connect to MongoDB at import time. Streamlit re-runs this script on every
# interaction, but MongoClient maintains an internal connection pool, so
# repeated construction is acceptable here.
client = pymongo.MongoClient(MONGO_URI)
db = client['test']
users_collection = db['users']
jobs_collection = db['jobs']
courses_collection = db['courses']

# Load datasets
@st.cache_data
def load_datasets():
    """Read the question, course, and job CSV files once per session.

    Returns:
        tuple: (questions_df, courses_df, jobs_df) as pandas DataFrames.
    """
    paths = (
        "Generated_Skill-Based_Questions.csv",
        "coursera_course_dataset_v2_no_null.csv",
        "Updated_Job_Posting_Dataset.csv",
    )
    return tuple(pd.read_csv(path) for path in paths)

questions_df, courses_df, jobs_df = load_datasets()

# Load precomputed resources
@st.cache_resource
def load_resources():
    """Load the sentence-embedding model, pickled artifacts, and FAISS index.

    Returns:
        tuple: (universal_model, tfidf_vectorizer, skill_tfidf,
        question_to_answer, faiss_index, answer_embeddings,
        course_similarity, job_similarity).
    """
    def _unpickle(path):
        # Read a single pickled artifact from disk.
        with open(path, "rb") as fh:
            return pickle.load(fh)

    universal_model = SentenceTransformer("all-MiniLM-L6-v2")
    tfidf_vectorizer = _unpickle("tfidf_vectorizer.pkl")
    skill_tfidf = _unpickle("skill_tfidf.pkl")
    question_to_answer = _unpickle("question_to_answer.pkl")
    faiss_index = faiss.read_index("faiss_index.index")
    answer_embeddings = _unpickle("answer_embeddings.pkl")
    course_similarity = _unpickle("course_similarity.pkl")
    job_similarity = _unpickle("job_similarity.pkl")
    return (universal_model, tfidf_vectorizer, skill_tfidf, question_to_answer,
            faiss_index, answer_embeddings, course_similarity, job_similarity)

universal_model, tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, answer_embeddings, course_similarity, job_similarity = load_resources()

# Evaluate response
def evaluate_response(skill, user_answer, question_idx):
    """Score a free-text answer against the stored reference answer.

    Args:
        skill: Name of the skill this question belongs to.
        user_answer: The user's free-text answer (may be empty or "skip").
        question_idx: Row index into the precomputed answer embeddings.

    Returns:
        tuple: (skill, score) with score in roughly 0-100. A blank answer or
        the literal word "skip" scores 0. The semantic score (cosine
        similarity of answer vs. reference embedding, scaled by 100) is damped
        by a TF-IDF relevance factor clamped to [0.5, 1.0], then floored at 0.
    """
    if not user_answer or user_answer.lower() == "skip":
        return skill, 0.0

    # Semantic similarity between the user's answer and the expected answer.
    ans_vec = universal_model.encode([user_answer])[0]
    ref_vec = answer_embeddings[question_idx]
    sem_denom = np.linalg.norm(ans_vec) * np.linalg.norm(ref_vec) + 1e-10
    semantic_score = np.dot(ans_vec, ref_vec) / sem_denom * 100

    # TF-IDF relevance of the answer to the skill's vocabulary; unknown skills
    # fall back to a zero vector (relevance ~ 0, clamped up to 0.5 below).
    answer_tfidf = tfidf_vectorizer.transform([user_answer]).toarray()[0]
    skill_vec = skill_tfidf.get(skill.lower(), np.zeros_like(answer_tfidf))
    rel_denom = np.linalg.norm(answer_tfidf) * np.linalg.norm(skill_vec) + 1e-10
    relevance = np.dot(answer_tfidf, skill_vec) / rel_denom

    # Clamp relevance into [0.5, 1.0] so weak relevance halves the score at most.
    damping = min(1.0, max(0.5, relevance))
    return skill, max(0, semantic_score * damping)

# Recommend courses
def recommend_courses(skills_to_improve, user_level, upgrade=False):
    """Recommend up to 3 courses targeting the given skills.

    Args:
        skills_to_improve: Iterable of skill names to find courses for.
        user_level: The user's level string (e.g. "Intermediate").
        upgrade: If True, target 'Advanced' courses regardless of user_level.

    Returns:
        list: Up to 3 [course_title, Organization] pairs; empty if no skill
        matches the known skill vocabulary.
    """
    # PERF FIX: the original recomputed questions_df['Skill'].unique() and ran
    # a linear .index() search for every skill; build the position lookup once.
    skill_pos = {s: i for i, s in enumerate(questions_df['Skill'].unique())}
    skill_indices = [skill_pos[s] for s in skills_to_improve if s in skill_pos]
    if not skill_indices:
        return []

    similarities = course_similarity[skill_indices]
    popularity = courses_df['popularity'].fillna(0.8).values
    completion_rate = courses_df['completion_rate'].fillna(0.7).values
    # Weighted blend: best skill-to-course similarity dominates, with smaller
    # boosts for popularity and completion rate.
    total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * popularity + 0.2 * completion_rate

    target_level = 'Advanced' if upgrade else user_level
    idx = np.argsort(-total_scores)[:5]
    candidates = courses_df.iloc[idx]
    # Prefer courses matching the target level; fall back to the unfiltered
    # top candidates when no level matches.
    filtered = candidates[candidates['level'].str.contains(target_level, case=False, na=False)]
    chosen = filtered if not filtered.empty else candidates
    return chosen[['course_title', 'Organization']].values.tolist()[:3]

# Recommend jobs
def recommend_jobs(user_skills, user_level):
    """Recommend up to 5 jobs ranked by skill match, level fit, and location.

    Args:
        user_skills: Iterable of the user's skill names.
        user_level: The user's level string ("Beginner"/"Intermediate"/"Advanced").

    Returns:
        list: Up to 5 (job_title, company_name, location) tuples; empty when
        the jobs dataset is empty or no skill matches the vocabulary.
    """
    if jobs_df.empty:
        return []

    # PERF FIX: the original recomputed questions_df['Skill'].unique() and ran
    # a linear .index() search per skill; build the position lookup once.
    skill_pos = {s: i for i, s in enumerate(questions_df['Skill'].unique())}
    skill_indices = [skill_pos[s] for s in user_skills if s in skill_pos]
    if not skill_indices:
        return []

    similarities = job_similarity[skill_indices]
    total_scores = 0.5 * np.max(similarities, axis=0)

    # Jobs closer to the user's level score higher (1.0 at an exact match,
    # dropping by 0.5 per level of distance); unknown levels default to 0.5.
    level_map = {'Beginner': 0, 'Intermediate': 1, 'Advanced': 2}
    user_level_num = level_map.get(user_level, 1)
    level_scores = jobs_df['level'].apply(lambda x: 1 - abs(level_map.get(x, 1) - user_level_num)/2).fillna(0.5)
    # Mild preference for these two cities; everything else (incl. NaN) gets 0.7.
    location_pref = jobs_df['location'].apply(lambda x: 1.0 if x in ['Islamabad', 'Karachi'] else 0.7).fillna(0.7)

    total_job_scores = total_scores + 0.2 * level_scores + 0.1 * location_pref
    top_job_indices = np.argsort(-total_job_scores)[:5]

    results = []
    for i in top_job_indices:
        row = jobs_df.iloc[i]  # PERF FIX: fetch the row once, not three iloc calls
        results.append((row['job_title'], row['company_name'], row.get('location', 'Remote')))
    return results

# Streamlit UI
st.title("Skill Assessment and Recommendations")

# Simulate user signup and skill extraction.
# Session state survives Streamlit's script reruns; initialize defaults once.
if 'user_skills' not in st.session_state:
    st.session_state.user_skills = []
    st.session_state.user_level = "Intermediate"

# Signup form: captures name/email/skills and persists the user to MongoDB.
# NOTE(review): no email-format validation and no duplicate-email check here —
# repeated submissions insert multiple user documents; confirm intended.
with st.form("signup_form"):
    name = st.text_input("Name")
    email = st.text_input("Email")
    skills_input = st.text_area("Enter your skills (comma-separated)")
    submit = st.form_submit_button("Sign Up")
    if submit and name and email and skills_input:
        # Split the comma-separated skills, dropping empty/whitespace entries.
        st.session_state.user_skills = [s.strip() for s in skills_input.split(",") if s.strip()]
        user_data = {
            "name": name,
            "email": email,
            "skills": st.session_state.user_skills,
            "createdAt": pd.Timestamp.now(),
            "lastLogin": pd.Timestamp.now()
        }
        users_collection.insert_one(user_data)
        st.success("User registered successfully!")

# Skill Assessment: shown only after the user has registered skills above.
if st.session_state.user_skills:
    st.write("### Skill Assessment")
    # Pick one random question per user skill that exists in the dataset.
    # NOTE(review): sample(1) has no fixed seed, so each Streamlit rerun can
    # draw different questions; form widget values are keyed by index, so the
    # displayed question may change between render and submit — confirm.
    user_questions = []
    for skill in st.session_state.user_skills:
        skill_questions = questions_df[questions_df['Skill'] == skill]
        if not skill_questions.empty:
            user_questions.append(skill_questions.sample(1).iloc[0])
    user_questions = pd.DataFrame(user_questions).reset_index(drop=True)

    # Collect free-text answers, one text area per sampled question.
    answers = {}
    with st.form("assessment_form"):
        for idx, row in user_questions.iterrows():
            answers[row['Question']] = st.text_area(f"Question for {row['Skill']}: {row['Question']}", key=f"q_{idx}")
        submit_assessment = st.form_submit_button("Submit Assessment")
    
    if submit_assessment:
        # Score each answer; keep the best score seen per skill.
        scores = {}
        for idx, row in user_questions.iterrows():
            # Map the sampled question back to its row index in the full
            # dataset to locate its precomputed answer embedding.
            question_idx = questions_df.index[questions_df['Question'] == row['Question']][0]
            skill, score = evaluate_response(row['Skill'], answers.get(row['Question'], ""), question_idx)
            scores[skill] = max(scores.get(skill, 0), score)
        
        # Skills scoring below max(40, mean) are flagged as weak.
        mean_score = np.mean(list(scores.values())) if scores else 50
        dynamic_threshold = max(40, mean_score)
        weak_skills = [skill for skill, score in scores.items() if score < dynamic_threshold]

        # Persist results in session state so they survive reruns.
        st.session_state.scores = scores
        st.session_state.weak_skills = weak_skills
        st.session_state.mean_score = mean_score

        # Update user scores in MongoDB.
        # NOTE(review): `email` here is the current value of the signup form's
        # text input from earlier in this script run; it may be empty if the
        # field was cleared between signup and assessment — verify.
        user = users_collection.find_one({"email": email})
        if user:
            users_collection.update_one(
                {"_id": user["_id"]},
                {"$set": {"skills_scores": scores}}
            )

    # Results and recommendations render on every rerun once scores exist.
    if 'scores' in st.session_state:
        st.write("### Assessment Results")
        for skill, score in st.session_state.scores.items():
            st.write(f"{skill}: {score:.2f}%")
        st.write(f"Mean Score: {st.session_state.mean_score:.2f}%")
        st.write(f"Weak Skills: {', '.join(st.session_state.weak_skills)}")

        # Recommendations: courses target weak skills (or all skills if none
        # are weak); jobs always use the full skill list.
        st.write("### Recommended Courses")
        courses = recommend_courses(st.session_state.weak_skills or st.session_state.user_skills, st.session_state.user_level)
        for course in courses:
            st.write(f"- {course[0]} by {course[1]}")

        st.write("### Recommended Jobs")
        jobs = recommend_jobs(st.session_state.user_skills, st.session_state.user_level)
        for job in jobs:
            st.write(f"- {job[0]} at {job[1]} ({job[2]})")

# Run the app
# NOTE(review): st.set_page_config must be the FIRST Streamlit command in the
# script; calling it here — after st.title and the forms above have already
# run — raises StreamlitAPIException on `streamlit run`. It should be moved to
# the top of the file, immediately after the imports.
if __name__ == "__main__":
    st.set_page_config(layout="wide")