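"""Streamlit skill-assessment demo.

Pulls a user's skills from MongoDB, asks one question per skill, scores free-text
answers with sentence-transformer cosine similarity, and recommends courses for
weak skills plus matching jobs from the bundled CSV catalogues.
"""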
import streamlit as st
import pandas as pd
import pymongo
from sentence_transformers import SentenceTransformer, util
import numpy as np

# MongoDB Connection
# NOTE: <password> is a placeholder; in practice load the full URI from st.secrets
# or an environment variable instead of hardcoding credentials.
MONGO_URI = "mongodb://muhammadbinimran1001:<password>@dsm-shard-00-00.inrzs.mongodb.net:27017,dsm-shard-00-01.inrzs.mongodb.net:27017,dsm-shard-00-02.inrzs.mongodb.net:27017/?ssl=true&replicaSet=atlas-nbg4er-shard-0&authSource=admin&retryWrites=true&w=majority"
client = pymongo.MongoClient(MONGO_URI)
db = client['test']
users_collection = db['users']
jobs_collection = db['jobs']
courses_collection = db['courses']
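# NOTE (suggestion): in a deployed app the client is usually created inside a cached
# factory so Streamlit does not reopen the connection on every rerun, e.g.:
#
#   @st.cache_resource
#   def get_client():
#       return pymongo.MongoClient(MONGO_URI)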

# Load Datasets
@st.cache_data
def load_data():
    questions_df = pd.read_csv("Generated_Skill-Based_Questions.csv")
    jobs_df = pd.read_csv("Updated_Job_Posting_Dataset.csv")
    courses_df = pd.read_csv("coursera_course_dataset_v2_no_null.csv")
    return questions_df, jobs_df, courses_df

questions_df, jobs_df, courses_df = load_data()

# Load or Initialize Model
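# st.cache_resource keeps a single model instance alive across reruns and sessions,
# whereas st.cache_data (above) caches the returned DataFrames by serializing them.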
@st.cache_resource
def load_model():
    return SentenceTransformer('all-MiniLM-L6-v2')

model = load_model()

# Skill Extraction and Question Generation
def get_user_skills(user_id):
    # NOTE: assumes _id is stored as a plain string; if the collection stores
    # ObjectIds, wrap user_id with bson.ObjectId(user_id) before querying.
    user = users_collection.find_one({"_id": user_id})
    return user.get("skills", []) if user else []

def get_questions_for_skills(skills):
    questions = []
    for skill in skills:
        # Filter first and check for emptiness; calling .sample(1) on an empty
        # frame would raise a ValueError for skills with no questions.
        skill_questions = questions_df[questions_df['Skill'] == skill]
        if not skill_questions.empty:
            questions.append(skill_questions.sample(1).iloc[0])
    return pd.DataFrame(questions) if questions else None

# Answer Evaluation
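# The score is the cosine similarity between sentence embeddings of the user's answer
# and the expected answer, scaled to 0-100; similarity can be slightly negative, hence
# the max(0, ...) clamp below.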
def evaluate_answer(user_answer, expected_answer):
    user_embedding = model.encode([user_answer], convert_to_tensor=True)[0]
    expected_embedding = model.encode([expected_answer], convert_to_tensor=True)[0]
    score = util.pytorch_cos_sim(user_embedding, expected_embedding).item() * 100
    return max(0, round(score, 2))

# Recommendation Logic
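# Both recommenders embed the user's skills and each item's skill string with the same
# sentence-transformer model, then rank items by their best cosine match to any skill
# and return the top 3.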
def recommend_courses(skills, user_level):
    # NOTE: user_level is not used in the ranking yet.
    # Only recommend if at least one skill appears in the question bank.
    if not any(skill in questions_df['Skill'].values for skill in skills):
        return []
    course_skills = courses_df['skills'].fillna("").tolist()
    course_embeddings = model.encode(course_skills, convert_to_tensor=True)
    skill_embeddings = model.encode(skills, convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(skill_embeddings, course_embeddings).cpu().numpy()
    # Score each course by its best match against any of the user's skills.
    total_scores = 0.6 * np.max(similarities, axis=0)
    idx = np.argsort(-total_scores)[:3]
    return courses_df.iloc[idx][['course_title', 'Organization']].values.tolist()

def recommend_jobs(skills, user_level):
    # NOTE: user_level is not used in the ranking yet.
    if not any(skill in questions_df['Skill'].values for skill in skills):
        return []
    job_skills = jobs_df['required_skills'].fillna("").tolist()
    job_embeddings = model.encode(job_skills, convert_to_tensor=True)
    skill_embeddings = model.encode(skills, convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(skill_embeddings, job_embeddings).cpu().numpy()
    # Score each job by its best match against any of the user's skills.
    total_scores = 0.5 * np.max(similarities, axis=0)
    idx = np.argsort(-total_scores)[:3]
    return jobs_df.iloc[idx][['job_title', 'company_name', 'location']].values.tolist()

# Streamlit App
st.title("Skill Assessment & Recommendation")
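# Streamlit re-executes this script on every widget interaction, so the user's skills,
# the sampled questions, and per-skill scores are kept in st.session_state across reruns.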

# Simulate User Signup (for demo, replace with actual auth)
if 'user_id' not in st.session_state:
    st.session_state.user_id = "68233a6b7c0fd8f9d6994e"  # Example user ID
    st.session_state.skills = get_user_skills(st.session_state.user_id)
    st.session_state.scores = {}

if not st.session_state.skills:
    st.write("No skills found. Please update your profile with skills during signup.")
else:
    st.write(f"Detected Skills: {st.session_state.skills}")

    if 'questions' not in st.session_state:
        st.session_state.questions = get_questions_for_skills(st.session_state.skills)
        if st.session_state.questions is not None:
            st.session_state.questions = st.session_state.questions.reset_index(drop=True)

    if st.session_state.questions is not None and not st.session_state.questions.empty:
        for idx, row in st.session_state.questions.iterrows():
            st.subheader(f"Question for {row['Skill']}")
            user_answer = st.text_area(f"Question: {row['Question']}", key=f"answer_{idx}")
            if st.button(f"Submit Answer for {row['Skill']}", key=f"submit_{idx}"):
                score = evaluate_answer(user_answer, row['Answer'])
                st.session_state.scores[row['Skill']] = score
                st.success(f"Score for {row['Skill']}: {score}%")

        # The assessment is complete once every skill that actually has a question is
        # scored (some of the user's skills may have no questions in the bank).
        assessed_skills = st.session_state.questions['Skill'].tolist()
        if all(skill in st.session_state.scores for skill in assessed_skills):
            st.write("Assessment Complete!")
            mean_score = np.mean(list(st.session_state.scores.values()))
            weak_skills = [s for s, score in st.session_state.scores.items() if score < 60]
            st.write(f"Mean Score: {mean_score:.2f}%")
            st.write(f"Weak Skills: {weak_skills}")

            courses = recommend_courses(weak_skills or st.session_state.skills, "Intermediate")
            jobs = recommend_jobs(st.session_state.skills, "Intermediate")
            st.write("Recommended Courses:", courses)
            st.write("Recommended Jobs:", jobs)

            # Update user score in MongoDB (simplified)
            users_collection.update_one(
                {"_id": st.session_state.user_id},
                {"$set": {"skills_scores": st.session_state.scores}}
            )
            st.session_state.pop('questions', None)
    else:
        st.write("No questions available for the detected skills.")

# Redirect to Dashboard (simulated)
if st.button("Go to Dashboard"):
    st.write("Redirecting to User Dashboard...")
    # In a real app, use st.switch_page() or a navigation library; note that
    # st.experimental_rerun() has been superseded by st.rerun().