Spaces:
Running
Running
File size: 6,457 Bytes
edecf53 ed32658 edecf53 ed32658 edecf53 89f240b edecf53 89f240b edecf53 89f240b edecf53 89f240b edecf53 89f240b edecf53 89f240b edecf53 ed32658 89f240b ed32658 89f240b ed32658 89f240b ed32658 edecf53 ed32658 edecf53 eb65dfc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional, Dict, List
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import time
import os
# Set cache directory
# Both variables point the Hugging Face / Transformers caches at /app/cache.
# NOTE(review): they are assigned after `sentence_transformers` has already
# been imported above -- confirm the libraries still honour them at that point.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
# Application object that the route decorators further down register against.
app = FastAPI()
# Load datasets
# All three CSV files are read once, at import time, from DATA_DIR.
DATA_DIR = "/app/data/"

# Job postings (read as Latin-1).
job_df = pd.read_csv(
    os.path.join(DATA_DIR, "Updated_Job_Posting_Dataset.csv"),
    encoding="latin1",
)

# Course catalogue (default encoding).
course_df = pd.read_csv(
    os.path.join(DATA_DIR, "coursera_course_dataset_v2_no_null.csv"),
)

# Question bank used for the coding challenges (read as Latin-1).
coding_df = pd.read_csv(
    os.path.join(DATA_DIR, "Software Questions.csv"),
    encoding="latin1",
)
# Preprocess datasets
# Rename the question-bank columns to the lower-case names used by the
# functions below.
coding_df = coding_df.rename(
    columns={
        'Question': 'question',
        'Answer': 'solutions',
        'Category': 'category',
        'Difficulty': 'difficulty',
    }
)
# Discard question rows that lack any of the four fields.
coding_df = coding_df.dropna(
    subset=['question', 'solutions', 'category', 'difficulty']
)

# Give the job and course tables the column names the recommenders read.
job_df = job_df.rename(
    columns={'company_name': 'company', 'required_skills': 'skills'}
)
course_df = course_df.rename(
    columns={'Title': 'course_title', 'Skills': 'skills'}
)

# Missing job descriptions become empty strings.
job_df["job_description"] = job_df["job_description"].fillna("")
# Load BERT model and vectorizer
# Sentence-embedding model; recommend_jobs uses it to encode job descriptions
# and the user's skills.
bert_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
# Module-level TF-IDF vectorizer instance created with default settings.
vectorizer = TfidfVectorizer()
# Pydantic model for request body
class UserInput(BaseModel):
    """Request body accepted by the POST /assess route."""

    # Echoed back unchanged in the assessment response.
    name: str
    # Required list of categories (e.g., "General Programming", "Data Structures")
    skills: List[str]
    # Optional answers, keyed first by skill and then by question text; the
    # innermost value is the submitted answer.
    answers: Optional[Dict[str, Dict[str, str]]] = None
# Evaluate coding answers
def evaluate_coding_with_time(user_code, correct_code, start_time):
    """Score a submitted answer against the reference solution.

    The score is the TF-IDF cosine similarity between the two texts scaled to
    0-100. When more than 120 seconds have elapsed since ``start_time`` the
    score is reduced by 0.1 for every second over that limit, and it never
    drops below 0.

    Args:
        user_code: Text submitted by the user.
        correct_code: Reference solution text.
        start_time: ``time.time()`` timestamp marking when the attempt began.

    Returns:
        The score as a float rounded to two decimals. ``0.0`` is returned when
        neither text contains a token the vectorizer can use.
    """
    elapsed = time.time() - start_time

    # Fit a vectorizer that is private to this call. Refitting one shared
    # module-level instance would let concurrent requests overwrite each
    # other's fitted vocabulary mid-computation.
    local_vectorizer = TfidfVectorizer()
    try:
        tfidf = local_vectorizer.fit_transform([user_code, correct_code])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when both texts
        # are empty or hold no usable tokens; there is nothing to compare.
        return 0.0

    similarity = float(cosine_similarity(tfidf)[0][1]) * 100
    if elapsed > 120:
        similarity -= (elapsed - 120) * 0.1
    return round(max(similarity, 0.0), 2)
# Get coding challenges
def get_coding_challenges(categories: List[str], num_questions=5, difficulty: Optional[str] = None):
    """Pick random questions from the question bank for each category.

    Args:
        categories: Category names to look up. A question belongs to a
            category when its ``category`` value contains the name,
            ignoring case.
        num_questions: Maximum number of questions to draw per category.
        difficulty: When given (and non-empty), only questions whose
            ``difficulty`` equals it, ignoring case, are eligible.

    Returns:
        A dict mapping every requested category to a list of records with the
        keys ``question``, ``solutions`` and ``difficulty``. Categories with
        no eligible question map to an empty list.
    """
    wanted_difficulty = difficulty.lower() if difficulty else None
    skill_challenges = {}
    for category in categories:
        # regex=False: the category comes from the request and must be matched
        # literally. As a regex, "C++" raises re.error and "." matches all rows.
        in_category = coding_df["category"].str.contains(
            category, case=False, na=False, regex=False
        )
        relevant = coding_df[in_category]
        if wanted_difficulty is not None:
            relevant = relevant[relevant["difficulty"].str.lower() == wanted_difficulty]
        if relevant.empty:
            skill_challenges[category] = []
            continue
        # Never ask sample() for more rows than are available.
        picked = relevant.sample(min(num_questions, len(relevant)))
        skill_challenges[category] = picked[
            ["question", "solutions", "difficulty"]
        ].to_dict(orient="records")
    return skill_challenges
# Assign proficiency level
def get_proficiency_level(score):
    """Translate a numeric score into a proficiency label.

    Scores of 80 or more are "Expert", scores of 50 or more are
    "Intermediate", and everything else is "Beginner".
    """
    # Checked from the highest threshold down; the first one reached wins.
    thresholds = ((80, "Expert"), (50, "Intermediate"))
    for minimum, label in thresholds:
        if score >= minimum:
            return label
    return "Beginner"
# Recommend courses
def recommend_courses(weak_skills):
    """Return up to five courses whose skills mention any of ``weak_skills``.

    Args:
        weak_skills: Skill names to search for. Each is matched as a literal,
            case-insensitive substring of the course's ``skills`` value.

    Returns:
        A list of at most five records with the keys ``course_title`` and
        ``Organization``, in course-table order. Empty when ``weak_skills``
        is empty.
    """
    if not weak_skills:
        return []

    course_skills = course_df['skills']
    matches = pd.Series(False, index=course_df.index)
    for skill in weak_skills:
        # regex=False: skill names come from the request. Joining them into an
        # unescaped alternation pattern breaks on names such as "C++".
        matches = matches | course_skills.str.contains(
            skill, case=False, na=False, regex=False
        )

    courses = course_df[matches]
    return courses[['course_title', 'Organization']].head(5).to_dict(orient="records")
# Recommend jobs
# Embeddings of job_df["job_description"], one row per job; filled on first use.
_job_embeddings = None


def _get_job_embeddings():
    """Return the job-description embeddings, encoding them once and caching."""
    global _job_embeddings
    # The length check re-encodes if the job table was replaced by one of a
    # different size after the cache was filled.
    if _job_embeddings is None or len(_job_embeddings) != len(job_df):
        descriptions = job_df["job_description"].astype(str).tolist()
        # One batched encode call instead of one model call per row.
        _job_embeddings = bert_model.encode(descriptions)
    return _job_embeddings


def recommend_jobs(skills):
    """Return the five jobs whose descriptions best match ``skills``.

    The skills are joined with spaces, embedded with the sentence model and
    compared to every job description by cosine similarity.

    Args:
        skills: Skill names supplied by the user.

    Returns:
        A list of at most five records with the keys ``job_title``,
        ``company``, ``location`` and ``BERT_Similarity``, ordered from most
        to least similar. Empty when ``skills`` or the job table is empty.
    """
    if not skills or job_df.empty:
        return []

    # Description embeddings do not depend on the request, so they are computed
    # once and reused rather than re-encoded for every call.
    job_embeddings = _get_job_embeddings()
    user_embedding = bert_model.encode(" ".join(skills))

    # A single vectorized call yields an (n_jobs, 1) similarity matrix.
    similarities = cosine_similarity(job_embeddings, [user_embedding])[:, 0]

    # Rank on a copy so the shared job table is not modified per request.
    ranked = (
        job_df[["job_title", "company", "location"]]
        .assign(BERT_Similarity=similarities)
        .sort_values(by="BERT_Similarity", ascending=False)
        .head(5)
    )
    return ranked[["job_title", "company", "location", "BERT_Similarity"]].to_dict(orient="records")
@app.get("/")
def read_root():
    """Root route: return a fixed payload naming the service."""
    payload = {"message": "Skill Assessment API"}
    return payload
# New endpoint to provide challenges
@app.get("/challenges")
def get_user_challenges(skills: List[str], difficulty: Optional[str] = None):
    """Serve challenge questions for the requested skills.

    Responds with ``{"challenges": {category: [{"question", "difficulty"}]}}``.
    Raises a 400 HTTPException when ``skills`` is empty.
    """
    # NOTE(review): FastAPI reads a bare List[str] parameter from the JSON
    # request body, not from repeated query parameters, unless it is declared
    # with Query(...). Confirm which form the clients of this GET route send.
    if not skills:
        raise HTTPException(status_code=400, detail="Skills list cannot be empty")

    selected = get_coding_challenges(skills, difficulty=difficulty)

    # Copy only the question text and difficulty; solutions are left out of
    # what the user receives.
    public_view = {}
    for category, items in selected.items():
        public_view[category] = [
            {"question": item["question"], "difficulty": item["difficulty"]}
            for item in items
        ]
    return {"challenges": public_view}
# Number of questions an assessment covers per skill. Mirrors the default
# ``num_questions`` of get_coding_challenges, which is what /challenges serves.
_QUESTIONS_PER_SKILL = 5


def _score_answers(submitted, solutions_by_question, expected_count):
    """Average the scores of the submitted answers for one skill.

    Answers whose question text is not in ``solutions_by_question`` are
    ignored. The average is taken over ``expected_count`` questions, or over
    the number of recognised answers if that is larger, so unanswered
    questions count as 0.
    """
    total = 0.0
    recognised = 0
    for question, user_code in submitted.items():
        correct_code = solutions_by_question.get(question)
        if correct_code is None:
            continue
        start_time = time.time() - 10  # Simulate execution time
        total += evaluate_coding_with_time(user_code, correct_code, start_time)
        recognised += 1
    return round(total / max(expected_count, recognised), 2)


@app.post("/assess")
def assess_skills(user_input: UserInput):
    """Score the user's answers and recommend courses and jobs.

    For every requested skill:
      * no question in the bank matches the skill -> score 0;
      * the user sent no answers for the skill -> neutral score 50;
      * otherwise -> average similarity of the submitted answers to the
        reference solutions.

    Returns a dict with the keys ``name``, ``skills``, ``scores``,
    ``proficiency_levels``, ``recommended_courses`` and ``recommended_jobs``.
    Raises a 400 HTTPException when the skills list is empty.
    """
    user_name = user_input.name
    user_skills = user_input.skills
    if not user_skills:
        raise HTTPException(status_code=400, detail="Skills list cannot be empty")

    answers = user_input.answers or {}

    # Fetch every question of each category rather than a fresh random sample.
    # Answers are keyed by question text, and a new random draw would rarely
    # contain the questions the user was actually given, silently discarding
    # their answers.
    question_pool = get_coding_challenges(user_skills, num_questions=len(coding_df))

    user_scores = {}
    for skill, pool in question_pool.items():
        if not pool:
            user_scores[skill] = 0
            continue
        if skill not in answers:
            # No answers for this skill: fall back to the neutral score.
            user_scores[skill] = 50.0
            continue
        solutions_by_question = {item["question"]: item["solutions"] for item in pool}
        expected_count = min(_QUESTIONS_PER_SKILL, len(pool))
        user_scores[skill] = _score_answers(
            answers[skill], solutions_by_question, expected_count
        )

    proficiency_levels = {
        skill: get_proficiency_level(score) for skill, score in user_scores.items()
    }
    # Anything below "Expert" gets course recommendations.
    weak_skills = [
        skill
        for skill, level in proficiency_levels.items()
        if level in ["Beginner", "Intermediate"]
    ]
    courses = recommend_courses(weak_skills)
    jobs = recommend_jobs(user_skills)

    return {
        "name": user_name,
        "skills": user_skills,
        "scores": user_scores,
        "proficiency_levels": proficiency_levels,
        "recommended_courses": courses,
        "recommended_jobs": jobs
    }
if __name__ == "__main__":
    # Executed as a script: serve the app with uvicorn on all interfaces,
    # port 7860.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)