from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional, Dict, List
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import time
import os
app = FastAPI()
# Load datasets (only those needed for questions, jobs, and courses)
DATA_DIR = "data/"
job_df = pd.read_csv(os.path.join(DATA_DIR, "Updated_Job_Posting_Dataset.csv"), encoding="latin1")
course_df = pd.read_csv(os.path.join(DATA_DIR, "coursera_course_dataset_v2_no_null.csv"))
coding_df = pd.read_csv(os.path.join(DATA_DIR, "Software Questions.csv"), encoding="latin1")
# Preprocess datasets
coding_df.rename(columns={'Question': 'question', 'Answer': 'solutions'}, inplace=True)
job_df.rename(columns={'company_name': 'company', 'required_skills': 'skills'}, inplace=True)
course_df.rename(columns={'Title': 'course_title', 'Skills': 'skills'}, inplace=True)
coding_df.dropna(subset=['question', 'solutions'], inplace=True)
job_df["job_description"] = job_df["job_description"].fillna("")
# Load BERT model and vectorizer
bert_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
vectorizer = TfidfVectorizer()
# Pydantic model for request body
class UserInput(BaseModel):
    name: str
    skills: List[str]  # Required list of skills
    answers: Optional[Dict[str, Dict[str, str]]] = None  # Optional answers
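
# Example request body for /assess (illustrative values only; the inner keys of
# "answers" must match the question text sampled by get_coding_challenges):
# {
#     "name": "Alice",
#     "skills": ["Python", "SQL"],
#     "answers": {"Python": {"<question text>": "<candidate's code answer>"}}
# }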
# Evaluate coding answers
def evaluate_coding_with_time(user_code, correct_code, start_time):
    end_time = time.time()
    execution_time = end_time - start_time
    vectorized = vectorizer.fit_transform([user_code, correct_code])
    similarity = cosine_similarity(vectorized)[0][1] * 100
    if execution_time > 120:
        similarity -= (execution_time - 120) * 0.1
    return round(max(similarity, 0), 2)
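
# Note: the shared TfidfVectorizer above is re-fit on each (user_code, correct_code)
# pair, so the similarity score is relative to the vocabulary of those two snippets only.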
# Get coding challenges
def get_coding_challenges(skills, num_questions=5):
    skill_challenges = {}
    for skill in skills:
        relevant = coding_df[coding_df["question"].str.contains(skill, case=False, na=False)]
        if not relevant.empty:
            skill_challenges[skill] = relevant.sample(min(num_questions, len(relevant)))[["question", "solutions"]].to_dict(orient="records")
        else:
            skill_challenges[skill] = []
    return skill_challenges
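
# Note: pandas' str.contains treats the skill as a regular expression by default,
# so skill names containing regex metacharacters (e.g. "C++") may raise an error or
# match unexpectedly; passing regex=False (or escaping with re.escape) would make
# the match literal.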
# Assign proficiency level
def get_proficiency_level(score):
    if score >= 80:
        return "Expert"
    elif score >= 50:
        return "Intermediate"
    else:
        return "Beginner"
# Recommend courses
def recommend_courses(weak_skills):
    if not weak_skills:
        return []
    courses = course_df[course_df['skills'].str.contains('|'.join(weak_skills), case=False, na=False)]
    return courses[['course_title', 'Organization']].head(5).to_dict(orient="records")
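
# Note: recommend_courses returns the first five matching rows in dataset order;
# results are filtered by skill keywords but not ranked by relevance.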
# Recommend jobs
def recommend_jobs(skills):
    if not skills:
        return []
    job_df["job_embeddings"] = job_df["job_description"].apply(lambda x: bert_model.encode(str(x)))
    user_embedding = bert_model.encode(" ".join(skills))
    job_df["BERT_Similarity"] = job_df["job_embeddings"].apply(lambda x: cosine_similarity([x], [user_embedding])[0][0])
    top_jobs = job_df.sort_values(by="BERT_Similarity", ascending=False).head(5)
    return top_jobs[["job_title", "company", "location", "BERT_Similarity"]].to_dict(orient="records")
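
# Note: recommend_jobs re-encodes every job description on each request, which can
# be slow for large datasets; since job_description does not change between requests,
# the embeddings could be precomputed once at startup and reused.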
@app.get("/")
def read_root():
return {"message": "Skill Assessment API"}
@app.post("/assess")
def assess_skills(user_input: UserInput):
    # Extract user data from request
    user_name = user_input.name
    user_skills = user_input.skills
    if not user_skills:
        raise HTTPException(status_code=400, detail="Skills list cannot be empty")
    # Fetch coding challenges based on provided skills
    challenges = get_coding_challenges(user_skills)
    # Evaluate skills
    user_scores = {}
    for skill, challenge_list in challenges.items():
        if not challenge_list:
            user_scores[skill] = 0
            continue
        total_score = 0
        num_questions = len(challenge_list)
        if user_input.answers and skill in user_input.answers:
            # Use provided answers
            for challenge in challenge_list:
                question = challenge["question"]
                if question in user_input.answers[skill]:
                    start_time = time.time() - 10  # Simulate execution time
                    user_code = user_input.answers[skill][question]
                    correct_code = challenge["solutions"]
                    score = evaluate_coding_with_time(user_code, correct_code, start_time)
                    total_score += score
                else:
                    total_score += 0  # No answer provided for this question
        else:
            # No answers provided; assign default score (50% per question)
            total_score = 50 * num_questions
        user_scores[skill] = round(total_score / num_questions, 2)
    # Proficiency levels
    proficiency_levels = {skill: get_proficiency_level(score) for skill, score in user_scores.items()}
    weak_skills = [skill for skill, level in proficiency_levels.items() if level in ["Beginner", "Intermediate"]]
    # Recommendations
    courses = recommend_courses(weak_skills)
    jobs = recommend_jobs(user_skills)
    return {
        "name": user_name,
        "skills": user_skills,
        "scores": user_scores,
        "proficiency_levels": proficiency_levels,
        "recommended_courses": courses,
        "recommended_jobs": jobs
    }
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
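
# Example call once the server is running (illustrative; "Python" must appear in the
# Software Questions dataset for any challenges to be found):
#
#   curl -X POST http://localhost:7860/assess \
#        -H "Content-Type: application/json" \
#        -d '{"name": "Alice", "skills": ["Python"]}'
#
# With no "answers" provided, each skill that has challenges receives the default
# 50% score per question; skills with no matching challenges score 0.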