from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional, Dict, List
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import time
import os
import re
# Set cache directory explicitly (optional, as Dockerfile ENV should handle this)
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
app = FastAPI()
# Load datasets (updated path to match Dockerfile)
DATA_DIR = "/app/data/" # Changed from "data/" to "/app/data/"
job_df = pd.read_csv(os.path.join(DATA_DIR, "Updated_Job_Posting_Dataset.csv"), encoding="latin1")
course_df = pd.read_csv(os.path.join(DATA_DIR, "coursera_course_dataset_v2_no_null.csv"))
coding_df = pd.read_csv(os.path.join(DATA_DIR, "Software Questions.csv"), encoding="latin1")
# Preprocess datasets
coding_df.rename(columns={'Question': 'question', 'Answer': 'solutions'}, inplace=True)
job_df.rename(columns={'company_name': 'company', 'required_skills': 'skills'}, inplace=True)
course_df.rename(columns={'Title': 'course_title', 'Skills': 'skills'}, inplace=True)
coding_df.dropna(subset=['question', 'solutions'], inplace=True)
job_df["job_description"] = job_df["job_description"].fillna("")
# Load BERT model and vectorizer
bert_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
vectorizer = TfidfVectorizer()
# Pydantic model for request body
class UserInput(BaseModel):
    name: str
    skills: List[str]  # Required list of skills
    answers: Optional[Dict[str, Dict[str, str]]] = None  # Optional answers, keyed by skill and then by question text
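# Illustrative request body for /assess (values and question text are hypothetical, shown only to
# document the shape expected by UserInput):
# {
#   "name": "Alice",
#   "skills": ["Python", "SQL"],
#   "answers": {
#     "Python": {
#       "Write a function to reverse a string": "def reverse(s): return s[::-1]"
#     }
#   }
# }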
# Evaluate a coding answer: TF-IDF cosine similarity to the reference solution, with a penalty for slow submissions
def evaluate_coding_with_time(user_code, correct_code, start_time):
    end_time = time.time()
    execution_time = end_time - start_time
    # Fit a TF-IDF space over just these two snippets and measure their cosine similarity (scaled to 0-100)
    vectorized = vectorizer.fit_transform([user_code, correct_code])
    similarity = cosine_similarity(vectorized)[0][1] * 100
    # Deduct 0.1 points per second beyond a two-minute budget
    if execution_time > 120:
        similarity -= (execution_time - 120) * 0.1
    # Cast to a plain Python float so the score serialises cleanly in the JSON response
    return round(float(max(similarity, 0)), 2)
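# For example, an answer textually identical to the stored solution scores 100.0 before any
# time penalty; answers sharing few terms with the solution score correspondingly lower.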
# Pick up to num_questions coding challenges per skill, matched by keyword in the question text
def get_coding_challenges(skills, num_questions=5):
    skill_challenges = {}
    for skill in skills:
        # Plain substring match (regex=False) so skills such as "C++" are not parsed as regex patterns
        relevant = coding_df[coding_df["question"].str.contains(skill, case=False, na=False, regex=False)]
        if not relevant.empty:
            skill_challenges[skill] = relevant.sample(min(num_questions, len(relevant)))[["question", "solutions"]].to_dict(orient="records")
        else:
            skill_challenges[skill] = []
    return skill_challenges
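# Returned shape (illustrative): {"Python": [{"question": "...", "solutions": "..."}], "SQL": []}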
# Assign proficiency level
def get_proficiency_level(score):
    if score >= 80:
        return "Expert"
    elif score >= 50:
        return "Intermediate"
    else:
        return "Beginner"
# Recommend up to five courses that cover any of the user's weak skills
def recommend_courses(weak_skills):
    if not weak_skills:
        return []
    # Escape each skill so names like "C++" are matched literally in the regex alternation
    pattern = '|'.join(re.escape(skill) for skill in weak_skills)
    courses = course_df[course_df['skills'].str.contains(pattern, case=False, na=False)]
    return courses[['course_title', 'Organization']].head(5).to_dict(orient="records")
# Recommend the five jobs whose descriptions are most similar to the user's skills (BERT embeddings)
def recommend_jobs(skills):
    if not skills:
        return []
    # Note: job descriptions are re-encoded on every request, which is slow for large datasets
    job_df["job_embeddings"] = job_df["job_description"].apply(lambda x: bert_model.encode(str(x)))
    user_embedding = bert_model.encode(" ".join(skills))
    # Cast to plain float so the similarity values serialise cleanly in the JSON response
    job_df["BERT_Similarity"] = job_df["job_embeddings"].apply(lambda x: float(cosine_similarity([x], [user_embedding])[0][0]))
    top_jobs = job_df.sort_values(by="BERT_Similarity", ascending=False).head(5)
    return top_jobs[["job_title", "company", "location", "BERT_Similarity"]].to_dict(orient="records")
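# Performance note: recommend_jobs re-encodes every job description per request. A faster variant
# (sketch only, not wired in) would encode them once at startup and reuse the matrix, e.g.:
#   job_description_embeddings = bert_model.encode(job_df["job_description"].astype(str).tolist())
# and compute similarities against that precomputed array inside recommend_jobs.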
@app.get("/")
def read_root():
return {"message": "Skill Assessment API"}
@app.post("/assess")
def assess_skills(user_input: UserInput):
    # Extract user data from request
    user_name = user_input.name
    user_skills = user_input.skills
    if not user_skills:
        raise HTTPException(status_code=400, detail="Skills list cannot be empty")
    # Fetch coding challenges based on provided skills
    challenges = get_coding_challenges(user_skills)
    # Evaluate skills
    user_scores = {}
    for skill, challenge_list in challenges.items():
        if not challenge_list:
            user_scores[skill] = 0
            continue
        total_score = 0
        num_questions = len(challenge_list)
        if user_input.answers and skill in user_input.answers:
            # Score each provided answer against the stored reference solution
            for challenge in challenge_list:
                question = challenge["question"]
                if question in user_input.answers[skill]:
                    start_time = time.time() - 10  # Assume a fixed 10-second execution time; real timing is not available over the API
                    user_code = user_input.answers[skill][question]
                    correct_code = challenge["solutions"]
                    score = evaluate_coding_with_time(user_code, correct_code, start_time)
                    total_score += score
                else:
                    total_score += 0  # No answer provided for this question
        else:
            # No answers provided; assign a default score of 50% per question
            total_score = 50 * num_questions
        user_scores[skill] = round(total_score / num_questions, 2)
    # Proficiency levels
    proficiency_levels = {skill: get_proficiency_level(score) for skill, score in user_scores.items()}
    weak_skills = [skill for skill, level in proficiency_levels.items() if level in ["Beginner", "Intermediate"]]
    # Recommendations
    courses = recommend_courses(weak_skills)
    jobs = recommend_jobs(user_skills)
    return {
        "name": user_name,
        "skills": user_skills,
        "scores": user_scores,
        "proficiency_levels": proficiency_levels,
        "recommended_courses": courses,
        "recommended_jobs": jobs
    }
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
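# Example call once the server is running (illustrative payload):
#   curl -X POST http://localhost:7860/assess \
#        -H "Content-Type: application/json" \
#        -d '{"name": "Alice", "skills": ["Python"], "answers": null}'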