# Source: app.py as shown in the Hugging Face file viewer (uploader: Muhammad541).
# Commit: "Update app.py" (66f1fae, verified); file size 3.23 kB.
import os
import time
from typing import Optional, Dict, List

# Set cache directory.
# These variables must be exported BEFORE the Hugging Face stack
# (sentence_transformers -> transformers / huggingface_hub) is imported:
# those libraries resolve their cache locations when they are first imported,
# so assigning the variables afterwards has no effect on where models are
# cached.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"

import pandas as pd  # noqa: E402
from fastapi import FastAPI, HTTPException  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from sentence_transformers import SentenceTransformer  # noqa: E402
from sklearn.feature_extraction.text import TfidfVectorizer  # noqa: E402
from sklearn.metrics.pairwise import cosine_similarity  # noqa: E402

# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()
# Load datasets
DATA_DIR = "/app/data/"


def _read_dataset(filename, **read_csv_kwargs):
    # Read one CSV that lives under DATA_DIR, forwarding any extra options
    # (e.g. encoding) straight to pandas.read_csv.
    return pd.read_csv(os.path.join(DATA_DIR, filename), **read_csv_kwargs)


job_df = _read_dataset("Updated_Job_Posting_Dataset.csv", encoding="latin1")
course_df = _read_dataset("coursera_course_dataset_v2_no_null.csv")
coding_df = _read_dataset("Software Questions.csv", encoding="latin1")
# Preprocess datasets
# Coding questions: normalise the column names, then discard any row that is
# missing one of the four fields the API relies on.
_CODING_COLUMN_MAP = {
    'Question': 'question',
    'Answer': 'solutions',
    'Category': 'category',
    'Difficulty': 'difficulty',
}
coding_df = coding_df.rename(columns=_CODING_COLUMN_MAP).dropna(
    subset=['question', 'solutions', 'category', 'difficulty']
)

# Job postings and courses: align column names across the datasets.
job_df = job_df.rename(columns={'company_name': 'company', 'required_skills': 'skills'})
course_df = course_df.rename(columns={'Title': 'course_title', 'Skills': 'skills'})

# Missing job descriptions become empty strings.
job_df["job_description"] = job_df["job_description"].fillna("")
# Load BERT model and vectorizer
# Both objects are built once at import time and kept as module-level globals.
bert_model = SentenceTransformer(model_name_or_path="paraphrase-MiniLM-L6-v2")
vectorizer = TfidfVectorizer()
# Pydantic model for request body
# Describes the JSON payload accepted by the POST /challenges handler
# (get_user_challenges), which reads both fields and forwards them to
# get_coding_challenges.
class ChallengeRequest(BaseModel):
    # Required. Each entry is used as a category filter against the coding
    # question dataset; the handler rejects an empty list with HTTP 400.
    skills: List[str]  # List of categories (e.g., "General Programming", "Data Structures")
    # Optional. When omitted (None) no difficulty filtering is applied.
    difficulty: Optional[str] = None  # Optional difficulty level
# Get coding challenges
def get_coding_challenges(categories: List[str], num_questions=5, difficulty: Optional[str] = None):
    """Randomly sample coding questions for each requested category.

    Args:
        categories: Category names to look up. Each name is matched as a
            case-insensitive *literal* substring of the ``category`` column
            of the module-level ``coding_df``.
        num_questions: Maximum number of questions sampled per category;
            fewer are returned when fewer rows match.
        difficulty: Optional difficulty filter. When truthy, only rows whose
            ``difficulty`` equals it (case-insensitively) are kept.

    Returns:
        A dict mapping every requested category to a list of records with
        the keys ``question``, ``solutions`` and ``difficulty``. A category
        with no matching rows maps to an empty list.
    """
    # Loop-invariant: normalise the requested difficulty once.
    wanted_difficulty = difficulty.lower() if difficulty else None

    skill_challenges = {}
    for category in categories:
        # regex=False is essential: the category comes from the request body,
        # and names such as "C++" are invalid regular expressions (re.error),
        # while "." or "C#|" would silently match unintended rows.
        matches = coding_df["category"].str.contains(
            category, case=False, na=False, regex=False
        )
        relevant = coding_df[matches]

        if wanted_difficulty is not None:
            relevant = relevant[relevant["difficulty"].str.lower() == wanted_difficulty]

        if relevant.empty:
            skill_challenges[category] = []
            continue

        # Never ask sample() for more rows than are available.
        sampled = relevant.sample(min(num_questions, len(relevant)))
        skill_challenges[category] = sampled[
            ["question", "solutions", "difficulty"]
        ].to_dict(orient="records")
    return skill_challenges
@app.get("/")
def read_root():
    # Landing route: responds with a fixed banner identifying the service.
    banner = {"message": "Skill Assessment API"}
    return banner
# `/challenges` endpoint: takes its parameters from a JSON request body.
@app.post("/challenges")
def get_user_challenges(request: ChallengeRequest):
    # Guard clause: an empty skills list is a client error.
    if not request.skills:
        raise HTTPException(status_code=400, detail="Skills list cannot be empty")

    selected = get_coding_challenges(request.skills, difficulty=request.difficulty)

    # Build the user-facing view: keep only the question text and difficulty
    # so that the stored solutions are never sent back to the caller.
    public_view = {}
    for category, items in selected.items():
        trimmed = []
        for item in items:
            trimmed.append({"question": item["question"], "difficulty": item["difficulty"]})
        public_view[category] = trimmed

    return {"challenges": public_view}
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)