import os

# Set cache directory before importing the Hugging Face libraries so the
# writable /app/cache location is actually picked up at import time.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"

import time
from typing import Optional, Dict, List

import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

app = FastAPI()

# Load datasets
DATA_DIR = "/app/data/"
job_df = pd.read_csv(os.path.join(DATA_DIR, "Updated_Job_Posting_Dataset.csv"), encoding="latin1")
course_df = pd.read_csv(os.path.join(DATA_DIR, "coursera_course_dataset_v2_no_null.csv"))
coding_df = pd.read_csv(os.path.join(DATA_DIR, "Software Questions.csv"), encoding="latin1")

# Preprocess datasets
coding_df = coding_df.rename(columns={
    'Question': 'question',
    'Answer': 'solutions',
    'Category': 'category',
    'Difficulty': 'difficulty'
})
coding_df.dropna(subset=['question', 'solutions', 'category', 'difficulty'], inplace=True)
job_df.rename(columns={'company_name': 'company', 'required_skills': 'skills'}, inplace=True)
course_df.rename(columns={'Title': 'course_title', 'Skills': 'skills'}, inplace=True)
job_df["job_description"] = job_df["job_description"].fillna("")

# Load BERT model and vectorizer
bert_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
vectorizer = TfidfVectorizer()
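# (bert_model and vectorizer are created once at startup; they are presumably used
# by matching/recommendation endpoints that are not shown in this section.)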

# Pydantic model for request body
class ChallengeRequest(BaseModel):
    skills: List[str]  # List of categories (e.g., "General Programming", "Data Structures")
    difficulty: Optional[str] = None  # Optional difficulty level

# Get coding challenges for the requested categories
def get_coding_challenges(categories: List[str], num_questions: int = 5, difficulty: Optional[str] = None):
    skill_challenges = {}
    for category in categories:
        # Match rows whose category contains the requested name (case-insensitive)
        relevant = coding_df[coding_df["category"].str.contains(category, case=False, na=False)]
        if difficulty:
            relevant = relevant[relevant["difficulty"].str.lower() == difficulty.lower()]
        if not relevant.empty:
            # Sample up to num_questions rows per category
            skill_challenges[category] = relevant.sample(min(num_questions, len(relevant)))[
                ["question", "solutions", "difficulty"]
            ].to_dict(orient="records")
        else:
            skill_challenges[category] = []
    return skill_challenges
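
# Example call (illustrative values; assumes the CSV contains a "Data Structures" category):
#   get_coding_challenges(["Data Structures"], num_questions=2, difficulty="Easy")
#   -> {"Data Structures": [{"question": "...", "solutions": "...", "difficulty": "Easy"}, ...]}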

@app.get("/")
def read_root():
    return {"message": "Skill Assessment API"}

# Updated /challenges endpoint to accept a JSON request body
@app.post("/challenges")
def get_user_challenges(request: ChallengeRequest):
    skills = request.skills
    difficulty = request.difficulty
    if not skills:
        raise HTTPException(status_code=400, detail="Skills list cannot be empty")
    challenges = get_coding_challenges(skills, difficulty=difficulty)
    # Return only questions and difficulty (exclude solutions for the user)
    return {
        "challenges": {
            category: [
                {"question": challenge["question"], "difficulty": challenge["difficulty"]}
                for challenge in challenge_list
            ]
            for category, challenge_list in challenges.items()
        }
    }
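
# Example request body for POST /challenges (illustrative values):
#   {"skills": ["Data Structures", "General Programming"], "difficulty": "Easy"}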

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
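
# Rough usage sketch once the server is running (port 7860 as configured above):
#   curl -X POST http://localhost:7860/challenges \
#        -H "Content-Type: application/json" \
#        -d '{"skills": ["Data Structures"], "difficulty": "Easy"}'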