Spaces:
Running
Running
File size: 3,233 Bytes
edecf53 ed32658 edecf53 ed32658 edecf53 89f240b edecf53 66f1fae edecf53 89f240b edecf53 89f240b edecf53 89f240b edecf53 89f240b edecf53 66f1fae ed32658 66f1fae 89f240b 66f1fae 89f240b ed32658 89f240b ed32658 edecf53 66f1fae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import os

# Cache directories MUST be set before importing any Hugging Face libraries:
# huggingface_hub/transformers resolve HF_HOME / TRANSFORMERS_CACHE at import
# time, so setting them after the import (as before) had no effect.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"

import time
from typing import Dict, List, Optional

import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

app = FastAPI()
# --- Datasets -------------------------------------------------------------
DATA_DIR = "/app/data/"

job_df = pd.read_csv(os.path.join(DATA_DIR, "Updated_Job_Posting_Dataset.csv"), encoding="latin1")
course_df = pd.read_csv(os.path.join(DATA_DIR, "coursera_course_dataset_v2_no_null.csv"))
coding_df = pd.read_csv(os.path.join(DATA_DIR, "Software Questions.csv"), encoding="latin1")

# Normalize column names to the lowercase schema used throughout the app.
coding_df = coding_df.rename(
    columns={
        'Question': 'question',
        'Answer': 'solutions',
        'Category': 'category',
        'Difficulty': 'difficulty',
    }
)
# Drop rows missing any field required to serve a challenge.
coding_df = coding_df.dropna(subset=['question', 'solutions', 'category', 'difficulty'])
job_df = job_df.rename(columns={'company_name': 'company', 'required_skills': 'skills'})
course_df = course_df.rename(columns={'Title': 'course_title', 'Skills': 'skills'})
# Replace NaN descriptions with empty strings so text processing never sees nulls.
job_df["job_description"] = job_df["job_description"].fillna("")

# --- Models ---------------------------------------------------------------
# Sentence-embedding model and TF-IDF vectorizer (similarity scoring).
bert_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
vectorizer = TfidfVectorizer()
# Pydantic model for request body
class ChallengeRequest(BaseModel):
    """Request body for POST /challenges."""
    skills: List[str]  # Categories to query (e.g. "General Programming", "Data Structures")
    difficulty: Optional[str] = None  # Optional difficulty filter (case-insensitive match)
# Get coding challenges
def get_coding_challenges(
    categories: List[str],
    num_questions: int = 5,
    difficulty: Optional[str] = None,
) -> Dict[str, List[dict]]:
    """Sample up to ``num_questions`` random challenges per requested category.

    Args:
        categories: Category names, matched as case-insensitive literal
            substrings against the dataset's ``category`` column.
        num_questions: Maximum number of questions returned per category.
        difficulty: Optional case-insensitive exact difficulty filter.

    Returns:
        Mapping of category -> list of records with ``question``,
        ``solutions`` and ``difficulty`` keys; empty list when no rows match.
    """
    skill_challenges: Dict[str, List[dict]] = {}
    for category in categories:
        # regex=False: match the category as a literal string. The previous
        # regex matching raised re.error for names with metacharacters
        # (e.g. "C++") and could silently mis-match others.
        relevant = coding_df[
            coding_df["category"].str.contains(category, case=False, na=False, regex=False)
        ]
        if difficulty:
            relevant = relevant[relevant["difficulty"].str.lower() == difficulty.lower()]
        if relevant.empty:
            skill_challenges[category] = []
        else:
            sampled = relevant.sample(min(num_questions, len(relevant)))
            skill_challenges[category] = sampled[
                ["question", "solutions", "difficulty"]
            ].to_dict(orient="records")
    return skill_challenges
@app.get("/")
def read_root():
    """Health-check / landing endpoint identifying the service."""
    payload = {"message": "Skill Assessment API"}
    return payload
# **Updated `/challenges` Endpoint to Accept JSON Body**
@app.post("/challenges")
def get_user_challenges(request: ChallengeRequest):
    """Return coding challenges for the requested skill categories.

    Solutions are stripped from each record so the answer is never exposed
    to the user. Raises HTTP 400 when the skills list is empty.
    """
    if not request.skills:
        raise HTTPException(status_code=400, detail="Skills list cannot be empty")

    challenges = get_coding_challenges(request.skills, difficulty=request.difficulty)

    # Keep only the question text and difficulty for each sampled challenge.
    sanitized = {}
    for category, challenge_list in challenges.items():
        sanitized[category] = [
            {"question": item["question"], "difficulty": item["difficulty"]}
            for item in challenge_list
        ]
    return {"challenges": sanitized}
if __name__ == "__main__":
    # Direct entry point: serve on all interfaces, port 7860 (the
    # conventional Hugging Face Spaces port — confirm for other deployments).
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
|