import os

# Set the Hugging Face cache directory before importing any HF-backed library;
# huggingface_hub reads HF_HOME at import time, so assignments made after the
# imports have no effect on where models are cached.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"  # legacy alias for older transformers releases

from typing import List, Optional

import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

app = FastAPI()

# Load datasets
DATA_DIR = "/app/data/"
job_df = pd.read_csv(os.path.join(DATA_DIR, "Updated_Job_Posting_Dataset.csv"), encoding="latin1")
course_df = pd.read_csv(os.path.join(DATA_DIR, "coursera_course_dataset_v2_no_null.csv"))
coding_df = pd.read_csv(os.path.join(DATA_DIR, "Software Questions.csv"), encoding="latin1")

# Preprocess datasets
coding_df = coding_df.rename(columns={
    'Question': 'question',
    'Answer': 'solutions',
    'Category': 'category',
    'Difficulty': 'difficulty'
})
coding_df.dropna(subset=['question', 'solutions', 'category', 'difficulty'], inplace=True)
job_df.rename(columns={'company_name': 'company', 'required_skills': 'skills'}, inplace=True)
course_df.rename(columns={'Title': 'course_title', 'Skills': 'skills'}, inplace=True)
job_df["job_description"] = job_df["job_description"].fillna("")

# Load the sentence-embedding model and TF-IDF vectorizer once at startup.
# Neither is used by the endpoints in this file; they appear to be intended
# for similarity-based matching against the datasets loaded above.
bert_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
vectorizer = TfidfVectorizer()
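
# Illustrative sketch (an assumption, not part of the existing endpoints): the
# embedding model and the cosine_similarity import fit the usual pattern of
# matching a free-text skill query against the job postings loaded above. The
# function name and the top_k parameter are hypothetical.
def match_jobs_by_skills(skill_text: str, top_k: int = 5) -> pd.DataFrame:
    # Embed the query and every job's skills string in a single batch.
    corpus = [skill_text] + job_df["skills"].fillna("").astype(str).tolist()
    embeddings = bert_model.encode(corpus)
    # Cosine similarity of the query embedding against each job embedding.
    scores = cosine_similarity([embeddings[0]], embeddings[1:])[0]
    # Return the top_k most similar job rows, best match first.
    return job_df.iloc[scores.argsort()[::-1][:top_k]]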

# Pydantic model for request body
class ChallengeRequest(BaseModel):
    skills: List[str]  # List of categories (e.g., "General Programming", "Data Structures")
    difficulty: Optional[str] = None  # Optional difficulty level
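
# Example body this model accepts (illustrative values):
#   {"skills": ["General Programming", "Data Structures"], "difficulty": "Easy"}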

# Get coding challenges: up to num_questions random questions per category,
# optionally filtered by difficulty.
def get_coding_challenges(categories: List[str], num_questions: int = 5, difficulty: Optional[str] = None):
    skill_challenges = {}
    for category in categories:
        # Literal (non-regex) substring match, so category names containing
        # regex metacharacters (e.g. "C++") cannot break the filter.
        relevant = coding_df[coding_df["category"].str.contains(category, case=False, regex=False, na=False)]
        if difficulty:
            relevant = relevant[relevant["difficulty"].str.lower() == difficulty.lower()]
        if relevant.empty:
            skill_challenges[category] = []
        else:
            sampled = relevant.sample(min(num_questions, len(relevant)))
            skill_challenges[category] = sampled[["question", "solutions", "difficulty"]].to_dict(orient="records")
    return skill_challenges
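
# Example call and return shape (illustrative; actual questions come from the CSV):
#   get_coding_challenges(["Data Structures"], num_questions=2, difficulty="Easy")
#   -> {"Data Structures": [
#          {"question": "...", "solutions": "...", "difficulty": "Easy"},
#          {"question": "...", "solutions": "...", "difficulty": "Easy"}]}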

@app.get("/")
def read_root():
    return {"message": "Skill Assessment API"}

# POST /challenges accepts a JSON body matching ChallengeRequest
@app.post("/challenges")
def get_user_challenges(request: ChallengeRequest):
    skills = request.skills
    difficulty = request.difficulty
    
    if not skills:
        raise HTTPException(status_code=400, detail="Skills list cannot be empty")

    challenges = get_coding_challenges(skills, difficulty=difficulty)

    # Return only questions and difficulty (exclude solutions for the user)
    return {
        "challenges": {
            category: [
                {"question": challenge["question"], "difficulty": challenge["difficulty"]}
                for challenge in challenge_list
            ]
            for category, challenge_list in challenges.items()
        }
    }
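
# Example request against a local run (illustrative; port matches uvicorn below):
#   curl -X POST http://localhost:7860/challenges \
#        -H "Content-Type: application/json" \
#        -d '{"skills": ["General Programming"], "difficulty": "Medium"}'
#
# Example response shape:
#   {"challenges": {"General Programming": [
#       {"question": "...", "difficulty": "Medium"}, ...]}}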

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)