File size: 3,237 Bytes
f9fd05d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from sentence_transformers import SentenceTransformer
from faiss import IndexFlatIP
import numpy as np
from huggingface_hub import InferenceClient
import json
import os

class RAGInterviewAgent:
    """Retrieval-augmented interview agent.

    Role requirements are embedded into a FAISS inner-product index; the
    requirements most relevant to the candidate's CV are retrieved and an
    LLM turns them into tailored questions and scores the answers.
    """

    # Knowledge base: baseline requirements per supported role.
    _ROLE_REQUIREMENTS = {
        "Software Engineer": [
            "Explain SOLID principles",
            "How would you optimize a slow SQL query?",
            "Describe your experience with unit testing"
        ],
        "Data Scientist": [
            "Explain bias-variance tradeoff",
            "How would you handle missing data?",
            "Describe a machine learning project you worked on"
        ]
    }

    # Embedding dimension of 'all-MiniLM-L6-v2'.
    _EMBED_DIM = 384

    def __init__(self, job_role, cv_summary):
        """
        Args:
            job_role: Role key, e.g. "Software Engineer" or "Data Scientist".
            cv_summary: Dict with at least a "text" key holding the CV text.

        Raises:
            ValueError: If ``job_role`` is not in the knowledge base.
        """
        self.job_role = job_role
        self.cv_summary = cv_summary
        self.current_q = 0

        # Initialize models (downloads weights / hits the HF API on first use).
        self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
        self.llm = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct")

        # Inner-product index; with L2-normalized embeddings (see
        # _setup_knowledge_base) inner product equals cosine similarity.
        self.index = IndexFlatIP(self._EMBED_DIM)
        self._setup_knowledge_base()

        # Generate questions
        self.questions = self._generate_questions()

    def _setup_knowledge_base(self):
        """Embed this role's requirements and load them into the FAISS index.

        Raises:
            ValueError: If the role has no entry in the knowledge base
                (previously this surfaced as a raw KeyError).
        """
        try:
            self.knowledge = self._ROLE_REQUIREMENTS[self.job_role]
        except KeyError:
            supported = ", ".join(sorted(self._ROLE_REQUIREMENTS))
            raise ValueError(
                f"Unsupported job role {self.job_role!r}; "
                f"expected one of: {supported}"
            ) from None

        # Normalize so IndexFlatIP scores are cosine similarities; FAISS
        # requires float32 input.
        embeddings = self.embedder.encode(
            self.knowledge, normalize_embeddings=True
        )
        self.index.add(np.asarray(embeddings, dtype=np.float32))

    def _generate_questions(self):
        """Generate one interview question per retrieved requirement (RAG)."""
        # Embed the CV summary and retrieve the most relevant requirements.
        cv_embed = self.embedder.encode(
            self.cv_summary["text"], normalize_embeddings=True
        )
        # Don't ask for more neighbors than the index holds; FAISS pads
        # missing results with index -1.
        k = min(3, len(self.knowledge))
        _, indices = self.index.search(
            np.asarray([cv_embed], dtype=np.float32), k
        )

        questions = []
        for i in indices[0]:
            if i < 0:  # -1 marks an empty result slot
                continue
            prompt = f"""
            Generate an interview question for a {self.job_role} position
            based on this requirement: {self.knowledge[i]}
            Candidate's CV summary: {self.cv_summary['text'][:1000]}
            """

            response = self.llm.text_generation(
                prompt,
                max_new_tokens=100,
                temperature=0.7
            )
            questions.append({
                "text": response.strip(),
                "base_requirement": self.knowledge[i]
            })

        return questions

    def get_current_question(self):
        """Return the current question dict ("text", "base_requirement")."""
        return self.questions[self.current_q]

    def next_question(self):
        """Advance to the next question (no bounds check, as before)."""
        self.current_q += 1

    def evaluate_answer(self, answer):
        """Score ``answer`` against the current question's requirement.

        Returns:
            Dict with "score" and "feedback" parsed from the LLM's JSON
            output, or a neutral fallback when no valid JSON is found.
        """
        # Get relevant requirement
        req = self.questions[self.current_q]["base_requirement"]

        prompt = f"""
        Evaluate this interview answer on a scale of 1-10:
        Requirement: {req}
        Answer: {answer}

        Provide JSON output with: score, feedback
        """

        response = self.llm.text_generation(
            prompt,
            max_new_tokens=200,
            temperature=0.3
        )

        # Extract the outermost {...} span. The previous split-based parse
        # dropped the opening brace, so json.loads always failed and the
        # fallback was returned unconditionally.
        start = response.find("{")
        end = response.rfind("}")
        if start != -1 and end > start:
            try:
                return json.loads(response[start:end + 1])
            except json.JSONDecodeError:
                pass
        return {"score": 5, "feedback": "Evaluation failed"}