Delete src
- src/app.py +0 -210
- src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml +0 -4
- src/configs/llm/openai-gpt-3.5-turbo.yaml +0 -4
- src/configs/llm/openai-gpt-4o-mini.yaml +0 -4
- src/configs/parser/llamaparse_en.yaml +0 -7
- src/domain/candidate.py +0 -13
- src/domain/enums/emotion_types.py +0 -21
- src/domain/enums/interview_status.py +0 -11
- src/domain/interview.py +0 -28
- src/llm/base_llm_provider.py +0 -16
- src/llm/enums.py +0 -3
- src/llm/llm.py +0 -32
- src/llm/nvidia_llm.py +0 -29
- src/llm/openai_llm.py +0 -29
- src/main_test.py +0 -97
- src/sample_inputs.py +0 -98
- src/service/emotion_recognition.py +0 -136
- src/service/resume_parser.py +0 -42
- src/service/utils.py +0 -103
- src/template/grading_prompt.py +0 -111
- src/template/parser_prompt.py +0 -21
src/app.py
DELETED
@@ -1,210 +0,0 @@
```python
import gradio as gr
from typing import Dict
import pandas as pd

# from src.application.services import InterviewAnalyzer
# from src.infrastructure.llm import LangchainService
# from src.infrastructure.emotion import DeepFaceService
# from src.infrastructure.speech import GoogleSpeechService


# class GradioInterface:
#     def __init__(self):
#         # Initialize services
#         self.emotion_service = DeepFaceService()
#         self.speech_service = GoogleSpeechService()
#         self.llm_service = LangchainService()
#
#         # Initialize analyzer
#         self.analyzer = InterviewAnalyzer(
#             emotion_service=self.emotion_service,
#             speech_service=self.speech_service,
#             llm_service=self.llm_service,
#         )
#
#     def create_interface(self) -> gr.Interface:
#         def process_submission(
#             video_file: str, resume_file: str, job_requirements: str
#         ) -> Dict:
#             # Implementation for processing submission
#             pass
#
#         # Create Gradio interface
#         interface = gr.Interface(
#             fn=process_submission,
#             inputs=[
#                 gr.Video(label="Interview Recording"),
#                 gr.File(label="Resume"),
#                 gr.Textbox(label="Job Requirements", lines=5),
#             ],
#             outputs=gr.JSON(label="Analysis Results"),
#             title="HR Interview Analysis System",
#             description="Upload interview recording and resume to analyze candidate performance",
#         )
#
#         return interface


# Temporary mock implementation to test the simple interface
class GradioInterface:
    def __init__(self):
        # DataFrame listing all candidates' feedback
        self.candidate_feedback = pd.DataFrame(columns=["Name", "Score", "Feedback"])

    def validate_file_format(self, file_path: str, valid_extensions: list) -> bool:
        return isinstance(file_path, str) and any(
            file_path.endswith(ext) for ext in valid_extensions
        )

    def process_video(self, video_path: str) -> str:
        # Process transcript from the video
        return "### Transcript\nExample of transcript of the interview video."

    def process_resume(self, resume_path: str) -> str:
        # Resume parsing
        return "### Resume Analysis\n- **Skills**: NLP, Machine Learning, Computer Vision\n- **Experience**: 5 years."

    def analyze_emotions(self, video_path: str) -> str:
        # Emotion analysis
        return "### Emotion Analysis\n- **Overall Emotion**: Positive\n- **Details**: Candidate displayed confidence and engagement."

    def get_feedback(self, name: str, score: int, feedback: str) -> pd.DataFrame:
        return pd.DataFrame({"Name": [name], "Score": [score], "Feedback": [feedback]})

    def save_report(self):
        # Save report
        report_path = "report_path.docx"
        with open(report_path, "w") as f:
            # Pass fields to include in report here
            f.write("Example report")
        return report_path

    def create_interface(self) -> gr.Blocks:
        def process_submission(
            video_path, resume_path, interview_questions, job_requirements
        ):
            # Validate inputs and formats
            if not video_path:
                return (
                    "Please upload an interview video.",
                    None,
                    None,
                    self.candidate_feedback,
                )
            if not resume_path:
                return (
                    "Please upload a resume (PDF).",
                    None,
                    None,
                    self.candidate_feedback,
                )
            if not interview_questions:
                return (
                    "Please provide interview questions.",
                    None,
                    None,
                    self.candidate_feedback,
                )
            if not job_requirements:
                return (
                    "Please provide job requirements.",
                    None,
                    None,
                    self.candidate_feedback,
                )
            if not self.validate_file_format(video_path, [".mp4", ".avi", ".mkv"]):
                return "Invalid video format.", None, None, self.candidate_feedback
            if not self.validate_file_format(resume_path, [".pdf"]):
                return (
                    "Please submit resume in PDF format.",
                    None,
                    None,
                    self.candidate_feedback,
                )

            # Mock outputs for this submission
            video_transcript = self.process_video(video_path)
            emotion_analysis = self.analyze_emotions(video_path)
            resume_analysis = self.process_resume(resume_path)
            # Example of feedback
            feedback_list = self.get_feedback(
                name="Johnson",
                score=88,
                feedback="Outstanding technical and soft skills.",
            )
            # Append the new candidate feedback to the DataFrame
            self.candidate_feedback = pd.concat(
                [self.candidate_feedback, feedback_list], ignore_index=True
            )

            # Return both the individual result and the list result
            return (
                video_transcript,
                emotion_analysis,
                resume_analysis,
                self.candidate_feedback,
            )

        # Build the interface using Blocks
        with gr.Blocks() as demo:
            gr.Markdown("## HR Interview Analysis System")

            # Inputs section
            with gr.Row():
                video_input = gr.Video(label="Upload Interview Video")
                resume_input = gr.File(label="Upload Resume (PDF)")
            with gr.Row():
                question_input = gr.Textbox(
                    label="Interview Questions",
                    lines=5,
                    placeholder="Enter the interview questions here",
                )
                requirements_input = gr.Textbox(
                    label="Job Requirements",
                    lines=5,
                    placeholder="Enter the job requirements here",
                )

            submit_button = gr.Button("Submit")

            with gr.Tabs():
                with gr.Tab("Result"):
                    transcript_output = gr.Markdown(label="Video Transcript")
                    emotion_output = gr.Markdown(label="Emotion Analysis")
                    resume_output = gr.Markdown(label="Resume Analysis")

                with gr.Tab("List of Candidates"):
                    feedback_output = gr.Dataframe(
                        label="Candidate Feedback List", interactive=False
                    )

            save_button = gr.Button("Save Report")
            save_button.click(
                fn=self.save_report,
                inputs=[],
                outputs=gr.File(label="Download Report"),
            )
            # Connect the button to the function
            submit_button.click(
                fn=process_submission,
                inputs=[video_input, resume_input, question_input, requirements_input],
                outputs=[
                    transcript_output,
                    emotion_output,
                    resume_output,
                    feedback_output,
                ],
            )

        return demo


def launch_app():
    print(gr.__version__)
    app = GradioInterface()
    interface = app.create_interface()
    interface.launch()


if __name__ == "__main__":
    launch_app()
```
src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml
DELETED
@@ -1,4 +0,0 @@
```yaml
PROVIDER: nvidia
BASE_URL: https://integrate.api.nvidia.com/v1
MODEL: nvidia/llama-3.1-nemotron-70b-instruct
TEMPERATURE: 0
```
src/configs/llm/openai-gpt-3.5-turbo.yaml
DELETED
@@ -1,4 +0,0 @@
```yaml
PROVIDER: openai
BASE_URL: default
MODEL: gpt-3.5-turbo
TEMPERATURE: 0
```
src/configs/llm/openai-gpt-4o-mini.yaml
DELETED
@@ -1,4 +0,0 @@
```yaml
PROVIDER: openai
BASE_URL: default
MODEL: gpt-4o-mini
TEMPERATURE: 0
```
src/configs/parser/llamaparse_en.yaml
DELETED
@@ -1,7 +0,0 @@
```yaml
LANGUAGE: en
DISABLE_OCR: false
PAGE_ROC_BBOX:
  TOP: 0
  RIGHT: 0
  BOTTOM: 0
  LEFT: 0
```
src/domain/candidate.py
DELETED
@@ -1,13 +0,0 @@
```python
from dataclasses import dataclass
from typing import Dict, List


@dataclass
class Candidate:
    id: str
    name: str
    email: str
    resume_data: Dict
    interview_responses: List[str]
    emotional_metrics: Dict
    feedback: Dict
```
src/domain/enums/emotion_types.py
DELETED
@@ -1,21 +0,0 @@
```python
from enum import Enum


class EmotionType(Enum):
    SAD = "sad"
    FEAR = "fear"
    ANGRY = "angry"
    DISGUST = "disgust"

    HAPPY = "happy"
    NEUTRAL = "neutral"
    SURPRISE = "surprise"

    @classmethod
    def get_positive_emotions(cls):
        return [cls.HAPPY, cls.NEUTRAL, cls.SURPRISE]

    @classmethod
    def get_negative_emotions(cls):
        return [cls.SAD, cls.FEAR, cls.ANGRY, cls.DISGUST]
```
src/domain/enums/interview_status.py
DELETED
@@ -1,11 +0,0 @@
```python
from enum import Enum, auto


class InterviewStatus(Enum):
    SCHEDULED = auto()
    IN_PROGRESS = auto()
    COMPLETED = auto()
    CANCELLED = auto()
    PENDING_REVIEW = auto()
    REVIEWED = auto()
    FAILED = auto()
```
src/domain/interview.py
DELETED
@@ -1,28 +0,0 @@
```python
from dataclasses import dataclass
from datetime import datetime
from typing import List, Dict
from src.domain.enums.interview_status import InterviewStatus
from src.domain.enums.emotion_types import EmotionType


@dataclass
class Interview:
    id: str
    candidate_id: str
    job_id: str
    video_path: str
    status: InterviewStatus
    questions: List[str]
    responses_transcription: List[str]
    timestamp: datetime
    duration: int
    emotional_analysis: Dict[EmotionType, float]

    def is_completed(self) -> bool:
        return self.status == InterviewStatus.COMPLETED

    def is_reviewable(self) -> bool:
        return self.status in [
            InterviewStatus.COMPLETED,
            InterviewStatus.PENDING_REVIEW,
        ]
```
src/llm/base_llm_provider.py
DELETED
@@ -1,16 +0,0 @@
```python
"""Base class for LLM providers"""

from abc import abstractmethod
from typing import Dict, Optional


class BaseLLMProvider:
    @abstractmethod
    def __init__(self):
        """LLM provider initialization"""
        raise NotImplementedError

    @abstractmethod
    def complete(self, prompt: str = "") -> str:
        """LLM chat completion implementation by each provider"""
        raise NotImplementedError
```
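For orientation, adding a provider only requires subclassing `BaseLLMProvider` and implementing the two methods. A minimal sketch (the `EchoLLM` class below is hypothetical, for illustration only):

```python
from src.llm.base_llm_provider import BaseLLMProvider


class EchoLLM(BaseLLMProvider):
    """Hypothetical provider that returns the prompt unchanged."""

    def __init__(self):
        # A real provider would construct its SDK client here.
        pass

    def complete(self, prompt: str = "") -> str:
        # A real provider would forward the prompt to its API and return the text.
        return prompt
```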
src/llm/enums.py
DELETED
@@ -1,3 +0,0 @@
```python
OPENAI_LLM = "openai"
NVIDIA_LLM = "nvidia"
DEFAULT_LLM_API_BASE = "default"
```
src/llm/llm.py
DELETED
@@ -1,32 +0,0 @@
```python
import yaml

from src.llm.enums import OPENAI_LLM, NVIDIA_LLM
from src.llm.base_llm_provider import BaseLLMProvider
from src.llm.openai_llm import OpenAILLM
from src.llm.nvidia_llm import NvidiaLLM


def get_llm(config_file_path: str = "config.yaml") -> BaseLLMProvider:
    """
    Initiates an LLM client from a config file
    """

    # load config
    with open(config_file_path, "r") as f:
        config = yaml.safe_load(f)

    # init & return llm
    if config["PROVIDER"] == OPENAI_LLM:
        return OpenAILLM(
            model=config["MODEL"],
            temperature=config["TEMPERATURE"],
            base_url=config["BASE_URL"],
        )
    elif config["PROVIDER"] == NVIDIA_LLM:
        return NvidiaLLM(
            model=config["MODEL"],
            temperature=config["TEMPERATURE"],
            base_url=config["BASE_URL"],
        )
    else:
        # report the field the branch actually checks
        raise ValueError(f"Unsupported LLM provider: {config['PROVIDER']}")
```
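A typical call site, as a sketch: given one of the config files under `src/configs/llm` above and the matching API key loaded into the environment (e.g. via a `.env` file), `get_llm` hides the provider choice behind `BaseLLMProvider`:

```python
from dotenv import load_dotenv

from src.llm.llm import get_llm

load_dotenv()  # expects e.g. OPENAI_API_KEY in the environment

llm = get_llm("./src/configs/llm/openai-gpt-4o-mini.yaml")
print(llm.complete("Say hello in one short sentence."))
```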
src/llm/nvidia_llm.py
DELETED
@@ -1,29 +0,0 @@
```python
"""NVIDIA LLM Implementation"""

from llama_index.llms.nvidia import NVIDIA

from src.llm.base_llm_provider import BaseLLMProvider
from src.llm.enums import DEFAULT_LLM_API_BASE


class NvidiaLLM(BaseLLMProvider):
    def __init__(
        self,
        model: str = "nvidia/llama-3.1-nemotron-70b-instruct",
        temperature: float = 0.0,
        base_url: str = "https://integrate.api.nvidia.com/v1",
    ):
        """Initiate NVIDIA client"""

        if base_url == DEFAULT_LLM_API_BASE:
            self._client = NVIDIA(
                model=model,
                temperature=temperature,
            )
        else:
            self._client = NVIDIA(
                model=model, temperature=temperature, base_url=base_url
            )

    def complete(self, prompt: str = "") -> str:
        return str(self._client.complete(prompt))
```
src/llm/openai_llm.py
DELETED
@@ -1,29 +0,0 @@
```python
"""OpenAI LLM Implementation"""

from llama_index.llms.openai import OpenAI

from src.llm.base_llm_provider import BaseLLMProvider
from src.llm.enums import DEFAULT_LLM_API_BASE


class OpenAILLM(BaseLLMProvider):
    def __init__(
        self,
        model: str = "gpt-4o-mini",
        temperature: float = 0.0,
        base_url: str = DEFAULT_LLM_API_BASE,
    ):
        """Initiate OpenAI client"""

        if base_url == DEFAULT_LLM_API_BASE:
            self._client = OpenAI(
                model=model,
                temperature=temperature,
            )
        else:
            self._client = OpenAI(
                model=model, temperature=temperature, base_url=base_url
            )

    def complete(self, prompt: str = "") -> str:
        return str(self._client.complete(prompt))
```
src/main_test.py
DELETED
@@ -1,97 +0,0 @@
```python
from dotenv import load_dotenv
from docx import Document

from src.llm.llm import get_llm
from src.service.resume_parser import ResumeParser
from src.service.emotion_recognition import EmotionRecognition
from src.service.utils import (
    extract_audio,
    audio2text,
    sample_frames,
    parse_yaml_string,
)
from src.template.grading_prompt import (
    GRADE_RESPONSE_PROMPT,
    RANKING_AND_FEEDBACK_PROMPT,
)

# sample input values
from src.sample_inputs import (
    VIDEO_PATH,
    RESUME_PATH,
    INTERVIEW_QUESTION,
    JOB_REQUIREMENTS,
)


# customise this part
LLM_CONFIG_FILE = "./src/configs/llm/openai-gpt-3.5-turbo.yaml"
# LLM_CONFIG_FILE = "./src/configs/llm/openai-gpt-4o-mini.yaml"
# LLM_CONFIG_FILE = "./src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml"

RESUME_PARSER_CONFIG_FILE = "./src/configs/parser/llamaparse_en.yaml"
OUTPUT_AUDIO_FILE = "/Users/gohyixian/Downloads/test_cases/outputs/audio_output.wav"  # only supports .wav
OUTPUT_REPORT_FILE = "/Users/gohyixian/Downloads/test_cases/outputs/report.docx"

# init API keys as env variables
load_dotenv()

# init LLM & resume parser
llm = get_llm(LLM_CONFIG_FILE)
parser = ResumeParser(RESUME_PARSER_CONFIG_FILE)


# 1. extract audio from video
OUTPUT_AUDIO_FILE = extract_audio(VIDEO_PATH, OUTPUT_AUDIO_FILE)
assert OUTPUT_AUDIO_FILE is not None, "Audio extraction failed."

# 2. audio to text
audio_text = audio2text(OUTPUT_AUDIO_FILE)
print(audio_text)

# 3. extract frames from video
frames = sample_frames(VIDEO_PATH, sample_rate=8)
print(frames)

# 4. deepface: extract emotions & compute confidence scores
emotions = EmotionRecognition.detect_face_emotions(frames)
emotions_dict = EmotionRecognition.process_emotions(emotions)
conf_score = emotions_dict["conf"]
print(emotions_dict)

# 5. llamaparse: parse resume into markdown
resume_md = parser.parse_resume_to_markdown(RESUME_PATH)
print(resume_md)

# 6. llm grades the question response
formatted_grading_prompt = GRADE_RESPONSE_PROMPT.format(
    interview_question=INTERVIEW_QUESTION,
    conf_score=conf_score,
    response_text=audio_text,
)
grade = llm.complete(formatted_grading_prompt)
print(grade)

# 7. llm ranks and outputs the final feedback
formatted_ranking_prompt = RANKING_AND_FEEDBACK_PROMPT.format(
    job_requirements=JOB_REQUIREMENTS, interview_feedback=grade, resume_text=resume_md
)
rank_and_feedback = llm.complete(formatted_ranking_prompt)
print(rank_and_feedback)


# 8. save to .docx report
expected_keys = ["name", "score", "feedback"]
rank_and_feedback_dict = parse_yaml_string(
    yaml_string=rank_and_feedback, expected_keys=expected_keys, cleanup=True
)
print(rank_and_feedback_dict)

doc = Document()
doc.add_heading(f"{rank_and_feedback_dict['name']}", 0)
doc.add_heading(f"Overall Score: {rank_and_feedback_dict['score']}", 1)
doc.add_heading("Brief Overview", 1)
doc.add_paragraph(f"{rank_and_feedback_dict['feedback']}")

# Save the document
doc.save(OUTPUT_REPORT_FILE)
```
src/sample_inputs.py
DELETED
@@ -1,98 +0,0 @@
```python
RESUME_PATH = "/Users/gohyixian/Downloads/test_cases/CV_2024_24_JUN.pdf"

VIDEO_PATH = "/Users/gohyixian/Downloads/test_cases/test.mp4"

INTERVIEW_QUESTION = """
Can you describe a project where you fine-tuned a transformer-based model (e.g., BERT, GPT, or T5) for a specific application?
Walk us through your approach to dataset preparation, model optimization, and deployment.
How did you handle challenges like ensuring the model's performance, scalability, and fairness?
"""

JOB_REQUIREMENTS = """
Job Title: LLM Engineer

Job Description:
################
- We are seeking a skilled and innovative LLM Engineer to join our AI team. The ideal candidate will
  have hands-on experience in developing, fine-tuning, and deploying large language models (LLMs) for
  various applications. You will collaborate with cross-functional teams to deliver cutting-edge AI
  solutions, leveraging your expertise in natural language processing (NLP), deep learning, and
  large-scale systems.


Key Responsibilities
####################
1. Model Development:
   - Design and fine-tune large language models (e.g., GPT, LLaMA, or similar) for tasks like text generation,
     summarization, question answering, and classification.
   - Implement advanced techniques for model optimization, including pruning, quantization, and distillation.

2. Data Management:
   - Curate, preprocess, and manage large datasets for training and evaluation.
   - Ensure data quality by cleaning, augmenting, and annotating datasets.

3. Infrastructure & Deployment:
   - Build scalable pipelines for training and deploying LLMs using frameworks like PyTorch, TensorFlow, or JAX.
   - Optimize inference speed and memory usage for production-grade applications.

4. Model Evaluation:
   - Develop benchmarks to evaluate model performance, fairness, and safety.
   - Implement guardrails to mitigate bias and ensure ethical use of AI systems.

5. Collaboration:
   - Work closely with product managers, data scientists, and software engineers to align model capabilities with business requirements.
   - Provide mentorship to junior team members and contribute to knowledge sharing within the team.

6. Research & Innovation:
   - Stay updated on the latest research in NLP and deep learning.
   - Contribute to academic papers, patents, or open-source projects where appropriate.


Requirements
############
1. Technical Skills:
   - Strong programming skills in Python.
   - Proficiency with deep learning frameworks (e.g., PyTorch, TensorFlow, JAX).
   - Experience in training and fine-tuning transformer-based models (e.g., BERT, GPT, T5).
   - Familiarity with distributed training techniques and tools like Horovod or DeepSpeed.
   - Knowledge of vector databases and retrieval-augmented generation (RAG) techniques.
   - Hands-on experience with MLOps tools (e.g., MLflow, Docker, Kubernetes) for deployment.
   - Expertise in working with APIs for integrating LLMs into production systems.

2. Educational Background:
   - Bachelor's or Master's degree in Computer Science, Artificial Intelligence, Data Science, or a related field. Ph.D. preferred but not required.

3. Experience:
   - 3+ years of experience in NLP, machine learning, or a related field.
   - Demonstrated success in building and deploying LLM-powered applications.
   - Contributions to open-source projects or research publications in NLP are a plus.

4. Soft Skills:
   - Strong problem-solving abilities and attention to detail.
   - Excellent communication and collaboration skills to work with cross-functional teams.
   - Adaptable, with a passion for continuous learning and innovation.
   - A proactive and goal-oriented mindset.

5. Target Personalities:
   - Innovative Thinker: Always exploring new ways to improve model performance and usability.
   - Team Player: Collaborates effectively across diverse teams to deliver AI solutions.
   - Ethically Minded: Committed to ensuring the ethical and fair use of AI technologies.
   - Detail-Oriented: Meticulous in coding, data handling, and model evaluation.
   - Resilient Learner: Thrives in a fast-paced environment, keeping up with advancements in AI research.


Preferred Qualifications:
#########################
- Experience with foundation model APIs (e.g., OpenAI, Hugging Face).
- Knowledge of reinforcement learning techniques, particularly RLHF (Reinforcement Learning with Human Feedback).
- Familiarity with multi-modal LLMs and their integration.
- Experience working in cloud environments like AWS, Azure, or GCP.
- Contributions to community forums, blogs, or conferences related to LLMs or NLP.

What We Offer
#############
- Competitive salary and benefits package.
- Opportunities to work on groundbreaking AI projects.
- Flexible work environment, including remote options.
- Access to cutting-edge resources and infrastructure for AI development.
"""
```
src/service/emotion_recognition.py
DELETED
@@ -1,136 +0,0 @@
```python
import numpy as np
from deepface import DeepFace

from src.domain.enums.emotion_types import EmotionType


class EmotionRecognition:
    def __init__(self):
        pass

    @classmethod
    def detect_face_emotions(cls, frames: list[np.ndarray] = None) -> list:
        """
        Performs facial emotion detection using the DeepFace model
        """
        emotions = []
        for frame in frames:
            frame_result = DeepFace.analyze(
                frame, actions=["emotion"], enforce_detection=False
            )
            emotions.append(frame_result)

        return emotions

    @classmethod
    def process_emotions(cls, emotions: list) -> dict:
        """
        Processes the emotions by calculating the overall confidence score using a
        custom weighted emotion balancing algorithm.

        Returns:
        - weighted normalized score ("mean")
        - signed, weighted normalized score ("result")
        - confidence score ("conf")
        """

        count = 0
        emots = {str(e.value): 0 for e in EmotionType}

        # accumulate per-frame emotion percentages
        for frame_result in emotions:
            if len(frame_result) > 0:
                emot = frame_result[0]["emotion"]
                for e in EmotionType:
                    key = str(e.value)
                    emots[key] = emots.get(key, 0) + emot[key]
                count += 1

        # prevent zero division
        if count == 0:
            count = 1

        # average across frames and rescale percentages into [0, 1]
        for i in list(emots.keys()):
            emots[i] /= count * 100

        # reweight according to custom weightage
        sad_score = emots[str(EmotionType.SAD.value)] * 1.3
        fear_score = emots[str(EmotionType.FEAR.value)] * 1.3
        angry_score = emots[str(EmotionType.ANGRY.value)] * 1.3
        disgust_score = emots[str(EmotionType.DISGUST.value)] * 10
        happy_score = emots[str(EmotionType.HAPPY.value)] * 1.7
        neutral_score = emots[str(EmotionType.NEUTRAL.value)] / 1.2
        surprise_score = emots[str(EmotionType.SURPRISE.value)] * 1.4

        # unsigned baseline: mean of the normalized weighted magnitudes
        score_list = [
            sad_score,
            angry_score,
            surprise_score,
            fear_score,
            happy_score,
            disgust_score,
            neutral_score,
        ]
        normalized_scores = cls.__normalize_scores(score_list)
        mean = np.mean(normalized_scores)

        # signed variant: negative emotions pull the score down
        result_scores = [
            (-sad_score),
            (-angry_score),
            surprise_score,
            (-fear_score),
            happy_score,
            (-disgust_score),
            neutral_score,
        ]
        normalized_result_scores = cls.__normalize_scores(result_scores)
        result = np.mean(normalized_result_scores)

        difference = abs((mean - result) / mean) * 100

        # keep values in range of [0, 100]
        difference = min(difference, 50)

        if mean > result:
            conf = 50 - difference
        else:
            conf = 50 + difference

        return {"mean": mean, "result": result, "conf": conf}

    @classmethod
    def __normalize_scores(cls, scores: list) -> list:
        min_val, max_val = min(scores), max(scores)
        return [(score - min_val) / (max_val - min_val) for score in scores]
```
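To make the confidence formula concrete: `process_emotions` compares the mean of the min-max-normalized weighted magnitudes against the mean of the signed variant (negative emotions flipped), then maps the relative difference into [0, 100] around a neutral 50. A sketch with hand-built frame results in DeepFace's output shape (per-emotion percentages); the numbers are made up:

```python
from src.service.emotion_recognition import EmotionRecognition

# Two fake frame results in DeepFace's shape: one detected face per frame,
# with per-emotion scores given as percentages.
fake_frames = [
    [{"emotion": {"sad": 5.0, "fear": 5.0, "angry": 5.0, "disgust": 0.0,
                  "happy": 60.0, "neutral": 20.0, "surprise": 5.0}}],
    [{"emotion": {"sad": 10.0, "fear": 5.0, "angry": 5.0, "disgust": 0.0,
                  "happy": 50.0, "neutral": 25.0, "surprise": 5.0}}],
]

scores = EmotionRecognition.process_emotions(fake_frames)
print(scores["conf"])  # lands above 50 here, since positive emotions dominate
```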
src/service/resume_parser.py
DELETED
@@ -1,42 +0,0 @@
```python
import yaml
from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader

from src.template.parser_prompt import PARSE_RESUME_PROMPT


class ResumeParser:
    def __init__(self, config_file_path: str = "config.yaml"):
        """
        Initiates a resume parser client
        """

        # load config
        with open(config_file_path, "r") as f:
            config = yaml.safe_load(f)

        # set bbox size
        bbox_margin = config["PAGE_ROC_BBOX"]
        bbox = f"{bbox_margin['TOP']},{bbox_margin['RIGHT']},{bbox_margin['BOTTOM']},{bbox_margin['LEFT']}"

        self._parser = LlamaParse(
            language=config["LANGUAGE"],
            disable_ocr=config["DISABLE_OCR"],
            bounding_box=bbox,
            result_type="markdown",
            parsing_instruction=PARSE_RESUME_PROMPT,
            is_formatting_instruction=False,
        )

    def parse_resume_to_markdown(self, resume_path: str = "") -> str:
        """
        Parses the resume into markdown text.

        Supported filetypes:
        - .pdf
        """
        document = SimpleDirectoryReader(
            input_files=[resume_path], file_extractor={".pdf": self._parser}
        ).load_data()

        return "\n".join([str(d.text) for d in document])
```
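Usage mirrors step 5 of `main_test.py` above; a sketch, assuming a LlamaParse API key (`LLAMA_CLOUD_API_KEY`) is present in the environment and a hypothetical `resume.pdf` input path:

```python
from dotenv import load_dotenv

from src.service.resume_parser import ResumeParser

load_dotenv()  # LlamaParse reads its API key from the environment

parser = ResumeParser("./src/configs/parser/llamaparse_en.yaml")
print(parser.parse_resume_to_markdown("resume.pdf"))  # hypothetical input path
```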
src/service/utils.py
DELETED
@@ -1,103 +0,0 @@
```python
import cv2
import yaml
import numpy as np
from pathlib import Path
import speech_recognition as sr
from moviepy import VideoFileClip


def extract_audio(
    input_video_file: str = "",
    output_audio_file: str = "",
) -> str:
    """
    Extracts audio from the input video file and saves it to the given path.
    Returns the path to the saved audio file if extraction is successful.

    Supported input video file formats:
    - .mp4
    - .mov

    Supported output audio file formats:
    - .wav
    """
    try:
        input_video_file = str(Path(input_video_file))
        output_audio_file = str(Path(output_audio_file))

        # Load the video file
        video = VideoFileClip(input_video_file)

        # Extract audio and write to output file
        video.audio.write_audiofile(output_audio_file)

        print(f"[extract_audio()] : Audio extracted and saved to {output_audio_file}")

        return output_audio_file
    except Exception as e:
        print(e)
        return None


def audio2text(audio_file: str = "") -> str:
    """
    Converts audio to text using Google's speech recognition engine
    (run locally via the SpeechRecognition library) and returns the text.
    """
    r = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = r.record(source)
        text = r.recognize_google(audio)
    return text


def sample_frames(input_video_file: str = "", sample_rate: int = 2) -> list[np.ndarray]:
    """
    Samples one frame every 'sample_rate' frames from the video file and returns
    them as a list of NumPy ndarray objects.
    """
    cap = cv2.VideoCapture(input_video_file)
    frames = []
    count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if count % sample_rate == 0:
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        count += 1
    cap.release()

    return frames


def parse_yaml_string(
    yaml_string: str = "", expected_keys: list[str] = None, cleanup: bool = True
) -> dict:
    """
    Parses a YAML string into a Python dictionary based on a list of
    expected keys.
    """

    # removes markdown code-fence headers and footers (```yaml ... ```) if present
    if cleanup:
        yaml_string = yaml_string.replace("YAML", "")
        yaml_string = yaml_string.replace("yaml", "")
        yaml_string = yaml_string.replace("`", "")

    try:
        parsed_data = yaml.safe_load(yaml_string)

        # Handle missing keys with error handling
        result = {}
        for key in expected_keys:
            if key in parsed_data:
                result[key] = parsed_data[key]
            else:
                print(f"[parse_yaml_string()] : Missing key {key}")

        return result

    # safe_load raises yaml.YAMLError on malformed input, not KeyError
    except yaml.YAMLError as e:
        print(e)
        return None
```
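`parse_yaml_string` is what lets `main_test.py` above turn the model's (possibly fenced) YAML reply into a dict; a quick sketch of the cleanup path, with a made-up reply:

```python
from src.service.utils import parse_yaml_string

# A made-up LLM reply wrapped in a markdown code fence.
llm_reply = "```yaml\nname: Johnson\nscore: 88\nfeedback: Strong overall fit.\n```"

result = parse_yaml_string(
    yaml_string=llm_reply, expected_keys=["name", "score", "feedback"], cleanup=True
)
print(result)  # {'name': 'Johnson', 'score': 88, 'feedback': 'Strong overall fit.'}
```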
src/template/grading_prompt.py
DELETED
@@ -1,111 +0,0 @@
```python
from llama_index.core.prompts import PromptTemplate

GRADE_RESPONSE_PROMPT = PromptTemplate(
    """
You are a Human Resource Manager and an interviewer.
Your task is to review an interviewee's overall performance based on multiple factors.
You will be provided with the interview question, the interviewee's facial confidence score, their response to the question in text form, and additional context on the interview.

The confidence score will range from 0 to 100, and you will also receive the text of their answers to the interview question.
Based on this information, evaluate the interviewee's performance in the following areas:

1. **Answer Quality**:
   Assess the clarity, relevance, and accuracy of their response to the interview question.
   Did the interviewee address the key points effectively?

2. **Problem-Solving Skills**:
   Evaluate how well the interviewee tackled any problem presented in the interview question.
   Were they able to think critically, analyze the situation, and propose solutions?

3. **Confidence**:
   Based on their facial confidence score (0 to 100) and their overall demeanor in the response, rate their confidence level and how it impacts their presentation and communication.

4. **Personality**:
   Consider the tone, communication style, and interpersonal skills of the interviewee.
   How well did they engage with the question and the interview process?
   Do they demonstrate qualities like openness, empathy, or assertiveness?

5. **Overall Performance**:
   Based on the combination of the above factors, provide a holistic evaluation of their performance in the interview.
   Offer feedback on strengths and areas for improvement.

Ensure that your feedback is clear and actionable, so other HR professionals reviewing the interview can easily assess the interviewee's suitability for the position.


########################################
Interview Question:
{interview_question}

########################################
Interviewee's Facial Confidence Score:
{conf_score}

########################################
Interviewee's response in text:
{response_text}

########################################
output:
"""
)


RANKING_AND_FEEDBACK_PROMPT = PromptTemplate(
    """
You are an HR specialist evaluating an interviewee for a specific role.
Your task is to assess the suitability of the interviewee based on the following information:

1. **Job Requirements**:
   A list of skills, experiences, and qualifications required for the role.

2. **Interview Feedback**:
   The feedback and review of the interviewee's performance in the interview, which includes assessments on their answer quality, problem-solving skills, confidence, personality, and overall performance.

3. **Resume Text**:
   A parsed version of the interviewee's resume, which includes their work experience, skills, education, and other relevant information.

Using these inputs, generate an output strictly in the following YAML format:

###########################
name: <name>
score: <score>
feedback: <feedback text>
###########################


Details for the output:
1. **name**:
   Name of the interviewee.

2. **score**:
   A score ranging from 0 to 100, where 0 means the interviewee is not recommended for the position, and 100 means they are a perfect match for the job.

3. **feedback**:
   - A detailed breakdown explaining how the interviewee's experience, skills, and performance align or do not align with the job requirements.
   - Discuss whether the interviewee's skills, experiences, and overall traits match or fail to meet the required qualifications.
   - Provide a short, concise sentence summarizing the interviewee's suitability for the role.

Ensure that the feedback is comprehensive yet concise, offering actionable insights for HR professionals to make a decision about the interviewee's fit for the role.


########################################
Job Requirements:
{job_requirements}

########################################
Interview Feedback:
{interview_feedback}

########################################
Resume Text:
{resume_text}

########################################

Output strictly following the below YAML format:

name: <name>
score: <score>
feedback: <feedback text>
"""
)
```
src/template/parser_prompt.py
DELETED
@@ -1,21 +0,0 @@
```python
from llama_index.core.prompts import PromptTemplate

PARSE_RESUME_PROMPT = """
You are tasked with parsing a resume.

**Your Focus**:
- Reproduce only the main body text, including section headers and bullet points, exactly as received.
- Do not skip section numbers in the format DIGIT.DIGIT (e.g., 10.1, 3.1); you must apply a markdown header level based on the depth (e.g., # for main sections, ## for subsections) to reflect the appropriate hierarchy, and output them.
- Make sure that section numbers are always followed by the corresponding section title, without a '\n' character in between and without separating them into different headers. Valid examples:
  - '# 14 Experience'
  - '# 2 Education'
  Invalid examples:
  - '# 14\n # Experience'
  - '# 2\n # Education'
- You may only add markdown header symbols (#, ##, ###, etc.) to denote the hierarchical levels of section headers.
- Do not make up any text or headers that are not present in the original text.

**Expected Output**:
- Text, section headers, and bullet points must be reproduced without any text edits, additions, or deletions, other than adding markdown header symbols (#, ##, ###, etc.).
- Use markdown headers to denote additional hierarchy (e.g., # for main sections, ## for subsections) based on the best interpretation of the document's structure.
"""
```