RainPoo committed
Commit 50406c0 · verified · 1 Parent(s): d1a02df

Delete src

src/app.py DELETED
@@ -1,210 +0,0 @@
- import gradio as gr
- from typing import Dict
- import pandas as pd
-
- # from src.application.services import InterviewAnalyzer
- # from src.infrastructure.llm import LangchainService
- # from src.infrastructure.emotion import DeepFaceService
- # from src.infrastructure.speech import GoogleSpeechService
-
-
- # class GradioInterface:
- #     def __init__(self):
- #         # Initialize services
- #         self.emotion_service = DeepFaceService()
- #         self.speech_service = GoogleSpeechService()
- #         self.llm_service = LangchainService()
- #
- #         # Initialize analyzer
- #         self.analyzer = InterviewAnalyzer(
- #             emotion_service=self.emotion_service,
- #             speech_service=self.speech_service,
- #             llm_service=self.llm_service,
- #         )
- #
- #     def create_interface(self) -> gr.Interface:
- #         def process_submission(
- #             video_file: str, resume_file: str, job_requirements: str
- #         ) -> Dict:
- #             # Implementation for processing submission
- #             pass
- #
- #         # Create Gradio interface
- #         interface = gr.Interface(
- #             fn=process_submission,
- #             inputs=[
- #                 gr.Video(label="Interview Recording"),
- #                 gr.File(label="Resume"),
- #                 gr.Textbox(label="Job Requirements", lines=5),
- #             ],
- #             outputs=gr.JSON(label="Analysis Results"),
- #             title="HR Interview Analysis System",
- #             description="Upload interview recording and resume to analyze candidate performance",
- #         )
- #
- #         return interface
-
-
- # Testing to set up the simple interface
- class GradioInterface:
-     def __init__(self):
-         # DataFrame listing every candidate's feedback
-         self.candidate_feedback = pd.DataFrame(columns=["Name", "Score", "Feedback"])
-
-     def validate_file_format(self, file_path: str, valid_extensions: list) -> bool:
-         return isinstance(file_path, str) and any(
-             file_path.endswith(ext) for ext in valid_extensions
-         )
-
-     def process_video(self, video_path: str) -> str:
-         # Mock transcript extracted from the video
-         return "### Transcript\nExample of transcript of the interview video."
-
-     def process_resume(self, resume_path: str) -> str:
-         # Mock resume parsing
-         return "### Resume Analysis\n- **Skills**: NLP, Machine Learning, Computer Vision\n- **Experience**: 5 years."
-
-     def analyze_emotions(self, video_path: str) -> str:
-         # Mock emotion analysis
-         return "### Emotion Analysis\n- **Overall Emotion**: Positive\n- **Details**: Candidate displayed confidence and engagement."
-
-     def get_feedback(self, name: str, score: int, feedback: str) -> pd.DataFrame:
-         return pd.DataFrame({"Name": [name], "Score": [score], "Feedback": [feedback]})
-
-     def save_report(self):
-         # Save a placeholder report (plain-text stub despite the .docx extension)
-         report_path = "report_path.docx"
-         with open(report_path, "w") as f:
-             # Pass fields to include in the report here
-             f.write("Example report")
-         return report_path
-
-     def create_interface(self) -> gr.Blocks:
-         def process_submission(
-             video_path, resume_path, interview_questions, job_requirements
-         ):
-             # Validate inputs and formats
-             if not video_path:
-                 return (
-                     "Please upload an interview video.",
-                     None,
-                     None,
-                     self.candidate_feedback,
-                 )
-             if not resume_path:
-                 return (
-                     "Please upload a resume (PDF).",
-                     None,
-                     None,
-                     self.candidate_feedback,
-                 )
-             if not interview_questions:
-                 return (
-                     "Please provide interview questions.",
-                     None,
-                     None,
-                     self.candidate_feedback,
-                 )
-             if not job_requirements:
-                 return (
-                     "Please provide job requirements.",
-                     None,
-                     None,
-                     self.candidate_feedback,
-                 )
-             if not self.validate_file_format(video_path, [".mp4", ".avi", ".mkv"]):
-                 return "Invalid video format.", None, None, self.candidate_feedback
-             if not self.validate_file_format(resume_path, [".pdf"]):
-                 return (
-                     "Please submit the resume in PDF format.",
-                     None,
-                     None,
-                     self.candidate_feedback,
-                 )
-
-             # Mock outputs for this submission
-             video_transcript = self.process_video(video_path)
-             emotion_analysis = self.analyze_emotions(video_path)
-             resume_analysis = self.process_resume(resume_path)
-             # Example of feedback
-             feedback_list = self.get_feedback(
-                 name="Johnson",
-                 score=88,
-                 feedback="Outstanding technical and soft skills.",
-             )
-             # Append the new candidate feedback to the DataFrame
-             self.candidate_feedback = pd.concat(
-                 [self.candidate_feedback, feedback_list], ignore_index=True
-             )
-
-             # Return both the individual result and the list result
-             return (
-                 video_transcript,
-                 emotion_analysis,
-                 resume_analysis,
-                 self.candidate_feedback,
-             )
-
-         # Build the interface using Blocks
-         with gr.Blocks() as demo:
-             gr.Markdown("## HR Interview Analysis System")
-
-             # Inputs section
-             with gr.Row():
-                 video_input = gr.Video(label="Upload Interview Video")
-                 resume_input = gr.File(label="Upload Resume (PDF)")
-             with gr.Row():
-                 question_input = gr.Textbox(
-                     label="Interview Questions",
-                     lines=5,
-                     placeholder="Enter the interview questions here",
-                 )
-                 requirements_input = gr.Textbox(
-                     label="Job Requirements",
-                     lines=5,
-                     placeholder="Enter the job requirements here",
-                 )
-
-             submit_button = gr.Button("Submit")
-
-             with gr.Tabs():
-                 with gr.Tab("Result"):
-                     transcript_output = gr.Markdown(label="Video Transcript")
-                     emotion_output = gr.Markdown(label="Emotion Analysis")
-                     resume_output = gr.Markdown(label="Resume Analysis")
-
-                 with gr.Tab("List of Candidates"):
-                     feedback_output = gr.Dataframe(
-                         label="Candidate Feedback List", interactive=False
-                     )
-
-             save_button = gr.Button("Save Report")
-             save_button.click(
-                 fn=self.save_report,
-                 inputs=[],
-                 outputs=gr.File(label="Download Report"),
-             )
-             # Connect the button to the function
-             submit_button.click(
-                 fn=process_submission,
-                 inputs=[video_input, resume_input, question_input, requirements_input],
-                 outputs=[
-                     transcript_output,
-                     emotion_output,
-                     resume_output,
-                     feedback_output,
-                 ],
-             )
-
-         return demo
-
-
- def launch_app():
-     print(gr.__version__)
-     app = GradioInterface()
-     interface = app.create_interface()
-     interface.launch()
-
-
- if __name__ == "__main__":
-     launch_app()

src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml DELETED
@@ -1,4 +0,0 @@
- PROVIDER: nvidia
- BASE_URL: https://integrate.api.nvidia.com/v1
- MODEL: nvidia/llama-3.1-nemotron-70b-instruct
- TEMPERATURE: 0

src/configs/llm/openai-gpt-3.5-turbo.yaml DELETED
@@ -1,4 +0,0 @@
- PROVIDER: openai
- BASE_URL: default
- MODEL: gpt-3.5-turbo
- TEMPERATURE: 0

src/configs/llm/openai-gpt-4o-mini.yaml DELETED
@@ -1,4 +0,0 @@
- PROVIDER: openai
- BASE_URL: default
- MODEL: gpt-4o-mini
- TEMPERATURE: 0

src/configs/parser/llamaparse_en.yaml DELETED
@@ -1,7 +0,0 @@
- LANGUAGE: en
- DISABLE_OCR: false
- PAGE_ROC_BBOX:
-   TOP: 0
-   RIGHT: 0
-   BOTTOM: 0
-   LEFT: 0

src/domain/candidate.py DELETED
@@ -1,13 +0,0 @@
- from dataclasses import dataclass
- from typing import Dict, List
-
-
- @dataclass
- class Candidate:
-     id: str
-     name: str
-     email: str
-     resume_data: Dict
-     interview_responses: List[str]
-     emotional_metrics: Dict
-     feedback: Dict

src/domain/enums/emotion_types.py DELETED
@@ -1,21 +0,0 @@
- from enum import Enum
-
-
- class EmotionType(Enum):
-
-     SAD = "sad"
-     FEAR = "fear"
-     ANGRY = "angry"
-     DISGUST = "disgust"
-
-     HAPPY = "happy"
-     NEUTRAL = "neutral"
-     SURPRISE = "surprise"
-
-     @classmethod
-     def get_positive_emotions(cls):
-         return [cls.HAPPY, cls.NEUTRAL, cls.SURPRISE]
-
-     @classmethod
-     def get_negative_emotions(cls):
-         return [cls.SAD, cls.FEAR, cls.ANGRY, cls.DISGUST]

src/domain/enums/interview_status.py DELETED
@@ -1,11 +0,0 @@
- from enum import Enum, auto
-
-
- class InterviewStatus(Enum):
-     SCHEDULED = auto()
-     IN_PROGRESS = auto()
-     COMPLETED = auto()
-     CANCELLED = auto()
-     PENDING_REVIEW = auto()
-     REVIEWED = auto()
-     FAILED = auto()

src/domain/interview.py DELETED
@@ -1,28 +0,0 @@
- from dataclasses import dataclass
- from datetime import datetime
- from typing import List, Dict
- from src.domain.enums.interview_status import InterviewStatus
- from src.domain.enums.emotion_types import EmotionType
-
-
- @dataclass
- class Interview:
-     id: str
-     candidate_id: str
-     job_id: str
-     video_path: str
-     status: InterviewStatus
-     questions: List[str]
-     responses_transcription: List[str]
-     timestamp: datetime
-     duration: int
-     emotional_analysis: Dict[EmotionType, float]
-
-     def is_completed(self) -> bool:
-         return self.status == InterviewStatus.COMPLETED
-
-     def is_reviewable(self) -> bool:
-         return self.status in [
-             InterviewStatus.COMPLETED,
-             InterviewStatus.PENDING_REVIEW,
-         ]
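
For orientation, this dataclass composes the two enums above; a minimal construction sketch (every field value below is hypothetical, chosen only to illustrate the types) looks like:

from datetime import datetime
from src.domain.interview import Interview
from src.domain.enums.interview_status import InterviewStatus

interview = Interview(
    id="iv-001",  # hypothetical identifiers
    candidate_id="cand-042",
    job_id="job-007",
    video_path="/tmp/test.mp4",
    status=InterviewStatus.PENDING_REVIEW,
    questions=["Walk us through a fine-tuning project."],
    responses_transcription=["I fine-tuned BERT for ..."],
    timestamp=datetime.now(),
    duration=600,  # assumed to be seconds; the field is only typed as int
    emotional_analysis={},  # Dict[EmotionType, float], left empty here
)

# PENDING_REVIEW is reviewable but not completed
assert interview.is_reviewable() and not interview.is_completed()
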
src/llm/base_llm_provider.py DELETED
@@ -1,16 +0,0 @@
- """Base class for LLM providers"""
-
- from abc import ABC, abstractmethod
- from typing import Dict, Optional
-
-
- class BaseLLMProvider(ABC):
-     @abstractmethod
-     def __init__(self):
-         """LLM provider initialization"""
-         raise NotImplementedError
-
-     @abstractmethod
-     def complete(self, prompt: str = "") -> str:
-         """LLM chat completion implementation by each provider"""
-         raise NotImplementedError

src/llm/enums.py DELETED
@@ -1,3 +0,0 @@
- OPENAI_LLM = "openai"
- NVIDIA_LLM = "nvidia"
- DEFAULT_LLM_API_BASE = "default"

src/llm/llm.py DELETED
@@ -1,32 +0,0 @@
- import yaml
-
- from src.llm.enums import OPENAI_LLM, NVIDIA_LLM
- from src.llm.base_llm_provider import BaseLLMProvider
- from src.llm.openai_llm import OpenAILLM
- from src.llm.nvidia_llm import NvidiaLLM
-
-
- def get_llm(config_file_path: str = "config.yaml") -> BaseLLMProvider:
-     """
-     Initiates an LLM client from a config file
-     """
-
-     # load config
-     with open(config_file_path, "r") as f:
-         config = yaml.safe_load(f)
-
-     # init & return llm
-     if config["PROVIDER"] == OPENAI_LLM:
-         return OpenAILLM(
-             model=config["MODEL"],
-             temperature=config["TEMPERATURE"],
-             base_url=config["BASE_URL"],
-         )
-     elif config["PROVIDER"] == NVIDIA_LLM:
-         return NvidiaLLM(
-             model=config["MODEL"],
-             temperature=config["TEMPERATURE"],
-             base_url=config["BASE_URL"],
-         )
-     else:
-         raise ValueError(f"Unsupported LLM provider: {config['PROVIDER']}")
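
The deleted YAML configs above plug straight into this factory. A hedged usage sketch (assuming the repo root is on PYTHONPATH and the provider API key lives in a .env file, as main_test.py below also assumes) would be:

from dotenv import load_dotenv
from src.llm.llm import get_llm

load_dotenv()  # exposes e.g. OPENAI_API_KEY from .env to the client

llm = get_llm("./src/configs/llm/openai-gpt-4o-mini.yaml")
print(llm.complete("Summarise this candidate's strengths in one line."))

Because both providers subclass BaseLLMProvider, callers only ever see complete().
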
src/llm/nvidia_llm.py DELETED
@@ -1,29 +0,0 @@
- """NVIDIA LLM Implementation"""
-
- from llama_index.llms.nvidia import NVIDIA
-
- from src.llm.base_llm_provider import BaseLLMProvider
- from src.llm.enums import DEFAULT_LLM_API_BASE
-
-
- class NvidiaLLM(BaseLLMProvider):
-     def __init__(
-         self,
-         model: str = "nvidia/llama-3.1-nemotron-70b-instruct",
-         temperature: float = 0.0,
-         base_url: str = "https://integrate.api.nvidia.com/v1",
-     ):
-         """Initiate NVIDIA client"""
-
-         if base_url == DEFAULT_LLM_API_BASE:
-             self._client = NVIDIA(
-                 model=model,
-                 temperature=temperature,
-             )
-         else:
-             self._client = NVIDIA(
-                 model=model, temperature=temperature, base_url=base_url
-             )
-
-     def complete(self, prompt: str = "") -> str:
-         return str(self._client.complete(prompt))

src/llm/openai_llm.py DELETED
@@ -1,29 +0,0 @@
- """OpenAI LLM Implementation"""
-
- from llama_index.llms.openai import OpenAI
-
- from src.llm.base_llm_provider import BaseLLMProvider
- from src.llm.enums import DEFAULT_LLM_API_BASE
-
-
- class OpenAILLM(BaseLLMProvider):
-     def __init__(
-         self,
-         model: str = "gpt-4o-mini",
-         temperature: float = 0.0,
-         base_url: str = DEFAULT_LLM_API_BASE,
-     ):
-         """Initiate OpenAI client"""
-
-         if base_url == DEFAULT_LLM_API_BASE:
-             self._client = OpenAI(
-                 model=model,
-                 temperature=temperature,
-             )
-         else:
-             self._client = OpenAI(
-                 model=model, temperature=temperature, base_url=base_url
-             )
-
-     def complete(self, prompt: str = "") -> str:
-         return str(self._client.complete(prompt))

src/main_test.py DELETED
@@ -1,97 +0,0 @@
- from dotenv import load_dotenv
- from docx import Document
-
- from src.llm.llm import get_llm
- from src.service.resume_parser import ResumeParser
- from src.service.emotion_recognition import EmotionRecognition
- from src.service.utils import (
-     extract_audio,
-     audio2text,
-     sample_frames,
-     parse_yaml_string,
- )
- from src.template.grading_prompt import (
-     GRADE_RESPONSE_PROMPT,
-     RANKING_AND_FEEDBACK_PROMPT,
- )
-
- # sample input values
- from src.sample_inputs import (
-     VIDEO_PATH,
-     RESUME_PATH,
-     INTERVIEW_QUESTION,
-     JOB_REQUIREMENTS,
- )
-
-
- # customise this part
- LLM_CONFIG_FILE = "./src/configs/llm/openai-gpt-3.5-turbo.yaml"
- # LLM_CONFIG_FILE = "./src/configs/llm/openai-gpt-4o-mini.yaml"
- # LLM_CONFIG_FILE = "./src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml"
-
- RESUME_PARSER_CONFIG_FILE = "./src/configs/parser/llamaparse_en.yaml"
- OUTPUT_AUDIO_FILE = "/Users/gohyixian/Downloads/test_cases/outputs/audio_output.wav"  # only supports .wav
- OUTPUT_REPORT_FILE = "/Users/gohyixian/Downloads/test_cases/outputs/report.docx"
-
- # init API keys as env variables
- load_dotenv()
-
- # init LLM & resume parser
- llm = get_llm(LLM_CONFIG_FILE)
- parser = ResumeParser(RESUME_PARSER_CONFIG_FILE)
-
-
- # 1. extract audio from video
- OUTPUT_AUDIO_FILE = extract_audio(VIDEO_PATH, OUTPUT_AUDIO_FILE)
- assert OUTPUT_AUDIO_FILE is not None, "Audio extraction failed."
-
- # 2. audio to text
- audio_text = audio2text(OUTPUT_AUDIO_FILE)
- print(audio_text)
-
- # 3. extract frames from video
- frames = sample_frames(VIDEO_PATH, sample_rate=8)
- print(frames)
-
- # 4. deepface: extract emotions & compute confidence scores
- emotions = EmotionRecognition.detect_face_emotions(frames)
- emotions_dict = EmotionRecognition.process_emotions(emotions)
- conf_score = emotions_dict["conf"]
- print(emotions_dict)
-
- # 5. llamaparse: parse resume into MD
- resume_md = parser.parse_resume_to_markdown(RESUME_PATH)
- print(resume_md)
-
- # 6. llm grades the question response
- formatted_grading_prompt = GRADE_RESPONSE_PROMPT.format(
-     interview_question=INTERVIEW_QUESTION,
-     conf_score=conf_score,
-     response_text=audio_text,
- )
- grade = llm.complete(formatted_grading_prompt)
- print(grade)
-
- # 7. llm ranks and outputs final feedback
- formatted_ranking_prompt = RANKING_AND_FEEDBACK_PROMPT.format(
-     job_requirements=JOB_REQUIREMENTS, interview_feedback=grade, resume_text=resume_md
- )
- rank_and_feedback = llm.complete(formatted_ranking_prompt)
- print(rank_and_feedback)
-
-
- # 8. save to .docx report
- expected_keys = ["name", "score", "feedback"]
- rank_and_feedback_dict = parse_yaml_string(
-     yaml_string=rank_and_feedback, expected_keys=expected_keys, cleanup=True
- )
- print(rank_and_feedback_dict)
-
- doc = Document()
- doc.add_heading(f"{rank_and_feedback_dict['name']}", 0)
- doc.add_heading(f"Overall Score: {rank_and_feedback_dict['score']}", 1)
- doc.add_heading("Brief Overview", 1)
- doc.add_paragraph(f"{rank_and_feedback_dict['feedback']}")
-
- # Save the document
- doc.save(OUTPUT_REPORT_FILE)

src/sample_inputs.py DELETED
@@ -1,98 +0,0 @@
- RESUME_PATH = "/Users/gohyixian/Downloads/test_cases/CV_2024_24_JUN.pdf"
-
- VIDEO_PATH = "/Users/gohyixian/Downloads/test_cases/test.mp4"
-
- INTERVIEW_QUESTION = """
- Can you describe a project where you fine-tuned a transformer-based model (e.g., BERT, GPT, or T5) for a specific application?
- Walk us through your approach to dataset preparation, model optimization, and deployment.
- How did you handle challenges like ensuring the model's performance, scalability, and fairness?
- """
-
- JOB_REQUIREMENTS = """
- Job Title: LLM Engineer
-
- Job Description:
- ################
- - We are seeking a skilled and innovative LLM Engineer to join our AI team. The ideal candidate will
-   have hands-on experience in developing, fine-tuning, and deploying large language models (LLMs) for
-   various applications. You will collaborate with cross-functional teams to deliver cutting-edge AI
-   solutions, leveraging your expertise in natural language processing (NLP), deep learning, and
-   large-scale systems.
-
-
- Key Responsibilities
- ####################
- 1. Model Development:
-    - Design and fine-tune large language models (e.g., GPT, LLaMA, or similar) for tasks like text generation,
-      summarization, question answering, and classification.
-    - Implement advanced techniques for model optimization, including pruning, quantization, and distillation.
-
- 2. Data Management:
-    - Curate, preprocess, and manage large datasets for training and evaluation.
-    - Ensure data quality by cleaning, augmenting, and annotating datasets.
-
- 3. Infrastructure & Deployment:
-    - Build scalable pipelines for training and deploying LLMs using frameworks like PyTorch, TensorFlow, or JAX.
-    - Optimize inference speed and memory usage for production-grade applications.
-
- 4. Model Evaluation:
-    - Develop benchmarks to evaluate model performance, fairness, and safety.
-    - Implement guardrails to mitigate bias and ensure ethical use of AI systems.
-
- 5. Collaboration:
-    - Work closely with product managers, data scientists, and software engineers to align model capabilities with business requirements.
-    - Provide mentorship to junior team members and contribute to knowledge sharing within the team.
-
- 6. Research & Innovation:
-    - Stay updated on the latest research in NLP and deep learning.
-    - Contribute to academic papers, patents, or open-source projects where appropriate.
-
-
- Requirements
- ############
- 1. Technical Skills:
-    - Strong programming skills in Python.
-    - Proficiency with deep learning frameworks (e.g., PyTorch, TensorFlow, JAX).
-    - Experience in training and fine-tuning transformer-based models (e.g., BERT, GPT, T5).
-    - Familiarity with distributed training techniques and tools like Horovod or DeepSpeed.
-    - Knowledge of vector databases and retrieval-augmented generation (RAG) techniques.
-    - Hands-on experience with MLOps tools (e.g., MLflow, Docker, Kubernetes) for deployment.
-    - Expertise in working with APIs for integrating LLMs into production systems.
-
- 2. Educational Background:
-    - Bachelor’s or Master’s degree in Computer Science, Artificial Intelligence, Data Science, or a related field. Ph.D. preferred but not required.
-
- 3. Experience:
-    - 3+ years of experience in NLP, machine learning, or a related field.
-    - Demonstrated success in building and deploying LLM-powered applications.
-    - Contributions to open-source projects or research publications in NLP are a plus.
-
- 4. Soft Skills:
-    - Strong problem-solving abilities and attention to detail.
-    - Excellent communication and collaboration skills to work with cross-functional teams.
-    - Adaptable, with a passion for continuous learning and innovation.
-    - A proactive and goal-oriented mindset.
-
- 5. Target Personalities:
-    - Innovative Thinker: Always exploring new ways to improve model performance and usability.
-    - Team Player: Collaborates effectively across diverse teams to deliver AI solutions.
-    - Ethically Minded: Committed to ensuring the ethical and fair use of AI technologies.
-    - Detail-Oriented: Meticulous in coding, data handling, and model evaluation.
-    - Resilient Learner: Thrives in a fast-paced environment, keeping up with advancements in AI research.
-
-
- Preferred Qualifications:
- #########################
- - Experience with foundation model APIs (e.g., OpenAI, Hugging Face).
- - Knowledge of reinforcement learning techniques, particularly RLHF (Reinforcement Learning with Human Feedback).
- - Familiarity with multi-modal LLMs and their integration.
- - Experience working in cloud environments like AWS, Azure, or GCP.
- - Contributions to community forums, blogs, or conferences related to LLMs or NLP.
-
- What We Offer
- #############
- - Competitive salary and benefits package.
- - Opportunities to work on groundbreaking AI projects.
- - Flexible work environment, including remote options.
- - Access to cutting-edge resources and infrastructure for AI development.
- """

src/service/emotion_recognition.py DELETED
@@ -1,136 +0,0 @@
- import numpy as np
- from deepface import DeepFace
-
- from src.domain.enums.emotion_types import EmotionType
-
-
- class EmotionRecognition:
-     def __init__(self):
-         pass
-
-     @classmethod
-     def detect_face_emotions(cls, frames: list[np.ndarray] = None) -> list:
-         """
-         Performs facial emotion detection using the DeepFace model
-         """
-         emotions = []
-         for frame in frames:
-             frame_result = DeepFace.analyze(
-                 frame, actions=["emotion"], enforce_detection=False
-             )
-             emotions.append(frame_result)
-
-         return emotions
-
-     @classmethod
-     def process_emotions(cls, emotions: list) -> dict:
-         """
-         Processes the emotions by calculating the overall confidence score using a
-         custom weighted emotion balancing algorithm.
-
-         Returns:
-         - weighted normalized score
-         - signed, weighted normalized score
-         - confidence score
-         """
-
-         count = 0
-         emots = {
-             str(EmotionType.SAD.value): 0,
-             str(EmotionType.FEAR.value): 0,
-             str(EmotionType.ANGRY.value): 0,
-             str(EmotionType.DISGUST.value): 0,
-             str(EmotionType.HAPPY.value): 0,
-             str(EmotionType.NEUTRAL.value): 0,
-             str(EmotionType.SURPRISE.value): 0,
-         }
-
-         # accumulate per-emotion scores across all frames with a detection result
-         for frame_result in emotions:
-             if len(frame_result) > 0:
-                 emot = frame_result[0]["emotion"]
-                 for key in emots:
-                     emots[key] += emot[key]
-                 count += 1
-
-         # prevent zero division
-         if count == 0:
-             count = 1
-
-         # DeepFace scores are percentages; average over frames and scale to [0, 1]
-         for i in list(emots.keys()):
-             emots[i] /= count * 100
-
-         # reweight according to custom weightage
-         sad_score = emots[str(EmotionType.SAD.value)] * 1.3
-         fear_score = emots[str(EmotionType.FEAR.value)] * 1.3
-         angry_score = emots[str(EmotionType.ANGRY.value)] * 1.3
-         disgust_score = emots[str(EmotionType.DISGUST.value)] * 10
-         happy_score = emots[str(EmotionType.HAPPY.value)] * 1.7
-         neutral_score = emots[str(EmotionType.NEUTRAL.value)] / 1.2
-         surprise_score = emots[str(EmotionType.SURPRISE.value)] * 1.4
-
-         score_list = [
-             sad_score,
-             angry_score,
-             surprise_score,
-             fear_score,
-             happy_score,
-             disgust_score,
-             neutral_score,
-         ]
-         normalized_scores = cls.__normalize_scores(score_list)
-         mean = np.mean(normalized_scores)
-
-         # negative emotions contribute with a negative sign
-         result_scores = [
-             (-sad_score),
-             (-angry_score),
-             surprise_score,
-             (-fear_score),
-             happy_score,
-             (-disgust_score),
-             neutral_score,
-         ]
-         normalized_result_scores = cls.__normalize_scores(result_scores)
-         result = np.mean(normalized_result_scores)
-
-         difference = abs((mean - result) / mean) * 100
-
-         # keep values in range of [0, 100]
-         difference = min(difference, 50)
-
-         if mean > result:
-             conf = 50 - difference
-         else:
-             conf = 50 + difference
-
-         return {"mean": mean, "result": result, "conf": conf}
-
-     @classmethod
-     def __normalize_scores(cls, scores: list) -> list:
-         min_val, max_val = min(scores), max(scores)
-         # guard against a zero range when all scores are equal
-         if max_val == min_val:
-             return [0.0 for _ in scores]
-         return [(score - min_val) / (max_val - min_val) for score in scores]
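
To make the scoring pipeline concrete, here is a small driving sketch; the synthetic frames below are an assumption for illustration only (the repo's tests use a real video via sample_frames):

import numpy as np
from src.service.emotion_recognition import EmotionRecognition

# three blank RGB frames stand in for sampled video frames;
# enforce_detection=False lets DeepFace tolerate faceless frames
frames = [np.zeros((224, 224, 3), dtype=np.uint8) for _ in range(3)]

emotions = EmotionRecognition.detect_face_emotions(frames)
scores = EmotionRecognition.process_emotions(emotions)

# "conf" lands in [0, 100]: the difference term is capped at 50, so
# conf = 50 ± difference; values above 50 mean positive emotions dominate
print(scores["mean"], scores["result"], scores["conf"])
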
src/service/resume_parser.py DELETED
@@ -1,42 +0,0 @@
- import yaml
- from llama_parse import LlamaParse
- from llama_index.core import SimpleDirectoryReader
-
- from src.template.parser_prompt import PARSE_RESUME_PROMPT
-
-
- class ResumeParser:
-     def __init__(self, config_file_path: str = "config.yaml"):
-         """
-         Initiates a resume parser client
-         """
-
-         # load config
-         with open(config_file_path, "r") as f:
-             config = yaml.safe_load(f)
-
-         # set bbox size
-         bbox_margin = config["PAGE_ROC_BBOX"]
-         bbox = f"{bbox_margin['TOP']},{bbox_margin['RIGHT']},{bbox_margin['BOTTOM']},{bbox_margin['LEFT']}"
-
-         self._parser = LlamaParse(
-             language=config["LANGUAGE"],
-             disable_ocr=config["DISABLE_OCR"],
-             bounding_box=bbox,
-             result_type="markdown",
-             parsing_instruction=PARSE_RESUME_PROMPT,
-             is_formatting_instruction=False,
-         )
-
-     def parse_resume_to_markdown(self, resume_path: str = "") -> str:
-         """
-         Parses the resume into markdown text.
-
-         Supported filetypes:
-         - .pdf
-         """
-         document = SimpleDirectoryReader(
-             input_files=[resume_path], file_extractor={".pdf": self._parser}
-         ).load_data()
-
-         return "\n".join([str(d.text) for d in document])

src/service/utils.py DELETED
@@ -1,103 +0,0 @@
- import cv2
- import yaml
- import numpy as np
- from pathlib import Path
- import speech_recognition as sr
- from moviepy import VideoFileClip
-
-
- def extract_audio(
-     input_video_file: str = "",
-     output_audio_file: str = "",
- ) -> str:
-     """
-     Extracts audio from the input video file and saves it to the respective path.
-     Returns the path to the saved audio file if extraction is successful.
-     Supported input video file formats are:
-     - .mp4
-     - .mov
-
-     Supported output audio file formats are:
-     - .wav
-     """
-     try:
-         input_video_file = str(Path(input_video_file))
-         output_audio_file = str(Path(output_audio_file))
-
-         # Load the video file
-         video = VideoFileClip(input_video_file)
-
-         # Extract audio and write to output file
-         video.audio.write_audiofile(output_audio_file)
-
-         print(f"[extract_audio()] : Audio extracted and saved to {output_audio_file}")
-
-         return output_audio_file
-     except Exception as e:
-         print(e)
-         return None
-
-
- def audio2text(audio_file: str = "") -> str:
-     """
-     Converts audio to text using Google's speech-to-text engine
-     and returns the text.
-     """
-     r = sr.Recognizer()
-     with sr.AudioFile(audio_file) as source:
-         audio = r.record(source)
-     text = r.recognize_google(audio)
-     return text
-
-
- def sample_frames(input_video_file: str = "", sample_rate: int = 2) -> list[np.ndarray]:
-     """
-     Samples one frame every 'sample_rate' frames from the video file and returns
-     them in the form of a list of NumPy ndarray objects.
-     """
-     cap = cv2.VideoCapture(input_video_file)
-     frames = []
-     count = 0
-
-     while cap.isOpened():
-         ret, frame = cap.read()
-         if not ret:
-             break
-         if count % sample_rate == 0:
-             frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-         count += 1
-     cap.release()
-
-     return frames
-
-
- def parse_yaml_string(
-     yaml_string: str = "", expected_keys: list[str] = None, cleanup: bool = True
- ) -> dict:
-     """
-     Parses a YAML string into a Python dictionary based on a list of
-     expected keys.
-     """
-
-     # strips ```yaml ``` fences and headers if present
-     if cleanup:
-         yaml_string = yaml_string.replace("YAML", "")
-         yaml_string = yaml_string.replace("yaml", "")
-         yaml_string = yaml_string.replace("`", "")
-
-     try:
-         parsed_data = yaml.safe_load(yaml_string)
-
-         # Handle missing keys with error handling
-         result = {}
-         for key in expected_keys:
-             if key in parsed_data:
-                 result[key] = parsed_data[key]
-             else:
-                 print(f"[parse_yaml_string()] : Missing key {key}")
-
-         return result
-
-     except (yaml.YAMLError, TypeError) as e:
-         # malformed YAML, or a parse result that is not a mapping
-         print(e)
-         return None
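
As an illustration of the cleanup path, parse_yaml_string is built to absorb a fenced YAML answer like the one RANKING_AND_FEEDBACK_PROMPT requests; the literal below is invented for the example:

from src.service.utils import parse_yaml_string

llm_output = "```yaml\nname: Johnson\nscore: 88\nfeedback: Strong technical and soft skills.\n```"

result = parse_yaml_string(
    yaml_string=llm_output,
    expected_keys=["name", "score", "feedback"],
    cleanup=True,  # strips the backticks and the "yaml" tag before parsing
)
print(result)  # {'name': 'Johnson', 'score': 88, 'feedback': 'Strong technical and soft skills.'}

Note that the blanket replace("yaml", "") would also delete the substring "yaml" inside field values, so this cleanup only suits outputs where that word never appears in the payload.
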
src/template/grading_prompt.py DELETED
@@ -1,111 +0,0 @@
- from llama_index.core.prompts import PromptTemplate
-
- GRADE_RESPONSE_PROMPT = PromptTemplate(
-     """
- You are a Human Resource Manager and an interviewer.
- Your task is to review an interviewee's overall performance based on multiple factors.
- You will be provided with the interview question, the interviewee's facial confidence score, their response to the question in text form, and additional context on the interview.
-
- The confidence score will range from 0 to 100, and you will also receive the text of their answers to the interview question.
- Based on this information, evaluate the interviewee’s performance in the following areas:
-
- 1. **Answer Quality**:
-    Assess the clarity, relevance, and accuracy of their response to the interview question.
-    Did the interviewee address the key points effectively?
-
- 2. **Problem-Solving Skills**:
-    Evaluate how well the interviewee tackled any problem presented in the interview question.
-    Were they able to think critically, analyze the situation, and propose solutions?
-
- 3. **Confidence**:
-    Based on their facial confidence score (0 to 100) and their overall demeanor in the response, rate their confidence level and how it impacts their presentation and communication.
-
- 4. **Personality**:
-    Consider the tone, communication style, and interpersonal skills of the interviewee.
-    How well did they engage with the question and the interview process?
-    Do they demonstrate qualities like openness, empathy, or assertiveness?
-
- 5. **Overall Performance**:
-    Based on the combination of the above factors, provide a holistic evaluation of their performance in the interview.
-    Offer feedback on strengths and areas for improvement.
-
- Ensure that your feedback is clear and actionable, so other HR professionals reviewing the interview can easily assess the interviewee's suitability for the position.
-
-
- ########################################
- Interview Question:
- {interview_question}
-
- ########################################
- Interviewee's Facial Confidence Score:
- {conf_score}
-
- ########################################
- Interviewee's response in text:
- {response_text}
-
- ########################################
- output:
- """
- )
-
-
- RANKING_AND_FEEDBACK_PROMPT = PromptTemplate(
-     """
- You are an HR specialist evaluating an interviewee for a specific role.
- Your task is to assess the suitability of the interviewee based on the following information:
-
- 1. **Job Requirements**:
-    A list of skills, experiences, and qualifications required for the role.
-
- 2. **Interview Feedback**:
-    The feedback and review of the interviewee’s performance in the interview, which includes assessments on their answer quality, problem-solving skills, confidence, personality, and overall performance.
-
- 3. **Resume Text**:
-    A parsed version of the interviewee's resume, which includes their work experience, skills, education, and other relevant information.
-
- Using these inputs, generate an output strictly in the following YAML format:
-
- ###########################
- name: <name>
- score: <score>
- feedback: <feedback text>
- ###########################
-
-
- Details for the output:
- 1. **name**:
-    Name of the interviewee.
-
- 2. **score**:
-    A score ranging from 0 to 100, where 0 means the interviewee is not recommended for the position, and 100 means they are a perfect match for the job.
-
- 3. **feedback**:
-    - A detailed breakdown explaining how the interviewee’s experience, skills, and performance align or do not align with the job requirements.
-    - Discuss whether the interviewee’s skills, experiences, and overall traits match or fail to meet the required qualifications.
-    - Provide a short, concise sentence summarizing the interviewee’s suitability for the role.
-
- Ensure that the feedback is comprehensive yet concise, offering actionable insights for HR professionals to make a decision about the interviewee’s fit for the role.
-
-
- ########################################
- Job Requirements:
- {job_requirements}
-
- ########################################
- Interview Feedback:
- {interview_feedback}
-
- ########################################
- Resume Text:
- {resume_text}
-
- ########################################
-
- Output strictly following the below YAML format:
-
- name: <name>
- score: <score>
- feedback: <feedback text>
- """
- )

src/template/parser_prompt.py DELETED
@@ -1,21 +0,0 @@
- from llama_index.core.prompts import PromptTemplate
-
- PARSE_RESUME_PROMPT = """
- You are tasked with parsing a resume.
-
- **Your Focus**:
- Reproduce only the main body text, including section headers and bullet points, exactly as received.
- Do not skip section numbers in the format DIGIT.DIGIT (e.g., 10.1, 3.1); you must apply a markdown header level based on the depth (e.g., # for main sections, ## for subsections) to reflect the appropriate hierarchy, and output them.
- Do make sure that section numbers are always followed by the corresponding section title, without a '\n' character in between or separating them into different headers. Valid examples are as below:
-     '# 14 Experience'
-     '# 2 Education'
- Invalid examples are as below:
-     '# 14\n # Experience'
-     '# 2\n # Education'
- You may only add markdown header symbols (#, ##, ###, etc.) to denote the hierarchical levels of section headers.
- Do not make up any text or headers that are not present in the original text.
-
- **Expected Output**:
- Text, section headers, and bullet points must be reproduced without any text edits, additions, or deletions, other than adding markdown header symbols (#, ##, ###, etc.).
- Use markdown headers to denote additional hierarchy (e.g., # for main sections, ## for subsections) based on the best interpretation of the document’s structure.
- """