RainPoo committed
Commit 50406c0 · verified · 1 Parent(s): d1a02df

Delete src

src/app.py DELETED
@@ -1,210 +0,0 @@
- import gradio as gr
- from typing import Dict
- import pandas as pd
-
- # from src.application.services import InterviewAnalyzer
- # from src.infrastructure.llm import LangchainService
- # from src.infrastructure.emotion import DeepFaceService
- # from src.infrastructure.speech import GoogleSpeechService
-
-
- # class GradioInterface:
- #     def __init__(self):
- #         # Initialize services
- #         self.emotion_service = DeepFaceService()
- #         self.speech_service = GoogleSpeechService()
- #         self.llm_service = LangchainService()
- #
- #         # Initialize analyzer
- #         self.analyzer = InterviewAnalyzer(
- #             emotion_service=self.emotion_service,
- #             speech_service=self.speech_service,
- #             llm_service=self.llm_service,
- #         )
- #
- #     def create_interface(self) -> gr.Interface:
- #         def process_submission(
- #             video_file: str, resume_file: str, job_requirements: str
- #         ) -> Dict:
- #             # Implementation for processing submission
- #             pass
- #
- #         # Create Gradio interface
- #         interface = gr.Interface(
- #             fn=process_submission,
- #             inputs=[
- #                 gr.Video(label="Interview Recording"),
- #                 gr.File(label="Resume"),
- #                 gr.Textbox(label="Job Requirements", lines=5),
- #             ],
- #             outputs=gr.JSON(label="Analysis Results"),
- #             title="HR Interview Analysis System",
- #             description="Upload interview recording and resume to analyze candidate performance",
- #         )
- #
- #         return interface
-
-
- # Testing to set up the simple interface
- class GradioInterface:
-     def __init__(self):
-         # DataFrame listing every candidate's feedback
-         self.candidate_feedback = pd.DataFrame(columns=["Name", "Score", "Feedback"])
-
-     def validate_file_format(self, file_path: str, valid_extensions: list) -> bool:
-         return isinstance(file_path, str) and any(
-             file_path.endswith(ext) for ext in valid_extensions
-         )
-
-     def process_video(self, video_path: str) -> str:
-         # Mock transcript extracted from the video
-         return "### Transcript\nExample of transcript of the interview video."
-
-     def process_resume(self, resume_path: str) -> str:
-         # Mock resume parsing
-         return "### Resume Analysis\n- **Skills**: NLP, Machine Learning, Computer Vision\n- **Experience**: 5 years."
-
-     def analyze_emotions(self, video_path: str) -> str:
-         # Mock emotion analysis
-         return "### Emotion Analysis\n- **Overall Emotion**: Positive\n- **Details**: Candidate displayed confidence and engagement."
-
-     def get_feedback(self, name: str, score: int, feedback: str) -> pd.DataFrame:
-         return pd.DataFrame({"Name": [name], "Score": [score], "Feedback": [feedback]})
-
-     def save_report(self):
-         # Save a placeholder report (plain-text stub despite the .docx extension)
-         report_path = "report_path.docx"
-         with open(report_path, "w") as f:
-             # Pass fields to include in the report here
-             f.write("Example report")
-         return report_path
-
-     def create_interface(self) -> gr.Blocks:
-         def process_submission(
-             video_path, resume_path, interview_questions, job_requirements
-         ):
-             # Validate inputs and formats
-             if not video_path:
-                 return (
-                     "Please upload an interview video.",
-                     None,
-                     None,
-                     self.candidate_feedback,
-                 )
-             if not resume_path:
-                 return (
-                     "Please upload a resume (PDF).",
-                     None,
-                     None,
-                     self.candidate_feedback,
-                 )
-             if not interview_questions:
-                 return (
-                     "Please provide interview questions.",
-                     None,
-                     None,
-                     self.candidate_feedback,
-                 )
-             if not job_requirements:
-                 return (
-                     "Please provide job requirements.",
-                     None,
-                     None,
-                     self.candidate_feedback,
-                 )
-             if not self.validate_file_format(video_path, [".mp4", ".avi", ".mkv"]):
-                 return "Invalid video format.", None, None, self.candidate_feedback
-             if not self.validate_file_format(resume_path, [".pdf"]):
-                 return (
-                     "Please submit the resume in PDF format.",
-                     None,
-                     None,
-                     self.candidate_feedback,
-                 )
-
-             # Mock outputs for this submission
-             video_transcript = self.process_video(video_path)
-             emotion_analysis = self.analyze_emotions(video_path)
-             resume_analysis = self.process_resume(resume_path)
-             # Example of feedback
-             feedback_list = self.get_feedback(
-                 name="Johnson",
-                 score=88,
-                 feedback="Outstanding technical and soft skills.",
-             )
-             # Append the new candidate feedback to the DataFrame
-             self.candidate_feedback = pd.concat(
-                 [self.candidate_feedback, feedback_list], ignore_index=True
-             )
-
-             # Return both the individual result and the list result
-             return (
-                 video_transcript,
-                 emotion_analysis,
-                 resume_analysis,
-                 self.candidate_feedback,
-             )
-
-         # Build the interface using Blocks
-         with gr.Blocks() as demo:
-             gr.Markdown("## HR Interview Analysis System")
-
-             # Inputs section
-             with gr.Row():
-                 video_input = gr.Video(label="Upload Interview Video")
-                 resume_input = gr.File(label="Upload Resume (PDF)")
-             with gr.Row():
-                 question_input = gr.Textbox(
-                     label="Interview Questions",
-                     lines=5,
-                     placeholder="Enter the interview questions here",
-                 )
-                 requirements_input = gr.Textbox(
-                     label="Job Requirements",
-                     lines=5,
-                     placeholder="Enter the job requirements here",
-                 )
-
-             submit_button = gr.Button("Submit")
-
-             with gr.Tabs():
-                 with gr.Tab("Result"):
-                     transcript_output = gr.Markdown(label="Video Transcript")
-                     emotion_output = gr.Markdown(label="Emotion Analysis")
-                     resume_output = gr.Markdown(label="Resume Analysis")
-
-                 with gr.Tab("List of Candidates"):
-                     feedback_output = gr.Dataframe(
-                         label="Candidate Feedback List", interactive=False
-                     )
-
-             save_button = gr.Button("Save Report")
-             save_button.click(
-                 fn=self.save_report,
-                 inputs=[],
-                 outputs=gr.File(label="Download Report"),
-             )
-             # Connect the button to the function
-             submit_button.click(
-                 fn=process_submission,
-                 inputs=[video_input, resume_input, question_input, requirements_input],
-                 outputs=[
-                     transcript_output,
-                     emotion_output,
-                     resume_output,
-                     feedback_output,
-                 ],
-             )
-
-         return demo
-
-
- def launch_app():
-     print(gr.__version__)
-     app = GradioInterface()
-     interface = app.create_interface()
-     interface.launch()
-
-
- if __name__ == "__main__":
-     launch_app()

src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml DELETED
@@ -1,4 +0,0 @@
- PROVIDER: nvidia
- BASE_URL: https://integrate.api.nvidia.com/v1
- MODEL: nvidia/llama-3.1-nemotron-70b-instruct
- TEMPERATURE: 0

src/configs/llm/openai-gpt-3.5-turbo.yaml DELETED
@@ -1,4 +0,0 @@
- PROVIDER: openai
- BASE_URL: default
- MODEL: gpt-3.5-turbo
- TEMPERATURE: 0

src/configs/llm/openai-gpt-4o-mini.yaml DELETED
@@ -1,4 +0,0 @@
- PROVIDER: openai
- BASE_URL: default
- MODEL: gpt-4o-mini
- TEMPERATURE: 0

src/configs/parser/llamaparse_en.yaml DELETED
@@ -1,7 +0,0 @@
- LANGUAGE: en
- DISABLE_OCR: false
- PAGE_ROC_BBOX:
-   TOP: 0
-   RIGHT: 0
-   BOTTOM: 0
-   LEFT: 0

src/domain/candidate.py DELETED
@@ -1,13 +0,0 @@
- from dataclasses import dataclass
- from typing import Dict, List
-
-
- @dataclass
- class Candidate:
-     id: str
-     name: str
-     email: str
-     resume_data: Dict
-     interview_responses: List[str]
-     emotional_metrics: Dict
-     feedback: Dict

src/domain/enums/emotion_types.py DELETED
@@ -1,21 +0,0 @@
- from enum import Enum
-
-
- class EmotionType(Enum):
-
-     SAD = "sad"
-     FEAR = "fear"
-     ANGRY = "angry"
-     DISGUST = "disgust"
-
-     HAPPY = "happy"
-     NEUTRAL = "neutral"
-     SURPRISE = "surprise"
-
-     @classmethod
-     def get_positive_emotions(cls):
-         return [cls.HAPPY, cls.NEUTRAL, cls.SURPRISE]
-
-     @classmethod
-     def get_negative_emotions(cls):
-         return [cls.SAD, cls.FEAR, cls.ANGRY, cls.DISGUST]

src/domain/enums/interview_status.py DELETED
@@ -1,11 +0,0 @@
- from enum import Enum, auto
-
-
- class InterviewStatus(Enum):
-     SCHEDULED = auto()
-     IN_PROGRESS = auto()
-     COMPLETED = auto()
-     CANCELLED = auto()
-     PENDING_REVIEW = auto()
-     REVIEWED = auto()
-     FAILED = auto()

src/domain/interview.py DELETED
@@ -1,28 +0,0 @@
- from dataclasses import dataclass
- from datetime import datetime
- from typing import List, Dict
- from src.domain.enums.interview_status import InterviewStatus
- from src.domain.enums.emotion_types import EmotionType
-
-
- @dataclass
- class Interview:
-     id: str
-     candidate_id: str
-     job_id: str
-     video_path: str
-     status: InterviewStatus
-     questions: List[str]
-     responses_transcription: List[str]
-     timestamp: datetime
-     duration: int
-     emotional_analysis: Dict[EmotionType, float]
-
-     def is_completed(self) -> bool:
-         return self.status == InterviewStatus.COMPLETED
-
-     def is_reviewable(self) -> bool:
-         return self.status in [
-             InterviewStatus.COMPLETED,
-             InterviewStatus.PENDING_REVIEW,
-         ]
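
For orientation, this dataclass composes the two enums above; a minimal construction sketch (every field value below is hypothetical, chosen only to illustrate the types) looks like:

from datetime import datetime
from src.domain.interview import Interview
from src.domain.enums.interview_status import InterviewStatus

interview = Interview(
    id="iv-001",  # hypothetical identifiers
    candidate_id="cand-042",
    job_id="job-007",
    video_path="/tmp/test.mp4",
    status=InterviewStatus.PENDING_REVIEW,
    questions=["Walk us through a fine-tuning project."],
    responses_transcription=["I fine-tuned BERT for ..."],
    timestamp=datetime.now(),
    duration=600,  # assumed to be seconds; the field is only typed as int
    emotional_analysis={},  # Dict[EmotionType, float], left empty here
)

# PENDING_REVIEW is reviewable but not completed
assert interview.is_reviewable() and not interview.is_completed()
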
src/llm/base_llm_provider.py DELETED
@@ -1,16 +0,0 @@
- """Base class for LLM providers"""
-
- from abc import ABC, abstractmethod
- from typing import Dict, Optional
-
-
- class BaseLLMProvider(ABC):
-     @abstractmethod
-     def __init__(self):
-         """LLM provider initialization"""
-         raise NotImplementedError
-
-     @abstractmethod
-     def complete(self, prompt: str = "") -> str:
-         """LLM chat completion implementation by each provider"""
-         raise NotImplementedError

src/llm/enums.py DELETED
@@ -1,3 +0,0 @@
- OPENAI_LLM = "openai"
- NVIDIA_LLM = "nvidia"
- DEFAULT_LLM_API_BASE = "default"

src/llm/llm.py DELETED
@@ -1,32 +0,0 @@
- import yaml
-
- from src.llm.enums import OPENAI_LLM, NVIDIA_LLM
- from src.llm.base_llm_provider import BaseLLMProvider
- from src.llm.openai_llm import OpenAILLM
- from src.llm.nvidia_llm import NvidiaLLM
-
-
- def get_llm(config_file_path: str = "config.yaml") -> BaseLLMProvider:
-     """
-     Initiates an LLM client from a config file
-     """
-
-     # load config
-     with open(config_file_path, "r") as f:
-         config = yaml.safe_load(f)
-
-     # init & return llm
-     if config["PROVIDER"] == OPENAI_LLM:
-         return OpenAILLM(
-             model=config["MODEL"],
-             temperature=config["TEMPERATURE"],
-             base_url=config["BASE_URL"],
-         )
-     elif config["PROVIDER"] == NVIDIA_LLM:
-         return NvidiaLLM(
-             model=config["MODEL"],
-             temperature=config["TEMPERATURE"],
-             base_url=config["BASE_URL"],
-         )
-     else:
-         raise ValueError(f"Unsupported LLM provider: {config['PROVIDER']}")
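
The deleted YAML configs above plug straight into this factory. A hedged usage sketch (assuming the repo root is on PYTHONPATH and the provider API key lives in a .env file, as main_test.py below also assumes) would be:

from dotenv import load_dotenv
from src.llm.llm import get_llm

load_dotenv()  # exposes e.g. OPENAI_API_KEY from .env to the client

llm = get_llm("./src/configs/llm/openai-gpt-4o-mini.yaml")
print(llm.complete("Summarise this candidate's strengths in one line."))

Because both providers subclass BaseLLMProvider, callers only ever see complete().
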
src/llm/nvidia_llm.py DELETED
@@ -1,29 +0,0 @@
- """NVIDIA LLM Implementation"""
-
- from llama_index.llms.nvidia import NVIDIA
-
- from src.llm.base_llm_provider import BaseLLMProvider
- from src.llm.enums import DEFAULT_LLM_API_BASE
-
-
- class NvidiaLLM(BaseLLMProvider):
-     def __init__(
-         self,
-         model: str = "nvidia/llama-3.1-nemotron-70b-instruct",
-         temperature: float = 0.0,
-         base_url: str = "https://integrate.api.nvidia.com/v1",
-     ):
-         """Initiate NVIDIA client"""
-
-         if base_url == DEFAULT_LLM_API_BASE:
-             self._client = NVIDIA(
-                 model=model,
-                 temperature=temperature,
-             )
-         else:
-             self._client = NVIDIA(
-                 model=model, temperature=temperature, base_url=base_url
-             )
-
-     def complete(self, prompt: str = "") -> str:
-         return str(self._client.complete(prompt))

src/llm/openai_llm.py DELETED
@@ -1,29 +0,0 @@
- """OpenAI LLM Implementation"""
-
- from llama_index.llms.openai import OpenAI
-
- from src.llm.base_llm_provider import BaseLLMProvider
- from src.llm.enums import DEFAULT_LLM_API_BASE
-
-
- class OpenAILLM(BaseLLMProvider):
-     def __init__(
-         self,
-         model: str = "gpt-4o-mini",
-         temperature: float = 0.0,
-         base_url: str = DEFAULT_LLM_API_BASE,
-     ):
-         """Initiate OpenAI client"""
-
-         if base_url == DEFAULT_LLM_API_BASE:
-             self._client = OpenAI(
-                 model=model,
-                 temperature=temperature,
-             )
-         else:
-             self._client = OpenAI(
-                 model=model, temperature=temperature, base_url=base_url
-             )
-
-     def complete(self, prompt: str = "") -> str:
-         return str(self._client.complete(prompt))

src/main_test.py DELETED
@@ -1,97 +0,0 @@
- from dotenv import load_dotenv
- from docx import Document
-
- from src.llm.llm import get_llm
- from src.service.resume_parser import ResumeParser
- from src.service.emotion_recognition import EmotionRecognition
- from src.service.utils import (
-     extract_audio,
-     audio2text,
-     sample_frames,
-     parse_yaml_string,
- )
- from src.template.grading_prompt import (
-     GRADE_RESPONSE_PROMPT,
-     RANKING_AND_FEEDBACK_PROMPT,
- )
-
- # sample input values
- from src.sample_inputs import (
-     VIDEO_PATH,
-     RESUME_PATH,
-     INTERVIEW_QUESTION,
-     JOB_REQUIREMENTS,
- )
-
-
- # customise this part
- LLM_CONFIG_FILE = "./src/configs/llm/openai-gpt-3.5-turbo.yaml"
- # LLM_CONFIG_FILE = "./src/configs/llm/openai-gpt-4o-mini.yaml"
- # LLM_CONFIG_FILE = "./src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml"
-
- RESUME_PARSER_CONFIG_FILE = "./src/configs/parser/llamaparse_en.yaml"
- OUTPUT_AUDIO_FILE = "/Users/gohyixian/Downloads/test_cases/outputs/audio_output.wav"  # only supports .wav
- OUTPUT_REPORT_FILE = "/Users/gohyixian/Downloads/test_cases/outputs/report.docx"
-
- # init API keys as env variables
- load_dotenv()
-
- # init LLM & resume parser
- llm = get_llm(LLM_CONFIG_FILE)
- parser = ResumeParser(RESUME_PARSER_CONFIG_FILE)
-
-
- # 1. extract audio from video
- OUTPUT_AUDIO_FILE = extract_audio(VIDEO_PATH, OUTPUT_AUDIO_FILE)
- assert OUTPUT_AUDIO_FILE is not None, "Audio extraction failed."
-
- # 2. audio to text
- audio_text = audio2text(OUTPUT_AUDIO_FILE)
- print(audio_text)
-
- # 3. extract frames from video
- frames = sample_frames(VIDEO_PATH, sample_rate=8)
- print(frames)
-
- # 4. deepface: extract emotions & compute confidence scores
- emotions = EmotionRecognition.detect_face_emotions(frames)
- emotions_dict = EmotionRecognition.process_emotions(emotions)
- conf_score = emotions_dict["conf"]
- print(emotions_dict)
-
- # 5. llamaparse: parse resume into MD
- resume_md = parser.parse_resume_to_markdown(RESUME_PATH)
- print(resume_md)
-
- # 6. llm grades the question response
- formatted_grading_prompt = GRADE_RESPONSE_PROMPT.format(
-     interview_question=INTERVIEW_QUESTION,
-     conf_score=conf_score,
-     response_text=audio_text,
- )
- grade = llm.complete(formatted_grading_prompt)
- print(grade)
-
- # 7. llm ranks and outputs final feedback
- formatted_ranking_prompt = RANKING_AND_FEEDBACK_PROMPT.format(
-     job_requirements=JOB_REQUIREMENTS, interview_feedback=grade, resume_text=resume_md
- )
- rank_and_feedback = llm.complete(formatted_ranking_prompt)
- print(rank_and_feedback)
-
-
- # 8. save to .docx report
- expected_keys = ["name", "score", "feedback"]
- rank_and_feedback_dict = parse_yaml_string(
-     yaml_string=rank_and_feedback, expected_keys=expected_keys, cleanup=True
- )
- print(rank_and_feedback_dict)
-
- doc = Document()
- doc.add_heading(f"{rank_and_feedback_dict['name']}", 0)
- doc.add_heading(f"Overall Score: {rank_and_feedback_dict['score']}", 1)
- doc.add_heading("Brief Overview", 1)
- doc.add_paragraph(f"{rank_and_feedback_dict['feedback']}")
-
- # Save the document
- doc.save(OUTPUT_REPORT_FILE)

src/sample_inputs.py DELETED
@@ -1,98 +0,0 @@
- RESUME_PATH = "/Users/gohyixian/Downloads/test_cases/CV_2024_24_JUN.pdf"
-
- VIDEO_PATH = "/Users/gohyixian/Downloads/test_cases/test.mp4"
-
- INTERVIEW_QUESTION = """
- Can you describe a project where you fine-tuned a transformer-based model (e.g., BERT, GPT, or T5) for a specific application?
- Walk us through your approach to dataset preparation, model optimization, and deployment.
- How did you handle challenges like ensuring the model's performance, scalability, and fairness?
- """
-
- JOB_REQUIREMENTS = """
- Job Title: LLM Engineer
-
- Job Description:
- ################
- - We are seeking a skilled and innovative LLM Engineer to join our AI team. The ideal candidate will
-   have hands-on experience in developing, fine-tuning, and deploying large language models (LLMs) for
-   various applications. You will collaborate with cross-functional teams to deliver cutting-edge AI
-   solutions, leveraging your expertise in natural language processing (NLP), deep learning, and
-   large-scale systems.
-
-
- Key Responsibilities
- ####################
- 1. Model Development:
-    - Design and fine-tune large language models (e.g., GPT, LLaMA, or similar) for tasks like text generation,
-      summarization, question answering, and classification.
-    - Implement advanced techniques for model optimization, including pruning, quantization, and distillation.
-
- 2. Data Management:
-    - Curate, preprocess, and manage large datasets for training and evaluation.
-    - Ensure data quality by cleaning, augmenting, and annotating datasets.
-
- 3. Infrastructure & Deployment:
-    - Build scalable pipelines for training and deploying LLMs using frameworks like PyTorch, TensorFlow, or JAX.
-    - Optimize inference speed and memory usage for production-grade applications.
-
- 4. Model Evaluation:
-    - Develop benchmarks to evaluate model performance, fairness, and safety.
-    - Implement guardrails to mitigate bias and ensure ethical use of AI systems.
-
- 5. Collaboration:
-    - Work closely with product managers, data scientists, and software engineers to align model capabilities with business requirements.
-    - Provide mentorship to junior team members and contribute to knowledge sharing within the team.
-
- 6. Research & Innovation:
-    - Stay updated on the latest research in NLP and deep learning.
-    - Contribute to academic papers, patents, or open-source projects where appropriate.
-
-
- Requirements
- ############
- 1. Technical Skills:
-    - Strong programming skills in Python.
-    - Proficiency with deep learning frameworks (e.g., PyTorch, TensorFlow, JAX).
-    - Experience in training and fine-tuning transformer-based models (e.g., BERT, GPT, T5).
-    - Familiarity with distributed training techniques and tools like Horovod or DeepSpeed.
-    - Knowledge of vector databases and retrieval-augmented generation (RAG) techniques.
-    - Hands-on experience with MLOps tools (e.g., MLflow, Docker, Kubernetes) for deployment.
-    - Expertise in working with APIs for integrating LLMs into production systems.
-
- 2. Educational Background:
-    - Bachelor’s or Master’s degree in Computer Science, Artificial Intelligence, Data Science, or a related field. Ph.D. preferred but not required.
-
- 3. Experience:
-    - 3+ years of experience in NLP, machine learning, or a related field.
-    - Demonstrated success in building and deploying LLM-powered applications.
-    - Contributions to open-source projects or research publications in NLP are a plus.
-
- 4. Soft Skills:
-    - Strong problem-solving abilities and attention to detail.
-    - Excellent communication and collaboration skills to work with cross-functional teams.
-    - Adaptable, with a passion for continuous learning and innovation.
-    - A proactive and goal-oriented mindset.
-
- 5. Target Personalities:
-    - Innovative Thinker: Always exploring new ways to improve model performance and usability.
-    - Team Player: Collaborates effectively across diverse teams to deliver AI solutions.
-    - Ethically Minded: Committed to ensuring the ethical and fair use of AI technologies.
-    - Detail-Oriented: Meticulous in coding, data handling, and model evaluation.
-    - Resilient Learner: Thrives in a fast-paced environment, keeping up with advancements in AI research.
-
-
- Preferred Qualifications:
- #########################
- - Experience with foundation model APIs (e.g., OpenAI, Hugging Face).
- - Knowledge of reinforcement learning techniques, particularly RLHF (Reinforcement Learning with Human Feedback).
- - Familiarity with multi-modal LLMs and their integration.
- - Experience working in cloud environments like AWS, Azure, or GCP.
- - Contributions to community forums, blogs, or conferences related to LLMs or NLP.
-
- What We Offer
- #############
- - Competitive salary and benefits package.
- - Opportunities to work on groundbreaking AI projects.
- - Flexible work environment, including remote options.
- - Access to cutting-edge resources and infrastructure for AI development.
- """

src/service/emotion_recognition.py DELETED
@@ -1,136 +0,0 @@
- import numpy as np
- from deepface import DeepFace
-
- from src.domain.enums.emotion_types import EmotionType
-
-
- class EmotionRecognition:
-     def __init__(self):
-         pass
-
-     @classmethod
-     def detect_face_emotions(cls, frames: list[np.ndarray] = None) -> list:
-         """
-         Performs facial emotion detection using the DeepFace model
-         """
-         emotions = []
-         for frame in frames:
-             frame_result = DeepFace.analyze(
-                 frame, actions=["emotion"], enforce_detection=False
-             )
-             emotions.append(frame_result)
-
-         return emotions
-
-     @classmethod
-     def process_emotions(cls, emotions: list) -> dict:
-         """
-         Processes the emotions by calculating the overall confidence score using a
-         custom weighted emotion balancing algorithm.
-
-         Returns:
-         - weighted normalized score
-         - signed, weighted normalized score
-         - confidence score
-         """
-
-         count = 0
-         emots = {
-             str(EmotionType.SAD.value): 0,
-             str(EmotionType.FEAR.value): 0,
-             str(EmotionType.ANGRY.value): 0,
-             str(EmotionType.DISGUST.value): 0,
-             str(EmotionType.HAPPY.value): 0,
-             str(EmotionType.NEUTRAL.value): 0,
-             str(EmotionType.SURPRISE.value): 0,
-         }
-
-         # accumulate per-emotion scores across all frames with a detection result
-         for frame_result in emotions:
-             if len(frame_result) > 0:
-                 emot = frame_result[0]["emotion"]
-                 for key in emots:
-                     emots[key] += emot[key]
-                 count += 1
-
-         # prevent zero division
-         if count == 0:
-             count = 1
-
-         # DeepFace scores are percentages; average over frames and scale to [0, 1]
-         for i in list(emots.keys()):
-             emots[i] /= count * 100
-
-         # reweight according to custom weightage
-         sad_score = emots[str(EmotionType.SAD.value)] * 1.3
-         fear_score = emots[str(EmotionType.FEAR.value)] * 1.3
-         angry_score = emots[str(EmotionType.ANGRY.value)] * 1.3
-         disgust_score = emots[str(EmotionType.DISGUST.value)] * 10
-         happy_score = emots[str(EmotionType.HAPPY.value)] * 1.7
-         neutral_score = emots[str(EmotionType.NEUTRAL.value)] / 1.2
-         surprise_score = emots[str(EmotionType.SURPRISE.value)] * 1.4
-
-         score_list = [
-             sad_score,
-             angry_score,
-             surprise_score,
-             fear_score,
-             happy_score,
-             disgust_score,
-             neutral_score,
-         ]
-         normalized_scores = cls.__normalize_scores(score_list)
-         mean = np.mean(normalized_scores)
-
-         # negative emotions contribute with a negative sign
-         result_scores = [
-             (-sad_score),
-             (-angry_score),
-             surprise_score,
-             (-fear_score),
-             happy_score,
-             (-disgust_score),
-             neutral_score,
-         ]
-         normalized_result_scores = cls.__normalize_scores(result_scores)
-         result = np.mean(normalized_result_scores)
-
-         difference = abs((mean - result) / mean) * 100
-
-         # keep values in range of [0, 100]
-         difference = min(difference, 50)
-
-         if mean > result:
-             conf = 50 - difference
-         else:
-             conf = 50 + difference
-
-         return {"mean": mean, "result": result, "conf": conf}
-
-     @classmethod
-     def __normalize_scores(cls, scores: list) -> list:
-         min_val, max_val = min(scores), max(scores)
-         # guard against a zero range when all scores are equal
-         if max_val == min_val:
-             return [0.0 for _ in scores]
-         return [(score - min_val) / (max_val - min_val) for score in scores]
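
To make the scoring pipeline concrete, here is a small driving sketch; the synthetic frames below are an assumption for illustration only (the repo's tests use a real video via sample_frames):

import numpy as np
from src.service.emotion_recognition import EmotionRecognition

# three blank RGB frames stand in for sampled video frames;
# enforce_detection=False lets DeepFace tolerate faceless frames
frames = [np.zeros((224, 224, 3), dtype=np.uint8) for _ in range(3)]

emotions = EmotionRecognition.detect_face_emotions(frames)
scores = EmotionRecognition.process_emotions(emotions)

# "conf" lands in [0, 100]: the difference term is capped at 50, so
# conf = 50 ± difference; values above 50 mean positive emotions dominate
print(scores["mean"], scores["result"], scores["conf"])
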
src/service/resume_parser.py DELETED
@@ -1,42 +0,0 @@
- import yaml
- from llama_parse import LlamaParse
- from llama_index.core import SimpleDirectoryReader
-
- from src.template.parser_prompt import PARSE_RESUME_PROMPT
-
-
- class ResumeParser:
-     def __init__(self, config_file_path: str = "config.yaml"):
-         """
-         Initiates a resume parser client
-         """
-
-         # load config
-         with open(config_file_path, "r") as f:
-             config = yaml.safe_load(f)
-
-         # set bbox size
-         bbox_margin = config["PAGE_ROC_BBOX"]
-         bbox = f"{bbox_margin['TOP']},{bbox_margin['RIGHT']},{bbox_margin['BOTTOM']},{bbox_margin['LEFT']}"
-
-         self._parser = LlamaParse(
-             language=config["LANGUAGE"],
-             disable_ocr=config["DISABLE_OCR"],
-             bounding_box=bbox,
-             result_type="markdown",
-             parsing_instruction=PARSE_RESUME_PROMPT,
-             is_formatting_instruction=False,
-         )
-
-     def parse_resume_to_markdown(self, resume_path: str = "") -> str:
-         """
-         Parses the resume into markdown text.
-
-         Supported filetypes:
-         - .pdf
-         """
-         document = SimpleDirectoryReader(
-             input_files=[resume_path], file_extractor={".pdf": self._parser}
-         ).load_data()
-
-         return "\n".join([str(d.text) for d in document])

src/service/utils.py DELETED
@@ -1,103 +0,0 @@
- import cv2
- import yaml
- import numpy as np
- from pathlib import Path
- import speech_recognition as sr
- from moviepy import VideoFileClip
-
-
- def extract_audio(
-     input_video_file: str = "",
-     output_audio_file: str = "",
- ) -> str:
-     """
-     Extracts audio from the input video file and saves it to the respective path.
-     Returns the path to the saved audio file if extraction is successful.
-     Supported input video file formats are:
-     - .mp4
-     - .mov
-
-     Supported output audio file formats are:
-     - .wav
-     """
-     try:
-         input_video_file = str(Path(input_video_file))
-         output_audio_file = str(Path(output_audio_file))
-
-         # Load the video file
-         video = VideoFileClip(input_video_file)
-
-         # Extract audio and write to output file
-         video.audio.write_audiofile(output_audio_file)
-
-         print(f"[extract_audio()] : Audio extracted and saved to {output_audio_file}")
-
-         return output_audio_file
-     except Exception as e:
-         print(e)
-         return None
-
-
- def audio2text(audio_file: str = "") -> str:
-     """
-     Converts audio to text using Google's speech-to-text engine
-     and returns the text.
-     """
-     r = sr.Recognizer()
-     with sr.AudioFile(audio_file) as source:
-         audio = r.record(source)
-     text = r.recognize_google(audio)
-     return text
-
-
- def sample_frames(input_video_file: str = "", sample_rate: int = 2) -> list[np.ndarray]:
-     """
-     Samples one frame every 'sample_rate' frames from the video file and returns
-     them in the form of a list of NumPy ndarray objects.
-     """
-     cap = cv2.VideoCapture(input_video_file)
-     frames = []
-     count = 0
-
-     while cap.isOpened():
-         ret, frame = cap.read()
-         if not ret:
-             break
-         if count % sample_rate == 0:
-             frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-         count += 1
-     cap.release()
-
-     return frames
-
-
- def parse_yaml_string(
-     yaml_string: str = "", expected_keys: list[str] = None, cleanup: bool = True
- ) -> dict:
-     """
-     Parses a YAML string into a Python dictionary based on a list of
-     expected keys.
-     """
-
-     # strips ```yaml ``` fences and headers if present
-     if cleanup:
-         yaml_string = yaml_string.replace("YAML", "")
-         yaml_string = yaml_string.replace("yaml", "")
-         yaml_string = yaml_string.replace("`", "")
-
-     try:
-         parsed_data = yaml.safe_load(yaml_string)
-
-         # Handle missing keys with error handling
-         result = {}
-         for key in expected_keys:
-             if key in parsed_data:
-                 result[key] = parsed_data[key]
-             else:
-                 print(f"[parse_yaml_string()] : Missing key {key}")
-
-         return result
-
-     except (yaml.YAMLError, TypeError) as e:
-         # malformed YAML, or a parse result that is not a mapping
-         print(e)
-         return None
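
As an illustration of the cleanup path, parse_yaml_string is built to absorb a fenced YAML answer like the one RANKING_AND_FEEDBACK_PROMPT requests; the literal below is invented for the example:

from src.service.utils import parse_yaml_string

llm_output = "```yaml\nname: Johnson\nscore: 88\nfeedback: Strong technical and soft skills.\n```"

result = parse_yaml_string(
    yaml_string=llm_output,
    expected_keys=["name", "score", "feedback"],
    cleanup=True,  # strips the backticks and the "yaml" tag before parsing
)
print(result)  # {'name': 'Johnson', 'score': 88, 'feedback': 'Strong technical and soft skills.'}

Note that the blanket replace("yaml", "") would also delete the substring "yaml" inside field values, so this cleanup only suits outputs where that word never appears in the payload.
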
src/template/grading_prompt.py DELETED
@@ -1,111 +0,0 @@
- from llama_index.core.prompts import PromptTemplate
-
- GRADE_RESPONSE_PROMPT = PromptTemplate(
-     """
- You are a Human Resource Manager and an interviewer.
- Your task is to review an interviewee's overall performance based on multiple factors.
- You will be provided with the interview question, the interviewee's facial confidence score, their response to the question in text form, and additional context on the interview.
-
- The confidence score will range from 0 to 100, and you will also receive the text of their answers to the interview question.
- Based on this information, evaluate the interviewee’s performance in the following areas:
-
- 1. **Answer Quality**:
-    Assess the clarity, relevance, and accuracy of their response to the interview question.
-    Did the interviewee address the key points effectively?
-
- 2. **Problem-Solving Skills**:
-    Evaluate how well the interviewee tackled any problem presented in the interview question.
-    Were they able to think critically, analyze the situation, and propose solutions?
-
- 3. **Confidence**:
-    Based on their facial confidence score (0 to 100) and their overall demeanor in the response, rate their confidence level and how it impacts their presentation and communication.
-
- 4. **Personality**:
-    Consider the tone, communication style, and interpersonal skills of the interviewee.
-    How well did they engage with the question and the interview process?
-    Do they demonstrate qualities like openness, empathy, or assertiveness?
-
- 5. **Overall Performance**:
-    Based on the combination of the above factors, provide a holistic evaluation of their performance in the interview.
-    Offer feedback on strengths and areas for improvement.
-
- Ensure that your feedback is clear and actionable, so other HR professionals reviewing the interview can easily assess the interviewee's suitability for the position.
-
-
- ########################################
- Interview Question:
- {interview_question}
-
- ########################################
- Interviewee's Facial Confidence Score:
- {conf_score}
-
- ########################################
- Interviewee's response in text:
- {response_text}
-
- ########################################
- output:
- """
- )
-
-
- RANKING_AND_FEEDBACK_PROMPT = PromptTemplate(
-     """
- You are an HR specialist evaluating an interviewee for a specific role.
- Your task is to assess the suitability of the interviewee based on the following information:
-
- 1. **Job Requirements**:
-    A list of skills, experiences, and qualifications required for the role.
-
- 2. **Interview Feedback**:
-    The feedback and review of the interviewee’s performance in the interview, which includes assessments on their answer quality, problem-solving skills, confidence, personality, and overall performance.
-
- 3. **Resume Text**:
-    A parsed version of the interviewee's resume, which includes their work experience, skills, education, and other relevant information.
-
- Using these inputs, generate an output strictly in the following YAML format:
-
- ###########################
- name: <name>
- score: <score>
- feedback: <feedback text>
- ###########################
-
-
- Details for the output:
- 1. **name**:
-    Name of the interviewee.
-
- 2. **score**:
-    A score ranging from 0 to 100, where 0 means the interviewee is not recommended for the position, and 100 means they are a perfect match for the job.
-
- 3. **feedback**:
-    - A detailed breakdown explaining how the interviewee’s experience, skills, and performance align or do not align with the job requirements.
-    - Discuss whether the interviewee’s skills, experiences, and overall traits match or fail to meet the required qualifications.
-    - Provide a short, concise sentence summarizing the interviewee’s suitability for the role.
-
- Ensure that the feedback is comprehensive yet concise, offering actionable insights for HR professionals to make a decision about the interviewee’s fit for the role.
-
-
- ########################################
- Job Requirements:
- {job_requirements}
-
- ########################################
- Interview Feedback:
- {interview_feedback}
-
- ########################################
- Resume Text:
- {resume_text}
-
- ########################################
-
- Output strictly following the below YAML format:
-
- name: <name>
- score: <score>
- feedback: <feedback text>
- """
- )

src/template/parser_prompt.py DELETED
@@ -1,21 +0,0 @@
- from llama_index.core.prompts import PromptTemplate
-
- PARSE_RESUME_PROMPT = """
- You are tasked with parsing a resume.
-
- **Your Focus**:
- Reproduce only the main body text, including section headers and bullet points, exactly as received.
- Do not skip section numbers in the format DIGIT.DIGIT (e.g., 10.1, 3.1); you must apply a markdown header level based on the depth (e.g., # for main sections, ## for subsections) to reflect the appropriate hierarchy, and output them.
- Do make sure that section numbers are always followed by the corresponding section title, without a '\n' character in between or separating them into different headers. Valid examples are as below:
-     '# 14 Experience'
-     '# 2 Education'
- Invalid examples are as below:
-     '# 14\n # Experience'
-     '# 2\n # Education'
- You may only add markdown header symbols (#, ##, ###, etc.) to denote the hierarchical levels of section headers.
- Do not make up any text or headers that are not present in the original text.
-
- **Expected Output**:
- Text, section headers, and bullet points must be reproduced without any text edits, additions, or deletions, other than adding markdown header symbols (#, ##, ###, etc.).
- Use markdown headers to denote additional hierarchy (e.g., # for main sections, ## for subsections) based on the best interpretation of the document’s structure.
- """