File size: 4,106 Bytes
09f8068
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import os
import time
import json
import pandas as pd
import streamlit as st
import docx
import fitz as pymupdf
from dotenv import load_dotenv
import google.generativeai as genai
from prompt import extract_skill, prompt_first_chunks

# Load environment variables from a local .env file; the app expects
# GOOGLE_API_KEY to be defined there (or already exported in the shell).
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Configure the Gemini client once at import time and share a single
# model instance across all requests in this module.
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-1.0-pro')

def extract_skills_from_job_description(job_description):
    """Ask the LLM for the skills mentioned in a job description.

    Args:
        job_description: Free-form job-description text entered by the user.

    Returns:
        A list of column names: the four personal-detail fields
        ('Name', 'Address', 'EmailId', 'Mobile_number') followed by the
        skills the model extracted, or an empty list on any failure.
    """
    jd_prompt = extract_skill.format(job_description=job_description)
    response = model.generate_content(
        jd_prompt,
        generation_config=genai.types.GenerationConfig(temperature=0.0),
    )
    # Use the public `candidates` accessor instead of the private
    # `response._result` attribute (an SDK implementation detail).
    candidates = getattr(response, "candidates", None)
    if not candidates or not candidates[0].content.parts:
        st.error("Error with Generative AI Model.")
        return []
    raw_text = candidates[0].content.parts[0].text
    # The model frequently wraps its JSON in a ```json ... ``` fence;
    # strip any fence markers wherever they appear, then trim whitespace.
    cleaned = raw_text.replace("```json", "").replace("```", "").strip()
    try:
        json_array = json.loads(cleaned)
    except json.JSONDecodeError as e:
        # Surface malformed model output instead of crashing the app.
        st.error(f"Model returned invalid JSON: {e}")
        return []
    # Personal-detail columns always come first in the output schema;
    # downstream code relies on exactly these four leading fields.
    elements_to_prepend = ['Name', 'Address', 'EmailId', 'Mobile_number']
    return elements_to_prepend + json_array

def _extract_resume_text(file):
    """Return the plain text of an uploaded .pdf or .docx file ('' otherwise)."""
    if file.name.endswith('.pdf'):
        # The context manager guarantees the PyMuPDF document is closed
        # even if text extraction raises mid-way (original leaked it).
        with pymupdf.open(stream=file.read(), filetype="pdf") as document:
            return "".join(page.get_text() for page in document)
    if file.name.endswith('.docx'):
        document = docx.Document(file)
        # Collect paragraph and table-cell text, then join once —
        # avoids quadratic repeated string concatenation.
        chunks = [paragraph.text for paragraph in document.paragraphs]
        chunks.extend(
            cell.text
            for table in document.tables
            for row in table.rows
            for cell in row.cells
        )
        return "\n".join(chunks) + "\n" if chunks else ""
    # Uploader restricts types to pdf/docx, but fail soft just in case.
    return ""

def process_resume(file, jd_skills):
    """Extract skill ratings from a single resume via the LLM.

    Args:
        file: A Streamlit UploadedFile (.pdf or .docx).
        jd_skills: Column names — four personal-detail fields followed by
            the skills extracted from the job description.

    Returns:
        A one-row DataFrame with columns `jd_skills`, or an empty
        DataFrame when extraction or JSON parsing fails.
    """
    text = _extract_resume_text(file)
    resume_prompt = prompt_first_chunks.format(resume=text, jd_skill=jd_skills)
    response = model.generate_content(
        resume_prompt,
        generation_config=genai.types.GenerationConfig(temperature=0.0),
    )
    try:
        # Public accessor instead of the private `response._result`.
        json_array = json.loads(response.candidates[0].content.parts[0].text)
        return pd.DataFrame([json_array], columns=jd_skills)
    except Exception as e:
        # Deliberate best-effort: report and skip an unparseable resume
        # rather than aborting the whole batch.
        st.error(f"Error processing file {file.name}: {e}")
        return pd.DataFrame()

def main():
    """Streamlit entry point: upload resumes and score them against a JD."""
    st.title("Resume Filtering Based on Job Description")

    # Upload resumes
    uploaded_files = st.file_uploader(
        "Upload Resumes (PDF/DOCX)", type=["pdf", "docx"], accept_multiple_files=True
    )

    # Input job description
    job_description = st.text_area("Enter Job Description")

    # Process resumes and display results
    if st.button("Process Resumes"):
        if not uploaded_files or not job_description:
            st.warning("Please upload resumes and provide a job description.")
            return

        jd_skills = extract_skills_from_job_description(job_description)
        if not jd_skills:
            return

        # Collect per-resume frames and concatenate once at the end —
        # concatenating inside the loop is quadratic in the number of files.
        frames = []
        for file in uploaded_files:
            resume_data = process_resume(file, jd_skills)
            if not resume_data.empty:
                resume_data["resume_path"] = file.name
                frames.append(resume_data)

        if not frames:
            return
        all_data = pd.concat(
            [pd.DataFrame(columns=jd_skills), *frames], ignore_index=True
        )

        # The first 4 entries of jd_skills are personal-detail fields, not
        # skills. Selecting by name replaces the fragile positional slice
        # `all_data.columns[4:-1]`, which silently depended on column order.
        skills_columns = jd_skills[4:]
        if skills_columns:
            all_data['total_skill_rating'] = round(
                (all_data[skills_columns].sum(axis=1) / len(skills_columns)) * 100, 2
            )

        # Display dataframe in Streamlit
        st.write("### Processed Resume Data:", all_data)

        # Save to CSV on disk and offer the same content for download.
        csv_text = all_data.to_csv(index=False)
        csv_path = "processed_resumes.csv"
        all_data.to_csv(csv_path, index=False)
        st.success(f"Data saved to {csv_path}")
        st.download_button(
            label="Download CSV",
            data=csv_text,
            file_name="processed_resumes.csv",
            mime="text/csv",
        )

if __name__ == "__main__":
    main()