Spaces:

Sobit
/

DocuMentorAI

Sleeping

File size: 7,887 Bytes

import streamlit as st
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
import fitz  # PyMuPDF for PDF extraction
import pytesseract
from PIL import Image
import os
import re

# Set Hugging Face API Key (Set this in Hugging Face Secrets)
# The token is read from the "HF_TOKEN" entry of st.secrets and exported as an
# environment variable before the LLM client below is constructed.
# NOTE(review): presumably HuggingFaceHub picks the token up from this
# variable -- confirm against the LangChain documentation.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]

# Load Free LLM from Hugging Face
# One module-level client; every LLMChain created later in this file uses it.
llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.3", model_kwargs={"temperature": 0.5})

# Streamlit App Configuration
# Page title/layout first, then the visible heading and a one-line description.
st.set_page_config(page_title="DocuMentorAI", layout="wide")
st.title("📄 DocuMentorAI")
st.write("Upload job openings and your CV/Resume to generate professional application documents.")

# Upload Job Opening (PDF/Image/Text)
# The `type` list restricts which file extensions the uploader accepts.
st.subheader("📢 Upload Job Opening Details")
job_opening_file = st.file_uploader("Upload Job Opening (PDF, Image, or Text)", type=["pdf", "png", "jpg", "jpeg", "txt"])

# Upload CV/Resume
# Same idea as above, but limited to PDF and image formats.
st.subheader("📄 Upload CV/Resume")
cv_resume_file = st.file_uploader("Upload your CV/Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])

# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of an uploaded PDF, joined by spaces.

    Args:
        pdf_file: File-like object whose ``read()`` yields the raw PDF bytes.

    Returns:
        A single string: each page's ``get_text()`` output separated by " ".
    """
    raw_pdf = pdf_file.read()
    page_texts = []
    # Open from memory rather than a path; the document is closed on exit.
    with fitz.open(stream=raw_pdf, filetype="pdf") as document:
        for page in document:
            page_texts.append(page.get_text())
    return " ".join(page_texts)

# Function to extract text from Image using OCR
def extract_text_from_image(image_file):
    """Run OCR on an uploaded image and return the recognised text.

    Args:
        image_file: Path or file-like object that ``PIL.Image.open`` accepts.

    Returns:
        Whatever ``pytesseract.image_to_string`` produces for the image.
    """
    return pytesseract.image_to_string(Image.open(image_file))

# Function to extract text from uploaded files
def extract_text(uploaded_file):
    """Return the text content of an uploaded file, or "" when there is none.

    Dispatches on the upload's MIME type (``uploaded_file.type``):

    * ``application/pdf`` -> ``extract_text_from_pdf``
    * ``text/*`` (for example a ``.txt`` job posting) -> decoded as UTF-8
    * anything else -> treated as an image and passed to
      ``extract_text_from_image`` for OCR

    Args:
        uploaded_file: Object exposing ``type`` (a MIME string) and ``read()``
            (returning bytes), or a falsy value when nothing was uploaded.

    Returns:
        The extracted text; "" when no file was supplied.
    """
    if not uploaded_file:
        return ""

    mime_type = uploaded_file.type or ""
    if mime_type == "application/pdf":
        return extract_text_from_pdf(uploaded_file)
    if mime_type.startswith("text/"):
        # The job-opening uploader accepts ".txt"; plain text is not an image,
        # so it must not be handed to the OCR path. Undecodable bytes are
        # replaced rather than raising, so a mis-encoded file still previews.
        return uploaded_file.read().decode("utf-8", errors="replace")
    return extract_text_from_image(uploaded_file)

# Extract text from job opening and CV/Resume
# extract_text() yields "" when no file has been uploaded, which keeps the
# corresponding preview section below hidden.
job_opening_text = extract_text(job_opening_file)
cv_resume_text = extract_text(cv_resume_file)

# Display Extracted Text
# Each preview is rendered only when its extraction produced non-empty text.
if job_opening_text:
    st.subheader("Extracted Job Opening Details")
    st.text_area("Preview:", job_opening_text, height=150)

if cv_resume_text:
    st.subheader("Extracted CV/Resume Details")
    st.text_area("Preview:", cv_resume_text, height=150)

# Function to extract the professor (title + name) and the institution name

# A capitalised word that may be part of an institution's name ("Texas", "A&M").
_NAME_WORD = r"[A-Z][A-Za-z'&-]*"

# Title followed by a two-word capitalised name, e.g. "Dr. Alan Turing".
_PROFESSOR_RE = re.compile(r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)")

# Institution keyword plus the proper nouns around it on the same line:
#   "Stanford University", "University of Toronto",
#   "Massachusetts Institute of Technology", "School of Computer Science".
_INSTITUTION_RE = re.compile(
    rf"(?:{_NAME_WORD}[ \t]+){{0,4}}"
    r"(?:"
    rf"(?:University|Institute|College)"
    rf"(?:[ \t]+of(?:[ \t]+the)?(?:[ \t]+{_NAME_WORD})+)?"
    r"|"
    rf"School of [A-Za-z]+(?:[ \t]+{_NAME_WORD})*"
    r")"
)


def extract_professor_details(text):
    """Find the first professor and the first institution mentioned in ``text``.

    Both lookups are regex heuristics:

    * The professor is a title ("Dr.", "Prof", "Prof.", "Professor") followed
      by a two-word capitalised name; the title is included in the result.
    * The institution is the keyword "University", "Institute" or "College"
      (or a "School of ..." phrase) together with up to four capitalised words
      before it and an "of ..." phrase after it, so the actual name is
      returned instead of only the bare keyword. Because capitalisation is the
      only signal, a capitalised word directly in front of the name (such as
      the first word of a sentence) may be captured as well.

    Args:
        text: Text to search, typically extracted from a job opening.

    Returns:
        A ``(professor_name, university_name)`` tuple of strings. Each element
        is "Not Found" when its pattern does not match.
    """
    professor_match = _PROFESSOR_RE.search(text)
    university_match = _INSTITUTION_RE.search(text)

    professor_name = professor_match.group(0) if professor_match else "Not Found"
    university_name = university_match.group(0) if university_match else "Not Found"

    return professor_name, university_name

# Extract professor details from the job opening text
# This runs unconditionally: with no job opening uploaded the text is "" and
# both values come back as "Not Found".
professor_name, university_name = extract_professor_details(job_opening_text)

# LLM Prompt Templates
# Each template's {placeholders} must match the keys of the dict passed to the
# corresponding chain's run() call in the tab sections further down.

# Cold email: professor_name, university_name, research_interests, reason,
# resume_text.
email_template = PromptTemplate.from_template("""
Write a professional cold email for a research position.
- Address the professor formally.
- Introduce yourself and academic background.
- Express interest in their research.
- Highlight key skills from your CV.
- Conclude with a polite request.
### Input:
- Professor: {professor_name}
- University: {university_name}
- Research Interests: {research_interests}
- Why This Lab: {reason}
- CV Highlights: {resume_text}
### Output:
A well-structured, professional cold email.
""")

# Cover letter: job_title, company, key_skills, resume_text.
cover_letter_template = PromptTemplate.from_template("""
Write a compelling job application cover letter.
- Address the employer formally.
- Mention job title and where you found it.
- Highlight key skills and experiences.
- Relate background to the company.
- Conclude with enthusiasm.
### Input:
- Job Title: {job_title}
- Company: {company}
- Key Skills: {key_skills}
- CV Highlights: {resume_text}
### Output:
A strong, well-formatted cover letter.
""")

# Research statement: research_background, key_projects, future_goals.
research_statement_template = PromptTemplate.from_template("""
Write a research statement for Ph.D. applications.
- Discuss research background and motivation.
- Explain key research experiences and findings.
- Outline future research interests and goals.
- Highlight contributions to the field.
### Input:
- Research Background: {research_background}
- Key Research Projects: {key_projects}
- Future Goals: {future_goals}
### Output:
A well-structured, professional research statement.
""")

# Statement of Purpose: motivation, academic_background, research_experiences,
# career_goals, why_this_program.
sop_template = PromptTemplate.from_template("""
Write a compelling Statement of Purpose (SOP).
- Introduce motivation for graduate studies.
- Discuss academic background.
- Explain relevant experiences and research.
- Outline career goals.
- Justify fit for the program.
### Input:
- Motivation: {motivation}
- Academic Background: {academic_background}
- Research & Projects: {research_experiences}
- Career Goals: {career_goals}
- Why This Program: {why_this_program}
### Output:
A well-structured SOP.
""")

# LangChain Chains
# One chain per document type; each pairs the shared `llm` with its template.
email_chain = LLMChain(llm=llm, prompt=email_template)
cover_letter_chain = LLMChain(llm=llm, prompt=cover_letter_template)
research_statement_chain = LLMChain(llm=llm, prompt=research_statement_template)
sop_chain = LLMChain(llm=llm, prompt=sop_template)

# User Inputs
# One tab per document type; the tab objects are used as context managers by
# the four sections that follow.
st.subheader("📩 Generate Application Documents")
tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])

# Cold Email Generation
# Shows the professor/institution detected from the job opening, collects two
# free-text fields, and runs email_chain when the button is pressed.
with tab1:
    st.write(f"🧑‍🏫 **Detected Professor:** {professor_name} at {university_name}")
    research_interests = st.text_area("Research Interests")
    reason = st.text_area("Why this professor/lab?")
    
    if st.button("Generate Cold Email"):
        # Keys must match the placeholders declared in email_template.
        # cv_resume_text is "" when no CV/Resume has been uploaded.
        email = email_chain.run({
            "professor_name": professor_name,
            "university_name": university_name,
            "research_interests": research_interests,
            "reason": reason,
            "resume_text": cv_resume_text
        })
        st.text_area("Generated Cold Email", email, height=250)

# Cover Letter Generation
# Collects job title and key skills, then runs cover_letter_chain on demand.
with tab2:
    job_title = st.text_input("Job Title")
    # The "Company/University" input is only rendered when no institution was
    # detected in the job opening; otherwise the detected value is used as-is.
    company_name = university_name if university_name != "Not Found" else st.text_input("Company/University")
    key_skills = st.text_area("Key Skills")

    if st.button("Generate Cover Letter"):
        # Keys must match the placeholders declared in cover_letter_template.
        cover_letter = cover_letter_chain.run({
            "job_title": job_title,
            "company": company_name,
            "key_skills": key_skills,
            "resume_text": cv_resume_text
        })
        st.text_area("Generated Cover Letter", cover_letter, height=250)

# Research Statement Generation
# Uses only the three free-text fields below; neither uploaded document is
# passed to this chain.
with tab3:
    research_background = st.text_area("Research Background")
    key_projects = st.text_area("Key Research Projects")
    future_goals = st.text_area("Future Research Goals")

    if st.button("Generate Research Statement"):
        # Keys must match the placeholders declared in
        # research_statement_template.
        research_statement = research_statement_chain.run({
            "research_background": research_background,
            "key_projects": key_projects,
            "future_goals": future_goals
        })
        st.text_area("Generated Research Statement", research_statement, height=250)

# SOP Generation
# Uses only the five free-text fields below; neither uploaded document is
# passed to this chain.
with tab4:
    motivation = st.text_area("Motivation for Graduate Studies")
    academic_background = st.text_area("Academic Background")
    research_experiences = st.text_area("Research & Projects")
    career_goals = st.text_area("Career Goals")
    why_this_program = st.text_area("Why This Program")

    if st.button("Generate SOP"):
        # Keys must match the placeholders declared in sop_template.
        sop = sop_chain.run({
            "motivation": motivation,
            "academic_background": academic_background,
            "research_experiences": research_experiences,
            "career_goals": career_goals,
            "why_this_program": why_this_program
        })
        st.text_area("Generated SOP", sop, height=250)