DocuMentorAI / app.py
Sobit's picture
Update app.py
8d56069 verified
raw
history blame
7.89 kB
import streamlit as st
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
import fitz # PyMuPDF for PDF extraction
import pytesseract
from PIL import Image
import os
import re
# --- Hugging Face credentials and model -------------------------------------
# The API token is stored as the Streamlit secret "HF_TOKEN" and is exported
# via the environment before the LLM client below is constructed.
HF_MODEL_REPO = "mistralai/Mistral-7B-Instruct-v0.3"
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
llm = HuggingFaceHub(
    repo_id=HF_MODEL_REPO,
    model_kwargs={"temperature": 0.5},
)

# --- Page header --------------------------------------------------------------
st.set_page_config(page_title="DocuMentorAI", layout="wide")
st.title("📄 DocuMentorAI")
st.write("Upload job openings and your CV/Resume to generate professional application documents.")

# --- Upload widgets -----------------------------------------------------------
# The job opening may be a PDF, an image, or a plain-text file.
JOB_OPENING_FILE_TYPES = ["pdf", "png", "jpg", "jpeg", "txt"]
st.subheader("📢 Upload Job Opening Details")
job_opening_file = st.file_uploader(
    "Upload Job Opening (PDF, Image, or Text)",
    type=JOB_OPENING_FILE_TYPES,
)

# The CV/Resume may be a PDF or an image.
CV_RESUME_FILE_TYPES = ["pdf", "png", "jpg", "jpeg"]
st.subheader("📄 Upload CV/Resume")
cv_resume_file = st.file_uploader(
    "Upload your CV/Resume (PDF or Image)",
    type=CV_RESUME_FILE_TYPES,
)
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, joined by single spaces.

    Args:
        pdf_file: Binary file-like object; the bytes returned by its
            ``read()`` are opened by PyMuPDF as an in-memory PDF stream.

    Returns:
        str: The per-page text from ``page.get_text()``, concatenated with
        ``" "`` between pages.
    """
    raw_bytes = pdf_file.read()
    with fitz.open(stream=raw_bytes, filetype="pdf") as document:
        page_texts = [page.get_text() for page in document]
        return " ".join(page_texts)
# Function to extract text from Image using OCR
def extract_text_from_image(image_file):
    """Run OCR on an image and return the recognised text.

    Args:
        image_file: Path or file-like object that ``PIL.Image.open`` accepts.

    Returns:
        str: Whatever ``pytesseract.image_to_string`` produces for the image.
    """
    return pytesseract.image_to_string(Image.open(image_file))
# Function to extract text from uploaded files
def extract_text(uploaded_file):
    """Extract text from an uploaded PDF, plain-text, or image file.

    Args:
        uploaded_file: The object returned by ``st.file_uploader`` (or
            ``None`` when nothing has been uploaded). It must expose a
            ``type`` attribute holding the MIME type and a ``read()`` method
            returning bytes.

    Returns:
        str: The extracted text, or ``""`` when no file was uploaded.
    """
    if not uploaded_file:
        return ""

    file_type = uploaded_file.type or ""
    if file_type == "application/pdf":
        return extract_text_from_pdf(uploaded_file)

    # The job-opening uploader accepts ".txt" files. Those must be decoded
    # directly: sending them to the OCR path makes PIL fail because the bytes
    # are not an image. The file-name check covers uploads whose MIME type is
    # missing or generic.
    file_name = (getattr(uploaded_file, "name", "") or "").lower()
    if file_type.startswith("text/") or file_name.endswith(".txt"):
        return uploaded_file.read().decode("utf-8", errors="replace")

    # Everything else the uploaders allow (png/jpg/jpeg) goes through OCR.
    return extract_text_from_image(uploaded_file)
# Extract text from the job opening and the CV/Resume. Each value is "" when
# the corresponding file has not been uploaded.
job_opening_text = extract_text(job_opening_file)
cv_resume_text = extract_text(cv_resume_file)

# Display the extracted text.
# The two previews use distinct labels on purpose: without an explicit key,
# Streamlit identifies a widget by its parameters, so two text areas that
# share the label "Preview:" AND hold identical text (e.g. the same document
# uploaded in both slots) would collide with a duplicate-widget-ID error.
if job_opening_text:
    st.subheader("Extracted Job Opening Details")
    st.text_area("Job Opening Preview:", job_opening_text, height=150)
if cv_resume_text:
    st.subheader("Extracted CV/Resume Details")
    st.text_area("CV/Resume Preview:", cv_resume_text, height=150)
# Function to extract professor name, designation, and university
def extract_professor_details(text):
    """Heuristically find a professor and an institution name in *text*.

    The professor is a title ("Dr.", "Professor", "Prof"/"Prof.") followed by
    two capitalised words. The institution is the first occurrence of
    "University", "Institute", "College" or "School of ...", together with
    the capitalised words directly before it and an "of ..." phrase directly
    after it (e.g. "Stanford University", "Massachusetts Institute of
    Technology"). Previously only the bare keyword was returned, so the UI
    showed "... at University".

    This is a regex heuristic, not a parser: adjacent capitalised words that
    are not part of the name (e.g. a sentence-initial word) may be included.

    Args:
        text: Free text of the job opening; may be empty.

    Returns:
        tuple[str, str]: ``(professor_name, university_name)``. Each element
        is the literal string ``"Not Found"`` when nothing matched.
    """
    professor_pattern = r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)"

    # A capitalised name word such as "Stanford", "McGill" or "A&M".
    name_word = r"[A-Z][A-Za-z'&-]*"
    university_pattern = (
        # Optional run of capitalised words before the keyword; the
        # look-behind keeps the run from starting in the middle of a word.
        rf"(?:(?<![A-Za-z])(?:{name_word}[ \t]+)+)?"
        # Same keywords as before; "School" still only counts when followed
        # by "of <word>".
        r"(?:University|Institute|College|School(?=[ \t]+of[ \t]+[A-Za-z]))"
        # Optional "of [the] X Y ..." tail after the keyword.
        rf"(?:[ \t]+of(?:[ \t]+the)?[ \t]+[A-Za-z][A-Za-z'&-]*(?:[ \t]+{name_word})*)?"
    )

    professor_match = re.search(professor_pattern, text)
    university_match = re.search(university_pattern, text)

    professor_name = professor_match.group(0) if professor_match else "Not Found"
    university_name = university_match.group(0) if university_match else "Not Found"
    return professor_name, university_name
# Detect the professor and institution mentioned in the job opening text.
# This runs on every script run, whether or not a job opening was uploaded:
# with empty text nothing matches and both names are the string "Not Found",
# which the Cover Letter tab later checks for.
professor_name, university_name = extract_professor_details(job_opening_text)
# LLM prompt templates.
# Each prompt text is kept in a named constant and then wrapped in a
# PromptTemplate; the {placeholders} are filled in when a chain is run.

# Cold email: professor_name, university_name, research_interests, reason,
# resume_text.
EMAIL_PROMPT_TEXT = """
Write a professional cold email for a research position.
- Address the professor formally.
- Introduce yourself and academic background.
- Express interest in their research.
- Highlight key skills from your CV.
- Conclude with a polite request.
### Input:
- Professor: {professor_name}
- University: {university_name}
- Research Interests: {research_interests}
- Why This Lab: {reason}
- CV Highlights: {resume_text}
### Output:
A well-structured, professional cold email.
"""

# Cover letter: job_title, company, key_skills, resume_text.
COVER_LETTER_PROMPT_TEXT = """
Write a compelling job application cover letter.
- Address the employer formally.
- Mention job title and where you found it.
- Highlight key skills and experiences.
- Relate background to the company.
- Conclude with enthusiasm.
### Input:
- Job Title: {job_title}
- Company: {company}
- Key Skills: {key_skills}
- CV Highlights: {resume_text}
### Output:
A strong, well-formatted cover letter.
"""

# Research statement: research_background, key_projects, future_goals.
RESEARCH_STATEMENT_PROMPT_TEXT = """
Write a research statement for Ph.D. applications.
- Discuss research background and motivation.
- Explain key research experiences and findings.
- Outline future research interests and goals.
- Highlight contributions to the field.
### Input:
- Research Background: {research_background}
- Key Research Projects: {key_projects}
- Future Goals: {future_goals}
### Output:
A well-structured, professional research statement.
"""

# Statement of Purpose: motivation, academic_background,
# research_experiences, career_goals, why_this_program.
SOP_PROMPT_TEXT = """
Write a compelling Statement of Purpose (SOP).
- Introduce motivation for graduate studies.
- Discuss academic background.
- Explain relevant experiences and research.
- Outline career goals.
- Justify fit for the program.
### Input:
- Motivation: {motivation}
- Academic Background: {academic_background}
- Research & Projects: {research_experiences}
- Career Goals: {career_goals}
- Why This Program: {why_this_program}
### Output:
A well-structured SOP.
"""

email_template = PromptTemplate.from_template(EMAIL_PROMPT_TEXT)
cover_letter_template = PromptTemplate.from_template(COVER_LETTER_PROMPT_TEXT)
research_statement_template = PromptTemplate.from_template(RESEARCH_STATEMENT_PROMPT_TEXT)
sop_template = PromptTemplate.from_template(SOP_PROMPT_TEXT)
# LangChain chains: one per document type, all bound to the same LLM client.
def _chain_for(prompt):
    """Return an LLMChain that pairs the module-level ``llm`` with *prompt*."""
    return LLMChain(llm=llm, prompt=prompt)


email_chain = _chain_for(email_template)
cover_letter_chain = _chain_for(cover_letter_template)
research_statement_chain = _chain_for(research_statement_template)
sop_chain = _chain_for(sop_template)
# User inputs: one tab per document type.
st.subheader("📩 Generate Application Documents")
TAB_TITLES = ["Cold Email", "Cover Letter", "Research Statement", "SOP"]
tab1, tab2, tab3, tab4 = st.tabs(TAB_TITLES)

# Cold email tab: shows the detected professor/institution, collects two
# free-text inputs, and runs the email chain when the button is pressed.
with tab1:
    st.write(f"🧑‍🏫 **Detected Professor:** {professor_name} at {university_name}")
    research_interests = st.text_area("Research Interests")
    reason = st.text_area("Why this professor/lab?")
    email_requested = st.button("Generate Cold Email")
    if email_requested:
        email_inputs = dict(
            professor_name=professor_name,
            university_name=university_name,
            research_interests=research_interests,
            reason=reason,
            resume_text=cv_resume_text,
        )
        email = email_chain.run(email_inputs)
        st.text_area("Generated Cold Email", email, height=250)
# Cover letter tab: the company defaults to the detected institution and is
# only asked for when detection returned "Not Found".
with tab2:
    job_title = st.text_input("Job Title")
    if university_name != "Not Found":
        company_name = university_name
    else:
        company_name = st.text_input("Company/University")
    key_skills = st.text_area("Key Skills")
    cover_letter_requested = st.button("Generate Cover Letter")
    if cover_letter_requested:
        cover_letter_inputs = dict(
            job_title=job_title,
            company=company_name,
            key_skills=key_skills,
            resume_text=cv_resume_text,
        )
        cover_letter = cover_letter_chain.run(cover_letter_inputs)
        st.text_area("Generated Cover Letter", cover_letter, height=250)
# Research statement tab: three free-text inputs feed the research statement
# chain when the button is pressed.
with tab3:
    research_background = st.text_area("Research Background")
    key_projects = st.text_area("Key Research Projects")
    future_goals = st.text_area("Future Research Goals")
    research_statement_requested = st.button("Generate Research Statement")
    if research_statement_requested:
        research_statement_inputs = dict(
            research_background=research_background,
            key_projects=key_projects,
            future_goals=future_goals,
        )
        research_statement = research_statement_chain.run(research_statement_inputs)
        st.text_area("Generated Research Statement", research_statement, height=250)
# SOP tab: five free-text inputs feed the Statement of Purpose chain when the
# button is pressed.
with tab4:
    motivation = st.text_area("Motivation for Graduate Studies")
    academic_background = st.text_area("Academic Background")
    research_experiences = st.text_area("Research & Projects")
    career_goals = st.text_area("Career Goals")
    why_this_program = st.text_area("Why This Program")
    sop_requested = st.button("Generate SOP")
    if sop_requested:
        sop_inputs = dict(
            motivation=motivation,
            academic_background=academic_background,
            research_experiences=research_experiences,
            career_goals=career_goals,
            why_this_program=why_this_program,
        )
        sop = sop_chain.run(sop_inputs)
        st.text_area("Generated SOP", sop, height=250)