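# DocuMentorAI / app.py
# Hugging Face file-viewer header captured with the source (not part of the program):
# Sobit's picture | "Update app.py" | commit 1ad6ea2 (verified) | raw | history blame | 11.4 kB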
import streamlit as st
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
import fitz # PyMuPDF for PDF extraction
from PIL import Image
import os
import pytesseract
import re
# Copy the Hugging Face token from Streamlit secrets into the environment
# (presumably where HuggingFaceHub looks for it; set HF_TOKEN in the Space secrets)
hf_token = st.secrets["HF_TOKEN"]
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

# Hosted instruction-tuned model used by every document generator below
MODEL_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.3"
llm = HuggingFaceHub(repo_id=MODEL_REPO_ID, model_kwargs=dict(temperature=0.5))

# Page configuration and header
st.set_page_config(page_title="DocuMentorAI", layout="wide", page_icon="📄")
st.title("📄 DocuMentorAI")
st.write("Generate professional application documents with ease!")

# Stylesheet injected into the page to tweak the default widget look
CUSTOM_CSS = """
<style>
.stTextArea textarea { font-size: 16px !important; }
.stButton button { width: 100%; background-color: #4CAF50; color: white; }
.stDownloadButton button { width: 100%; background-color: #008CBA; color: white; }
.stMarkdown { font-size: 18px; }
.stSpinner div { margin: auto; }
</style>
"""
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

# Free-text description of the job/research opening
st.subheader("📢 Enter Opening Details")
opening_placeholder = "Example: 'We are hiring a Research Assistant at XYZ University. The ideal candidate has experience in machine learning and data analysis...'"
job_opening_text = st.text_area(
    "Paste the job/research opening details here...",
    height=150,
    placeholder=opening_placeholder,
)

# CV/Resume upload (PDF or image)
st.subheader("📄 Upload CV/Resume")
accepted_cv_types = ["pdf", "png", "jpg", "jpeg"]
cv_resume_file = st.file_uploader(
    "Upload your CV/Resume (PDF or Image)",
    type=accepted_cv_types,
    help="Upload a PDF or image of your CV/Resume for text extraction.",
)
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF supplied as a binary stream.

    Args:
        pdf_file: Binary file-like object containing the PDF data.

    Returns:
        The text of all pages joined with single spaces, or ``""`` when the
        PDF cannot be read (the error is reported through ``st.error``).
    """
    try:
        # read() starts at the current stream position, so rewind first; an
        # upload object that was already consumed would otherwise yield b""
        if getattr(pdf_file, "seekable", lambda: False)():
            pdf_file.seek(0)
        pdf_bytes = pdf_file.read()
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            return " ".join(page.get_text() for page in doc)
    except Exception as e:
        # UI boundary: show the problem to the user instead of crashing the app
        st.error(f"Error extracting text from PDF: {e}")
        return ""
# Function to extract text from Image using OCR
def extract_text_from_image(image_file):
    """Run OCR over an uploaded image and return the recognised text.

    Args:
        image_file: File-like object (or path) that ``Image.open`` accepts.

    Returns:
        The string produced by ``pytesseract.image_to_string``, or ``""``
        when opening or OCR fails (the error is reported through ``st.error``).
    """
    try:
        picture = Image.open(image_file)
        recognised_text = pytesseract.image_to_string(picture)
    except Exception as err:
        st.error(f"Error extracting text from image: {err}")
        return ""
    return recognised_text
# Function to extract text from uploaded files
def extract_text(uploaded_file):
    """Return the text content of an uploaded CV/Resume file.

    Args:
        uploaded_file: Upload object exposing a ``type`` attribute (MIME
            type), or a falsy value when nothing has been uploaded.

    Returns:
        ``""`` when no file is given; otherwise the PDF extractor's result
        for ``application/pdf`` uploads and the OCR extractor's result for
        every other type.
    """
    if not uploaded_file:
        return ""
    is_pdf = uploaded_file.type == "application/pdf"
    extractor = extract_text_from_pdf if is_pdf else extract_text_from_image
    return extractor(uploaded_file)
# Extract text from CV/Resume
cv_resume_text = extract_text(cv_resume_file)

# Show each non-empty input inside its own collapsible preview
preview_sections = (
    ("🔍 View Entered Opening Details", "**Job Opening Details:**", job_opening_text),
    ("🔍 View Extracted CV/Resume Details", "**CV/Resume Details:**", cv_resume_text),
)
for expander_label, heading, section_text in preview_sections:
    if section_text:
        with st.expander(expander_label):
            st.markdown(f"{heading}\n\n{section_text}")
# Function to extract professor name, designation, and university
# Title ("Dr.", "Professor", "Prof" or "Prof.") followed by a two-word capitalised name.
_PROFESSOR_RE = re.compile(r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)")
# Institution keyword together with the capitalised words directly before it on
# the same line ("XYZ University") and an optional "of [the] <Name>" tail
# ("University of Oxford"). A bare keyword still matches, and "School" is only
# accepted in the form "School of <word>".
_UNIVERSITY_RE = re.compile(
    r"(?:\b(?:[A-Z][\w&'-]*[ \t]+)+)?"
    r"(?:(?:University|Institute|College)"
    r"(?:[ \t]+of(?:[ \t]+the)?(?:[ \t]+[A-Z][\w&'-]*)+)?"
    r"|School of [A-Za-z]+)"
)


def extract_professor_details(text):
    """Heuristically find a professor and an institution in free text.

    Args:
        text: Opening description to scan; ``None`` or ``""`` is treated as
            empty text.

    Returns:
        A ``(professor_name, university_name)`` tuple of strings. The
        professor entry includes the title (e.g. ``"Dr. Jane Doe"``); the
        university entry is the institution name around the first matching
        keyword (e.g. ``"XYZ University"``). Either entry is ``"Not Found"``
        when nothing matches.

    Note:
        This is pattern matching, not entity recognition: a capitalised word
        that happens to precede the institution (such as the first word of a
        sentence) is included in the returned name.
    """
    text = text or ""  # re.search raises TypeError on None
    professor_match = _PROFESSOR_RE.search(text)
    university_match = _UNIVERSITY_RE.search(text)
    professor_name = professor_match.group(0) if professor_match else "Not Found"
    university_name = university_match.group(0) if university_match else "Not Found"
    return professor_name, university_name
# Scan the entered opening text for a professor and an institution
# (each value is "Not Found" when the text contains no match)
detected_details = extract_professor_details(job_opening_text)
professor_name, university_name = detected_details
# LLM Prompt Templates
# Each template text is kept in its own constant; the {placeholders} are the
# input keys the matching chain is run with.
EMAIL_PROMPT_TEXT = """
Write a professional cold email for a research position.
- Address the professor formally.
- Introduce yourself and academic background.
- Express interest in their research.
- Highlight key skills from your CV.
- Conclude with a polite request.
### Input:
- Professor: {professor_name}
- University: {university_name}
- Research Interests: {research_interests}
- Why This Lab: {reason}
- CV Highlights: {resume_text}
### Output:
A well-structured, professional cold email.
"""

COVER_LETTER_PROMPT_TEXT = """
Write a compelling job application cover letter.
- Address the employer formally.
- Mention job title and where you found it.
- Highlight key skills and experiences.
- Relate background to the company.
- Conclude with enthusiasm.
### Input:
- Job Title: {job_title}
- Company: {company}
- Key Skills: {key_skills}
- CV Highlights: {resume_text}
### Output:
A strong, well-formatted cover letter.
"""

RESEARCH_STATEMENT_PROMPT_TEXT = """
Write a research statement for Ph.D. applications.
- Discuss research background and motivation.
- Explain key research experiences and findings.
- Outline future research interests and goals.
- Highlight contributions to the field.
### Input:
- Research Background: {research_background}
- Key Research Projects: {key_projects}
- Future Goals: {future_goals}
### Output:
A well-structured, professional research statement.
"""

SOP_PROMPT_TEXT = """
Write a compelling Statement of Purpose (SOP).
- Introduce motivation for graduate studies.
- Discuss academic background.
- Explain relevant experiences and research.
- Outline career goals.
- Justify fit for the program.
### Input:
- Motivation: {motivation}
- Academic Background: {academic_background}
- Research & Projects: {research_experiences}
- Career Goals: {career_goals}
- Why This Program: {why_this_program}
### Output:
A well-structured SOP.
"""

email_template = PromptTemplate.from_template(EMAIL_PROMPT_TEXT)
cover_letter_template = PromptTemplate.from_template(COVER_LETTER_PROMPT_TEXT)
research_statement_template = PromptTemplate.from_template(RESEARCH_STATEMENT_PROMPT_TEXT)
sop_template = PromptTemplate.from_template(SOP_PROMPT_TEXT)
# LangChain Chains
# One chain per document type, all sharing the same LLM; built in the same
# order as the templates are listed.
email_chain, cover_letter_chain, research_statement_chain, sop_chain = (
    LLMChain(llm=llm, prompt=document_prompt)
    for document_prompt in (
        email_template,
        cover_letter_template,
        research_statement_template,
        sop_template,
    )
)
# Document generators: one tab per document type
st.subheader("📩 Generate Application Documents")
tab_labels = ["Cold Email", "Cover Letter", "Research Statement", "SOP"]
tab1, tab2, tab3, tab4 = st.tabs(tab_labels)
# Cold Email Generation
with tab1:
    st.write(f"🧑‍🏫 **Detected Professor:** {professor_name} at {university_name}")
    research_interests = st.text_area("Research Interests", placeholder="Example: Machine Learning, Data Analysis, etc.")
    reason = st.text_area("Why this professor/lab?", placeholder="Example: I am particularly interested in your work on...")
    email_requested = st.button("Generate Cold Email")
    # Both the opening text and the extracted CV text are required
    email_inputs_ready = bool(job_opening_text and cv_resume_text)
    if email_requested and not email_inputs_ready:
        st.error("Please provide job opening details and upload your CV/Resume.")
    elif email_requested:
        with st.spinner("Generating Cold Email..."):
            try:
                email_inputs = dict(
                    professor_name=professor_name,
                    university_name=university_name,
                    research_interests=research_interests,
                    reason=reason,
                    resume_text=cv_resume_text,
                )
                email = email_chain.run(email_inputs)
                st.markdown("**Generated Cold Email:**")
                st.markdown(email)
                st.download_button("Download Email", email, file_name="cold_email.txt")
            except Exception as err:
                st.error(f"Error generating cold email: {err}")
# Cover Letter Generation
with tab2:
    job_title = st.text_input("Job Title", placeholder="Example: Research Assistant")
    # Ask for the organisation only when none was detected in the opening text
    if university_name == "Not Found":
        company_name = st.text_input("Company/University", placeholder="Example: XYZ University")
    else:
        company_name = university_name
    key_skills = st.text_area("Key Skills", placeholder="Example: Python, Machine Learning, Data Analysis")
    cover_letter_requested = st.button("Generate Cover Letter")
    # Both the opening text and the extracted CV text are required
    cover_letter_inputs_ready = bool(job_opening_text and cv_resume_text)
    if cover_letter_requested and not cover_letter_inputs_ready:
        st.error("Please provide job opening details and upload your CV/Resume.")
    elif cover_letter_requested:
        with st.spinner("Generating Cover Letter..."):
            try:
                cover_letter_inputs = dict(
                    job_title=job_title,
                    company=company_name,
                    key_skills=key_skills,
                    resume_text=cv_resume_text,
                )
                cover_letter = cover_letter_chain.run(cover_letter_inputs)
                st.markdown("**Generated Cover Letter:**")
                st.markdown(cover_letter)
                st.download_button("Download Cover Letter", cover_letter, file_name="cover_letter.txt")
            except Exception as err:
                st.error(f"Error generating cover letter: {err}")
# Research Statement Generation
with tab3:
    research_background = st.text_area("Research Background", placeholder="Example: My research focuses on...")
    key_projects = st.text_area("Key Research Projects", placeholder="Example: Developed a machine learning model for...")
    future_goals = st.text_area("Future Research Goals", placeholder="Example: I aim to explore...")
    if st.button("Generate Research Statement"):
        # Validate before calling the LLM, as the Cold Email and Cover Letter
        # tabs do: a prompt with every field blank gives the model nothing to
        # write from.
        statement_fields = (research_background, key_projects, future_goals)
        if not any(field.strip() for field in statement_fields):
            st.error("Please fill in at least one of the research statement fields.")
        else:
            with st.spinner("Generating Research Statement..."):
                try:
                    research_statement = research_statement_chain.run({
                        "research_background": research_background,
                        "key_projects": key_projects,
                        "future_goals": future_goals
                    })
                    st.markdown("**Generated Research Statement:**")
                    st.markdown(research_statement)
                    st.download_button("Download Research Statement", research_statement, file_name="research_statement.txt")
                except Exception as e:
                    # UI boundary: report the failure instead of crashing the app
                    st.error(f"Error generating research statement: {e}")
# SOP Generation
with tab4:
    motivation = st.text_area("Motivation for Graduate Studies", placeholder="Example: I have always been passionate about...")
    academic_background = st.text_area("Academic Background", placeholder="Example: I completed my undergraduate degree in...")
    research_experiences = st.text_area("Research & Projects", placeholder="Example: During my undergraduate studies, I worked on...")
    career_goals = st.text_area("Career Goals", placeholder="Example: My long-term goal is to...")
    why_this_program = st.text_area("Why This Program", placeholder="Example: This program aligns with my research interests because...")
    if st.button("Generate SOP"):
        # Validate before calling the LLM, as the Cold Email and Cover Letter
        # tabs do: a prompt with every field blank gives the model nothing to
        # write from.
        sop_fields = (
            motivation,
            academic_background,
            research_experiences,
            career_goals,
            why_this_program,
        )
        if not any(field.strip() for field in sop_fields):
            st.error("Please fill in at least one of the SOP fields.")
        else:
            with st.spinner("Generating SOP..."):
                try:
                    sop = sop_chain.run({
                        "motivation": motivation,
                        "academic_background": academic_background,
                        "research_experiences": research_experiences,
                        "career_goals": career_goals,
                        "why_this_program": why_this_program
                    })
                    st.markdown("**Generated SOP:**")
                    st.markdown(sop)
                    st.download_button("Download SOP", sop, file_name="sop.txt")
                except Exception as e:
                    # UI boundary: report the failure instead of crashing the app
                    st.error(f"Error generating SOP: {e}")
# Reset Button
if st.button("🔄 Reset All Inputs and Outputs"):
    # Prefer st.rerun, the non-experimental replacement, and fall back to
    # st.experimental_rerun only when the installed Streamlit lacks st.rerun.
    # NOTE(review): this re-executes the script; confirm whether a rerun alone
    # clears the values already typed into the widgets.
    trigger_rerun = getattr(st, "rerun", None) or st.experimental_rerun
    trigger_rerun()