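# NOTE: the original first lines here were page residue ("Spaces: Sleeping"),
# apparently the Hugging Face Spaces status banner rather than part of the script.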
import streamlit as st | |
from langchain.chains import LLMChain | |
from langchain.prompts import PromptTemplate | |
from langchain.llms import HuggingFaceHub | |
import fitz # PyMuPDF for PDF extraction | |
import pytesseract | |
from PIL import Image | |
import os | |
import re | |
# Streamlit App Configuration
# Issued before any other Streamlit interaction (including the `st.secrets`
# lookup below), because Streamlit documents `set_page_config` as the first
# command an app page should run.
st.set_page_config(page_title="DocuMentorAI", layout="wide")

# Set Hugging Face API Key (Set this in Hugging Face Secrets)
# The `HF_TOKEN` secret is copied into the process environment; presumably
# this is the variable the `HuggingFaceHub` wrapper reads - confirm against the
# installed LangChain version.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]

# Load Free LLM from Hugging Face
# NOTE(review): only `temperature` is configured. If the hosted endpoint's
# default generation length is short, the generated documents may be cut off;
# consider setting an explicit length limit after confirming the API default.
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    model_kwargs={"temperature": 0.5},
)

st.title("📄 DocuMentorAI")
st.write("Upload job openings and your CV/Resume to generate professional application documents.")

# Upload Job Opening (PDF/Image/Text)
st.subheader("📢 Upload Job Opening Details")
job_opening_file = st.file_uploader(
    "Upload Job Opening (PDF, Image, or Text)",
    type=["pdf", "png", "jpg", "jpeg", "txt"],
)

# Upload CV/Resume
st.subheader("📄 Upload CV/Resume")
cv_resume_file = st.file_uploader(
    "Upload your CV/Resume (PDF or Image)",
    type=["pdf", "png", "jpg", "jpeg"],
)
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of *pdf_file*, joined by single spaces.

    *pdf_file* must provide ``read()``; whatever it returns is handed to
    PyMuPDF as an in-memory PDF stream.
    """
    raw_pdf = pdf_file.read()
    document = fitz.open(stream=raw_pdf, filetype="pdf")
    # The context manager closes the document once the text has been joined.
    with document:
        return " ".join(page.get_text() for page in document)
# Function to extract text from Image using OCR
def extract_text_from_image(image_file):
    """Open *image_file* with Pillow and return the text pytesseract reads from it."""
    picture = Image.open(image_file)
    recognised_text = pytesseract.image_to_string(picture)
    return recognised_text
# Function to extract text from uploaded files
def extract_text(uploaded_file):
    """Return the text content of an uploaded file, or ``""`` when there is none.

    Dispatches on ``uploaded_file.type`` (the MIME type reported for the
    upload):

    * ``application/pdf`` -> ``extract_text_from_pdf``
    * ``text/*``          -> the raw bytes decoded as UTF-8
    * anything else       -> ``extract_text_from_image`` (OCR)

    The job-opening uploader accepts ``.txt`` files; without the ``text/*``
    branch those were sent to OCR, where opening them as an image fails.
    """
    if not uploaded_file:
        return ""

    file_type = uploaded_file.type or ""
    if file_type == "application/pdf":
        return extract_text_from_pdf(uploaded_file)
    if file_type.startswith("text/"):
        # Undecodable bytes are replaced rather than raising, so a text file
        # saved in another encoding still yields a usable preview.
        return uploaded_file.read().decode("utf-8", errors="replace")
    return extract_text_from_image(uploaded_file)
# Extract text from job opening and CV/Resume
job_opening_text = extract_text(job_opening_file)
cv_resume_text = extract_text(cv_resume_file)

# Display Extracted Text
# The two previews carry different labels on purpose: with the same label,
# height and no key, uploading the same content to both uploaders would create
# two identical text areas, which Streamlit rejects as duplicate widgets.
if job_opening_text:
    st.subheader("Extracted Job Opening Details")
    st.text_area("Job opening preview:", job_opening_text, height=150)
if cv_resume_text:
    st.subheader("Extracted CV/Resume Details")
    st.text_area("CV/Resume preview:", cv_resume_text, height=150)
# Function to extract the professor's name (with title) and the institution name
# Title ("Dr.", "Professor", "Prof"/"Prof.") followed by two capitalised words.
_PROFESSOR_RE = re.compile(r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)")
# One capitalised word of an institution name (allows "St.", "A&M", "O'Neil").
_NAME_WORD = r"[A-Z][A-Za-z'&.-]*"
# An institution keyword together with the capitalised words around it on the
# same line, e.g. "Stanford University", "University of California",
# "Massachusetts Institute of Technology", "School of Engineering".
_INSTITUTION_RE = re.compile(
    rf"(?:\b{_NAME_WORD}[ \t]+)*"
    rf"(?:(?:University|Institute|College)(?:[ \t]+of(?:[ \t]+{_NAME_WORD})+)?"
    rf"|School[ \t]+of[ \t]+[A-Za-z]+(?:[ \t]+{_NAME_WORD})*)"
)
_NOT_FOUND = "Not Found"


def extract_professor_details(text):
    """Heuristically pull a professor's name and an institution name from *text*.

    Returns a ``(professor_name, university_name)`` tuple of strings. Either
    element is ``"Not Found"`` when nothing matched; empty or ``None`` input
    yields ``("Not Found", "Not Found")``.

    * ``professor_name`` is the first title + two-word name, e.g.
      ``"Dr. Jane Smith"``.
    * ``university_name`` is the first institution keyword plus the
      capitalised words attached to it, so the result is
      ``"Stanford University"`` rather than only ``"University"``.

    This is pattern matching, not parsing: a capitalised word that merely
    precedes the institution (such as a sentence-initial word) is included in
    the result, and names that are not plain ``Xxxx Yyyy`` are missed.
    """
    if not text:
        return _NOT_FOUND, _NOT_FOUND

    professor_match = _PROFESSOR_RE.search(text)
    institution_match = _INSTITUTION_RE.search(text)

    professor_name = professor_match.group(0) if professor_match else _NOT_FOUND
    if institution_match:
        # Drop sentence punctuation that the word pattern may have swallowed.
        university_name = institution_match.group(0).rstrip(".,;:'&-")
    else:
        university_name = _NOT_FOUND
    return professor_name, university_name
# Run the professor/institution detection on the job-opening text. The text is
# "" when nothing was uploaded, so this runs on every script execution.
detected_details = extract_professor_details(job_opening_text)
professor_name, university_name = detected_details
# LLM Prompt Templates
# Cold-email prompt. Placeholders: professor_name, university_name,
# research_interests, reason, resume_text.
_EMAIL_PROMPT_TEXT = """
Write a professional cold email for a research position.
- Address the professor formally.
- Introduce yourself and academic background.
- Express interest in their research.
- Highlight key skills from your CV.
- Conclude with a polite request.
### Input:
- Professor: {professor_name}
- University: {university_name}
- Research Interests: {research_interests}
- Why This Lab: {reason}
- CV Highlights: {resume_text}
### Output:
A well-structured, professional cold email.
"""
email_template = PromptTemplate.from_template(_EMAIL_PROMPT_TEXT)
# Cover-letter prompt. Placeholders: job_title, company, key_skills,
# resume_text.
_COVER_LETTER_PROMPT_TEXT = """
Write a compelling job application cover letter.
- Address the employer formally.
- Mention job title and where you found it.
- Highlight key skills and experiences.
- Relate background to the company.
- Conclude with enthusiasm.
### Input:
- Job Title: {job_title}
- Company: {company}
- Key Skills: {key_skills}
- CV Highlights: {resume_text}
### Output:
A strong, well-formatted cover letter.
"""
cover_letter_template = PromptTemplate.from_template(_COVER_LETTER_PROMPT_TEXT)
# Research-statement prompt. Placeholders: research_background, key_projects,
# future_goals.
_RESEARCH_STATEMENT_PROMPT_TEXT = """
Write a research statement for Ph.D. applications.
- Discuss research background and motivation.
- Explain key research experiences and findings.
- Outline future research interests and goals.
- Highlight contributions to the field.
### Input:
- Research Background: {research_background}
- Key Research Projects: {key_projects}
- Future Goals: {future_goals}
### Output:
A well-structured, professional research statement.
"""
research_statement_template = PromptTemplate.from_template(_RESEARCH_STATEMENT_PROMPT_TEXT)
# Statement-of-Purpose prompt. Placeholders: motivation, academic_background,
# research_experiences, career_goals, why_this_program.
_SOP_PROMPT_TEXT = """
Write a compelling Statement of Purpose (SOP).
- Introduce motivation for graduate studies.
- Discuss academic background.
- Explain relevant experiences and research.
- Outline career goals.
- Justify fit for the program.
### Input:
- Motivation: {motivation}
- Academic Background: {academic_background}
- Research & Projects: {research_experiences}
- Career Goals: {career_goals}
- Why This Program: {why_this_program}
### Output:
A well-structured SOP.
"""
sop_template = PromptTemplate.from_template(_SOP_PROMPT_TEXT)
# LangChain Chains
# One chain per document type, each pairing the shared `llm` with its prompt
# template; the unpacking order matches the order of the templates.
email_chain, cover_letter_chain, research_statement_chain, sop_chain = (
    LLMChain(llm=llm, prompt=prompt_template)
    for prompt_template in (
        email_template,
        cover_letter_template,
        research_statement_template,
        sop_template,
    )
)
# User Inputs
# Section header followed by one tab per document type.
st.subheader("📩 Generate Application Documents")
tab_titles = ["Cold Email", "Cover Letter", "Research Statement", "SOP"]
tab1, tab2, tab3, tab4 = st.tabs(tab_titles)
# Cold Email Generation
# Shows the detected professor/institution, collects two free-text answers and,
# when the button is pressed, runs the email chain and displays its output.
with tab1:
    st.write(f"🧑🏫 **Detected Professor:** {professor_name} at {university_name}")
    research_interests = st.text_area("Research Interests")
    reason = st.text_area("Why this professor/lab?")
    email_requested = st.button("Generate Cold Email")
    if email_requested:
        email_inputs = dict(
            professor_name=professor_name,
            university_name=university_name,
            research_interests=research_interests,
            reason=reason,
            resume_text=cv_resume_text,
        )
        email = email_chain.run(email_inputs)
        st.text_area("Generated Cold Email", email, height=250)
# Cover Letter Generation
# Collects the job title, company and key skills, then runs the cover-letter
# chain when the button is pressed.
with tab2:
    job_title = st.text_input("Job Title")
    # The company field is always shown, pre-filled with the detected
    # institution when there is one. The detection is a regex heuristic, so the
    # user must be able to correct it; previously a detected value hid the
    # field entirely. "Not Found" is the sentinel for "nothing detected".
    detected_company = "" if university_name == "Not Found" else university_name
    company_name = st.text_input("Company/University", value=detected_company)
    key_skills = st.text_area("Key Skills")
    if st.button("Generate Cover Letter"):
        cover_letter = cover_letter_chain.run({
            "job_title": job_title,
            "company": company_name,
            "key_skills": key_skills,
            "resume_text": cv_resume_text,
        })
        st.text_area("Generated Cover Letter", cover_letter, height=250)
# Research Statement Generation
# Three free-text inputs feed the research-statement chain when the button is
# pressed; the result is shown in a text area.
with tab3:
    research_background = st.text_area("Research Background")
    key_projects = st.text_area("Key Research Projects")
    future_goals = st.text_area("Future Research Goals")
    statement_requested = st.button("Generate Research Statement")
    if statement_requested:
        statement_inputs = dict(
            research_background=research_background,
            key_projects=key_projects,
            future_goals=future_goals,
        )
        research_statement = research_statement_chain.run(statement_inputs)
        st.text_area("Generated Research Statement", research_statement, height=250)
# SOP Generation
# Five free-text inputs feed the SOP chain when the button is pressed; the
# result is shown in a text area.
with tab4:
    motivation = st.text_area("Motivation for Graduate Studies")
    academic_background = st.text_area("Academic Background")
    research_experiences = st.text_area("Research & Projects")
    career_goals = st.text_area("Career Goals")
    why_this_program = st.text_area("Why This Program")
    sop_requested = st.button("Generate SOP")
    if sop_requested:
        sop_inputs = dict(
            motivation=motivation,
            academic_background=academic_background,
            research_experiences=research_experiences,
            career_goals=career_goals,
            why_this_program=why_this_program,
        )
        sop = sop_chain.run(sop_inputs)
        st.text_area("Generated SOP", sop, height=250)