# Hugging Face Spaces status: Sleeping
import streamlit as st | |
from langchain.chains import LLMChain | |
from langchain.prompts import PromptTemplate | |
from langchain.llms import HuggingFaceHub | |
import fitz # PyMuPDF for PDF extraction | |
from PIL import Image | |
import os | |
import pytesseract | |
import re | |
# Hugging Face credentials: the token is read from the app secrets under
# "HF_TOKEN" (set this in Hugging Face Secrets) and exported through the
# process environment.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]

# Hosted LLM from the Hugging Face Hub, shared by every chain built below.
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    model_kwargs={"temperature": 0.5},
)
# Page-level configuration and header.
st.set_page_config(page_title="DocuMentorAI", layout="wide", page_icon="📄")
st.title("📄 DocuMentorAI")
st.write("Generate professional application documents with ease!")

# Style overrides for text areas, buttons, markdown and spinners, injected
# as raw HTML (hence unsafe_allow_html=True).
_CUSTOM_CSS = """
<style>
.stTextArea textarea { font-size: 16px !important; }
.stButton button { width: 100%; background-color: #4CAF50; color: white; }
.stDownloadButton button { width: 100%; background-color: #008CBA; color: white; }
.stMarkdown { font-size: 18px; }
.stSpinner div { margin: auto; }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Free-text description of the job / research opening.
st.subheader("📢 Enter Opening Details")
job_opening_text = st.text_area(
    label="Paste the job/research opening details here...",
    height=150,
    placeholder="Example: 'We are hiring a Research Assistant at XYZ University. The ideal candidate has experience in machine learning and data analysis...'",
)

# CV / resume upload; accepts a PDF or one of the listed image formats.
st.subheader("📄 Upload CV/Resume")
cv_resume_file = st.file_uploader(
    label="Upload your CV/Resume (PDF or Image)",
    type=["pdf", "png", "jpg", "jpeg"],
    help="Upload a PDF or image of your CV/Resume for text extraction.",
)
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, joined by single spaces.

    Args:
        pdf_file: File-like object whose ``read()`` yields the PDF bytes.

    Returns:
        The concatenated page text, or ``""`` if anything goes wrong; in that
        case the error is also shown in the UI via ``st.error``.
    """
    try:
        raw_bytes = pdf_file.read()
        # Open from memory rather than from a path: we only have the bytes.
        with fitz.open(stream=raw_bytes, filetype="pdf") as document:
            return " ".join(page.get_text() for page in document)
    except Exception as err:
        st.error(f"Error extracting text from PDF: {err}")
        return ""
# Function to extract text from Image using OCR
def extract_text_from_image(image_file):
    """Run OCR on an uploaded image and return the recognised text.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts.

    Returns:
        The string produced by ``pytesseract.image_to_string``, or ``""`` if
        opening or OCR fails (the error is reported via ``st.error``).
    """
    try:
        picture = Image.open(image_file)
        ocr_text = pytesseract.image_to_string(picture)
    except Exception as err:
        st.error(f"Error extracting text from image: {err}")
        return ""
    return ocr_text
# Function to extract text from uploaded files
def extract_text(uploaded_file):
    """Dispatch an uploaded file to the PDF or image text extractor.

    Args:
        uploaded_file: Uploaded file object exposing a ``type`` attribute
            (its MIME type), or a falsy value when nothing was uploaded.

    Returns:
        The extracted text, or ``""`` when no file was provided.
    """
    if not uploaded_file:
        return ""
    # Anything that is not a PDF is handed to the OCR path.
    if uploaded_file.type == "application/pdf":
        return extract_text_from_pdf(uploaded_file)
    return extract_text_from_image(uploaded_file)
# Pull the text out of the uploaded CV/Resume ("" when nothing is uploaded).
cv_resume_text = extract_text(cv_resume_file)

# Show each non-empty input inside its own collapsible preview.
for _expander_label, _heading, _body in (
    ("🔍 View Entered Opening Details", "Job Opening Details", job_opening_text),
    ("🔍 View Extracted CV/Resume Details", "CV/Resume Details", cv_resume_text),
):
    if _body:
        with st.expander(_expander_label):
            st.markdown(f"**{_heading}:**\n\n{_body}")
# Function to extract professor name, designation, and university

# One capitalised word of an institution name, e.g. "XYZ" or "King's".
_NAME_WORD = r"[A-Z][A-Za-z&'-]*"
# Words of a single name must sit on one line, so only spaces/tabs separate them.
_GAP = r"[ \t]+"

# Title followed by a capitalised first and last name, e.g. "Dr. Jane Doe".
_PROFESSOR_RE = re.compile(r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)")

# Institution keyword together with the words that make up its name:
#   "<Capitalised words> University|Institute|College [of <Capitalised words>]"
#   "<Capitalised words> School of <word> [<Capitalised words>]"
# A bare keyword such as "University" still matches on its own.
_UNIVERSITY_RE = re.compile(
    rf"(?:{_NAME_WORD}{_GAP})*"
    r"(?:"
    rf"(?:University|Institute|College)(?:{_GAP}of(?:{_GAP}{_NAME_WORD})+)?"
    rf"|School{_GAP}of{_GAP}[A-Za-z]+(?:{_GAP}{_NAME_WORD})*"
    r")"
)


def extract_professor_details(text):
    """Heuristically pull a professor and an institution name out of free text.

    The professor is the first "Dr." / "Professor" / "Prof." title followed by
    two capitalised words. The institution is the first occurrence of
    "University", "Institute", "College" or "School of ...", extended with the
    capitalised words around it so the full name is returned (for example
    "XYZ University" or "University of Cambridge") instead of the keyword
    alone. Matching is regex-based and case-sensitive; it is a best-effort
    guess, not a parser.

    Args:
        text: The opening description. Empty or ``None`` is accepted.

    Returns:
        A ``(professor_name, university_name)`` tuple of strings. Either
        element is ``"Not Found"`` when nothing matched.
    """
    if not text:
        return "Not Found", "Not Found"

    professor_match = _PROFESSOR_RE.search(text)
    university_match = _UNIVERSITY_RE.search(text)

    # group(0) keeps the title, e.g. "Dr. Jane Doe".
    professor_name = professor_match.group(0) if professor_match else "Not Found"
    university_name = university_match.group(0) if university_match else "Not Found"
    return professor_name, university_name
# Guess the professor and institution from the entered opening text; each
# value is "Not Found" when nothing matched.
(professor_name, university_name) = extract_professor_details(job_opening_text)
# LLM Prompt Templates
# Each prompt's text lives in its own constant; the {placeholders} are the
# input variables the matching chain must be given.

_EMAIL_PROMPT = """
Write a professional cold email for a research position.
- Address the professor formally.
- Introduce yourself and academic background.
- Express interest in their research.
- Highlight key skills from your CV.
- Conclude with a polite request.
### Input:
- Professor: {professor_name}
- University: {university_name}
- Research Interests: {research_interests}
- Why This Lab: {reason}
- CV Highlights: {resume_text}
### Output:
A well-structured, professional cold email.
"""

_COVER_LETTER_PROMPT = """
Write a compelling job application cover letter.
- Address the employer formally.
- Mention job title and where you found it.
- Highlight key skills and experiences.
- Relate background to the company.
- Conclude with enthusiasm.
### Input:
- Job Title: {job_title}
- Company: {company}
- Key Skills: {key_skills}
- CV Highlights: {resume_text}
### Output:
A strong, well-formatted cover letter.
"""

_RESEARCH_STATEMENT_PROMPT = """
Write a research statement for Ph.D. applications.
- Discuss research background and motivation.
- Explain key research experiences and findings.
- Outline future research interests and goals.
- Highlight contributions to the field.
### Input:
- Research Background: {research_background}
- Key Research Projects: {key_projects}
- Future Goals: {future_goals}
### Output:
A well-structured, professional research statement.
"""

_SOP_PROMPT = """
Write a compelling Statement of Purpose (SOP).
- Introduce motivation for graduate studies.
- Discuss academic background.
- Explain relevant experiences and research.
- Outline career goals.
- Justify fit for the program.
### Input:
- Motivation: {motivation}
- Academic Background: {academic_background}
- Research & Projects: {research_experiences}
- Career Goals: {career_goals}
- Why This Program: {why_this_program}
### Output:
A well-structured SOP.
"""

email_template = PromptTemplate.from_template(_EMAIL_PROMPT)
cover_letter_template = PromptTemplate.from_template(_COVER_LETTER_PROMPT)
research_statement_template = PromptTemplate.from_template(_RESEARCH_STATEMENT_PROMPT)
sop_template = PromptTemplate.from_template(_SOP_PROMPT)
# LangChain Chains: one per document type, all sharing the same LLM.
email_chain = LLMChain(prompt=email_template, llm=llm)
cover_letter_chain = LLMChain(prompt=cover_letter_template, llm=llm)
research_statement_chain = LLMChain(prompt=research_statement_template, llm=llm)
sop_chain = LLMChain(prompt=sop_template, llm=llm)
# Document generators: one tab per document type.
st.subheader("📩 Generate Application Documents")
_tab_labels = ["Cold Email", "Cover Letter", "Research Statement", "SOP"]
tab1, tab2, tab3, tab4 = st.tabs(_tab_labels)
# Cold Email Generation
with tab1:
    st.write(f"🧑🏫 **Detected Professor:** {professor_name} at {university_name}")
    research_interests = st.text_area(
        "Research Interests",
        placeholder="Example: Machine Learning, Data Analysis, etc.",
    )
    reason = st.text_area(
        "Why this professor/lab?",
        placeholder="Example: I am particularly interested in your work on...",
    )

    if st.button("Generate Cold Email"):
        # Both the opening text and the extracted CV text are required.
        if job_opening_text and cv_resume_text:
            with st.spinner("Generating Cold Email..."):
                try:
                    email_inputs = {
                        "professor_name": professor_name,
                        "university_name": university_name,
                        "research_interests": research_interests,
                        "reason": reason,
                        "resume_text": cv_resume_text,
                    }
                    email = email_chain.run(email_inputs)
                    st.markdown("**Generated Cold Email:**")
                    st.markdown(email)
                    st.download_button("Download Email", email, file_name="cold_email.txt")
                except Exception as exc:
                    st.error(f"Error generating cold email: {exc}")
        else:
            st.error("Please provide job opening details and upload your CV/Resume.")
# Cover Letter Generation
with tab2:
    job_title = st.text_input("Job Title", placeholder="Example: Research Assistant")

    # Ask for the company only when no institution was detected in the opening text.
    if university_name == "Not Found":
        company_name = st.text_input("Company/University", placeholder="Example: XYZ University")
    else:
        company_name = university_name

    key_skills = st.text_area(
        "Key Skills",
        placeholder="Example: Python, Machine Learning, Data Analysis",
    )

    if st.button("Generate Cover Letter"):
        # Both the opening text and the extracted CV text are required.
        if job_opening_text and cv_resume_text:
            with st.spinner("Generating Cover Letter..."):
                try:
                    cover_letter_inputs = {
                        "job_title": job_title,
                        "company": company_name,
                        "key_skills": key_skills,
                        "resume_text": cv_resume_text,
                    }
                    cover_letter = cover_letter_chain.run(cover_letter_inputs)
                    st.markdown("**Generated Cover Letter:**")
                    st.markdown(cover_letter)
                    st.download_button(
                        "Download Cover Letter",
                        cover_letter,
                        file_name="cover_letter.txt",
                    )
                except Exception as exc:
                    st.error(f"Error generating cover letter: {exc}")
        else:
            st.error("Please provide job opening details and upload your CV/Resume.")
# Research Statement Generation
with tab3:
    research_background = st.text_area(
        "Research Background",
        placeholder="Example: My research focuses on...",
    )
    key_projects = st.text_area(
        "Key Research Projects",
        placeholder="Example: Developed a machine learning model for...",
    )
    future_goals = st.text_area(
        "Future Research Goals",
        placeholder="Example: I aim to explore...",
    )

    # This tab uses only the fields above; no CV or opening text is required.
    if st.button("Generate Research Statement"):
        with st.spinner("Generating Research Statement..."):
            try:
                research_statement_inputs = {
                    "research_background": research_background,
                    "key_projects": key_projects,
                    "future_goals": future_goals,
                }
                research_statement = research_statement_chain.run(research_statement_inputs)
                st.markdown("**Generated Research Statement:**")
                st.markdown(research_statement)
                st.download_button(
                    "Download Research Statement",
                    research_statement,
                    file_name="research_statement.txt",
                )
            except Exception as exc:
                st.error(f"Error generating research statement: {exc}")
# SOP Generation
with tab4:
    motivation = st.text_area(
        "Motivation for Graduate Studies",
        placeholder="Example: I have always been passionate about...",
    )
    academic_background = st.text_area(
        "Academic Background",
        placeholder="Example: I completed my undergraduate degree in...",
    )
    research_experiences = st.text_area(
        "Research & Projects",
        placeholder="Example: During my undergraduate studies, I worked on...",
    )
    career_goals = st.text_area(
        "Career Goals",
        placeholder="Example: My long-term goal is to...",
    )
    why_this_program = st.text_area(
        "Why This Program",
        placeholder="Example: This program aligns with my research interests because...",
    )

    # This tab uses only the fields above; no CV or opening text is required.
    if st.button("Generate SOP"):
        with st.spinner("Generating SOP..."):
            try:
                sop_inputs = {
                    "motivation": motivation,
                    "academic_background": academic_background,
                    "research_experiences": research_experiences,
                    "career_goals": career_goals,
                    "why_this_program": why_this_program,
                }
                sop = sop_chain.run(sop_inputs)
                st.markdown("**Generated SOP:**")
                st.markdown(sop)
                st.download_button("Download SOP", sop, file_name="sop.txt")
            except Exception as exc:
                st.error(f"Error generating SOP: {exc}")
# Reset Button
if st.button("🔄 Reset All Inputs and Outputs"):
    # st.experimental_rerun is deprecated in favour of st.rerun and is absent
    # from newer Streamlit releases. Prefer st.rerun when this Streamlit has
    # it; the `or` short-circuits, so the legacy attribute is only looked up
    # on older versions that still provide it.
    _rerun = getattr(st, "rerun", None) or st.experimental_rerun
    _rerun()