DocuMentorAI / app.py
Sobit's picture
Update app.py
1dfec0d verified
raw
history blame
8 kB
import streamlit as st
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
import fitz # PyMuPDF for PDF text extraction
import pytesseract
from PIL import Image
import os
# Set Hugging Face API Key (Set this in Hugging Face Secrets)
# Copies the "HF_TOKEN" entry from Streamlit's secrets into the process
# environment; presumably the HuggingFaceHub client below reads it from
# there -- TODO confirm against the LangChain docs.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
# Load Free LLM from Hugging Face
# One shared LLM handle (temperature 0.5) reused by every chain built later in this script.
llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.3", model_kwargs={"temperature": 0.5})
# Define Streamlit App
# NOTE(review): Streamlit usually wants set_page_config to be the first
# Streamlit command of the script -- confirm the st.secrets access above is fine.
st.set_page_config(page_title="DocuMentorAI", layout="wide")
st.title("📄 DocuMentorAI")
st.write("Upload your CV/Resume and generate professional application documents.")
# File Upload (PDF/Image)
# Accepts PDF, PNG, JPG and JPEG; the result is tested for truthiness further
# down before any text extraction is attempted.
uploaded_file = st.file_uploader("Upload your CV/Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: A binary file-like object exposing ``read()`` (here, the
            object returned by the Streamlit uploader).

    Returns:
        A single string made of each page's ``get_text()`` output joined
        without any separator; an empty string for a document with no pages.
    """
    # The whole upload is read into memory and opened as a byte stream; the
    # explicit filetype is needed because there is no filename to infer it from.
    pdf_bytes = pdf_file.read()
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        # One join instead of repeated `+=`, which re-copies the growing string.
        return "".join(page.get_text() for page in doc)
def extract_text_from_image(image_file):
    """Run OCR over an image file and return the text pytesseract reports.

    ``image_file`` is handed directly to ``PIL.Image.open``.
    """
    return pytesseract.image_to_string(Image.open(image_file))
# Start from an empty resume so the generation tabs later in the script can
# always read `extracted_text`, even when nothing has been uploaded yet
# (previously the name was only bound inside the branch below).
extracted_text = ""
if uploaded_file:
    # Route on the MIME type reported for the upload: PDFs are parsed with
    # PyMuPDF, every other accepted type is treated as an image and OCR'd.
    file_type = uploaded_file.type
    if file_type == "application/pdf":
        extracted_text = extract_text_from_pdf(uploaded_file)
    else:
        extracted_text = extract_text_from_image(uploaded_file)
    # Show the extracted text so the user can check it before generating documents.
    st.subheader("Extracted Text from CV/Resume")
    st.text_area("Preview:", extracted_text, height=150)
def get_final_output(full_text):
    """Return the model's answer with any echoed prompt removed.

    Everything up to and including the last "### Output:" marker is
    discarded; when the marker is absent the whole text is kept. Surrounding
    whitespace is stripped in both cases.
    """
    # rpartition puts the text after the last marker in slot 2, and the
    # entire string in slot 2 when there is no marker at all.
    _, _, answer = full_text.rpartition("### Output:")
    return answer.strip()
# Define LLM Prompt Templates
# Cold-email prompt. Placeholders filled at run time: recipient_name,
# position_name, research_interests, reason, resume_text.
# The "### Output:" heading is the marker get_final_output() splits on.
# NOTE(review): if the model echoes the prompt back, the instruction sentence
# that follows "### Output:" would remain in the cleaned result -- confirm.
email_template = PromptTemplate.from_template("""
You are an AI assistant skilled in crafting personalized and engaging cold emails for research positions.
### Instructions:
- Address the recipient warmly and professionally.
- Introduce yourself succinctly, highlighting relevant background.
- Clearly express your interest in the specific position and align it with your research interests.
- Articulate why you are particularly drawn to this professor's work or lab.
- Mention pertinent details from your resume that strengthen your candidacy.
- Conclude with a polite call to action and gratitude.
### Input:
- Recipient Name: {recipient_name}
- Position Title: {position_name}
- Your Research Interests: {research_interests}
- Reason for Choosing This Professor/Lab: {reason}
- Key Resume Highlights: {resume_text}
### Output:
Compose a well-structured, concise cold email with a polite and engaging tone that reflects genuine interest and professionalism.
""")
# Cover-letter prompt. Placeholders filled at run time: job_title, company,
# key_skills, resume_text. Uses the same "### Instructions / ### Input /
# ### Output" layout as the other prompts in this file.
cover_letter_template = PromptTemplate.from_template("""
You are an AI assistant proficient in generating personalized and compelling cover letters for job applications.
### Instructions:
- Begin with a formal salutation.
- State the position you are applying for and how you discovered it.
- Highlight your key skills and experiences that make you a strong fit for the role.
- Connect your professional background to the company's mission and values.
- Include specific achievements from your resume that demonstrate your qualifications.
- End with a courteous closing and express enthusiasm for the opportunity.
### Input:
- Job Title: {job_title}
- Company Name: {company}
- Relevant Skills and Experiences: {key_skills}
- Resume Highlights: {resume_text}
### Output:
Draft a polished and formal cover letter that showcases your suitability for the position and aligns with the company's ethos.
""")
# Research-statement prompt. Placeholders filled at run time:
# research_interests, resume_text, goals.
research_statement_template = PromptTemplate.from_template("""
You are an AI assistant adept at composing insightful and persuasive research statements for Ph.D. applications.
### Instructions:
- Open with a summary of your research interests and their significance.
- Detail your academic background and any research projects that have prepared you for this field.
- Discuss your future research goals and how they align with the program's strengths.
- Emphasize your passion and commitment to advancing knowledge in this area.
### Input:
- Research Interests: {research_interests}
- Academic Background and Experience: {resume_text}
- Future Research Objectives: {goals}
### Output:
Generate a compelling research statement with a strong academic tone that reflects your expertise and aspirations.
""")
# Statement-of-Purpose prompt. Placeholders filled at run time: program_name,
# university, research_interests, career_goals, resume_text.
sop_template = PromptTemplate.from_template("""
You are an AI assistant experienced in crafting detailed and engaging Statements of Purpose for graduate program applications.
### Instructions:
- Introduce yourself and your academic interests.
- Explain why you are interested in the chosen program and university.
- Describe your relevant experiences and how they have prepared you for this program.
- Outline your career goals and how this program will help you achieve them.
- Highlight aspects of your resume that support your application.
### Input:
- Program Name: {program_name}
- University Name: {university}
- Your Research Interests: {research_interests}
- Career Objectives: {career_goals}
- Resume Details: {resume_text}
### Output:
Compose a structured and professional Statement of Purpose that conveys your qualifications, motivations, and fit for the program.
""")
# Create LangChain Chains
# Each prompt template gets its own chain; all four share the module-level LLM.
# Chains are built in the same order as before: email, cover letter,
# research statement, SOP.
(
    email_chain,
    cover_letter_chain,
    research_statement_chain,
    sop_chain,
) = (
    LLMChain(llm=llm, prompt=template)
    for template in (
        email_template,
        cover_letter_template,
        research_statement_template,
        sop_template,
    )
)
# User Inputs for Document Generation
st.subheader("📩 Generate Application Documents")
# One tab per document type, in display order.
tab_labels = ["Cold Email", "Cover Letter", "Research Statement", "SOP"]
tab1, tab2, tab3, tab4 = st.tabs(tab_labels)
with tab1:
    # Cold-email form. `research_interests` entered here is also read by the
    # Research Statement and SOP tabs further down, so its name must stay as is.
    recipient = st.text_input("Recipient Name")
    position = st.text_input("Position Name")
    research_interests = st.text_area("Research Interests")
    reason = st.text_area("Why this professor/lab?")
    if st.button("Generate Cold Email"):
        # Keys must match the placeholders of email_template.
        email = email_chain.run(
            {
                "recipient_name": recipient,
                "position_name": position,
                "research_interests": research_interests,
                "reason": reason,
                "resume_text": extracted_text,
            }
        )
        final_email = get_final_output(email)
        # Show the cleaned text, as the other tabs do; the raw `email` may
        # still contain the echoed prompt.
        st.text_area("Generated Cold Email", final_email, height=250)
with tab2:
    # Cover-letter form: three free-text fields plus the extracted resume text.
    job_title = st.text_input("Job Title")
    company = st.text_input("Company/University")
    key_skills = st.text_area("Key Skills")
    if st.button("Generate Cover Letter"):
        # Keys must match the placeholders of cover_letter_template.
        cover_letter_inputs = dict(
            job_title=job_title,
            company=company,
            key_skills=key_skills,
            resume_text=extracted_text,
        )
        cover_letter = cover_letter_chain.run(cover_letter_inputs)
        # Strip any echoed prompt before displaying the result.
        final_cover_letter = get_final_output(cover_letter)
        st.text_area("Generated Cover Letter", final_cover_letter, height=250)
with tab3:
    # Research-statement form. The research interests are not asked for again
    # here: the value typed in the Cold Email tab is reused.
    research_goals = st.text_area("Future Research Goals")
    if st.button("Generate Research Statement"):
        # Keys must match the placeholders of research_statement_template.
        statement_inputs = dict(
            research_interests=research_interests,
            goals=research_goals,
            resume_text=extracted_text,
        )
        research_statement = research_statement_chain.run(statement_inputs)
        # Strip any echoed prompt before displaying the result.
        final_rs = get_final_output(research_statement)
        st.text_area("Generated Research Statement", final_rs, height=250)
with tab4:
    # Statement-of-Purpose form. As in the Research Statement tab, the research
    # interests come from the field in the Cold Email tab.
    program_name = st.text_input("Program Name")
    university = st.text_input("University")
    career_goals = st.text_area("Career Goals")
    if st.button("Generate SOP"):
        # Keys must match the placeholders of sop_template.
        sop_inputs = dict(
            program_name=program_name,
            university=university,
            research_interests=research_interests,
            career_goals=career_goals,
            resume_text=extracted_text,
        )
        sop = sop_chain.run(sop_inputs)
        # Strip any echoed prompt before displaying the result.
        final_sop = get_final_output(sop)
        st.text_area("Generated SOP", final_sop, height=250)