Spaces:
Sleeping
Sleeping
import streamlit as st | |
from langchain.chains import LLMChain | |
from langchain.prompts import PromptTemplate | |
from langchain.llms import HuggingFaceHub | |
import fitz # PyMuPDF for PDF text extraction | |
import pytesseract | |
from PIL import Image | |
import os | |
# Configure the page before any other Streamlit API is touched: Streamlit
# documents set_page_config as the first command of an app page.
# NOTE(review): st.secrets may itself emit an element when no secrets are
# configured, which is why it now comes after this call -- confirm against the
# Streamlit version deployed.
st.set_page_config(page_title="DocuMentorAI", layout="wide")

# Copy the Hugging Face token from the app secrets (key "HF_TOKEN") into the
# process environment; presumably this is where HuggingFaceHub looks for it.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]

# Hosted LLM shared by every chain defined later in this script.
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    model_kwargs={"temperature": 0.5},
)

# Page header
st.title("📄 DocuMentorAI")
st.write("Upload your CV/Resume and generate professional application documents.")

# File upload (PDF or image); the value is falsy until the user picks a file.
uploaded_file = st.file_uploader(
    "Upload your CV/Resume (PDF or Image)",
    type=["pdf", "png", "jpg", "jpeg"],
)
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Binary file-like object whose ``read()`` yields the raw
            PDF bytes (here, the object returned by the Streamlit uploader).

    Returns:
        A single string made of each page's text, with no separator inserted
        between pages.
    """
    pdf_bytes = pdf_file.read()
    # The document is opened from the in-memory bytes, so the file type is
    # stated explicitly as PDF.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        # One join instead of repeated ``+=`` keeps this linear in the total
        # text size; the generator is fully consumed before the document closes.
        return "".join(page.get_text() for page in doc)
def extract_text_from_image(image_file):
    """Run OCR on an uploaded image and return the recognised text."""
    # Open the upload with PIL and hand it straight to Tesseract.
    return pytesseract.image_to_string(Image.open(image_file))
# Start from an empty resume so that code reading `extracted_text` later in
# the script always finds the name bound, even before any file is uploaded
# (previously it was only assigned once an upload existed).
extracted_text = ""
if uploaded_file:
    file_type = uploaded_file.type
    # PDFs go through the PDF text extractor; every other accepted upload
    # type is treated as an image and sent through OCR.
    if file_type == "application/pdf":
        extracted_text = extract_text_from_pdf(uploaded_file)
    else:
        extracted_text = extract_text_from_image(uploaded_file)
    # Show what was extracted so the user can check it before generating.
    st.subheader("Extracted Text from CV/Resume")
    st.text_area("Preview:", extracted_text, height=150)
def get_final_output(full_text):
    """Strip any echoed prompt from a model response.

    Everything up to and including the last "### Output:" marker is dropped;
    when the marker is absent the whole text is kept. The result is returned
    with surrounding whitespace removed.
    """
    # rpartition yields the full string as the tail when the marker is
    # missing, and the text after the last marker otherwise.
    _, _, answer = full_text.rpartition("### Output:")
    return answer.strip()
# Prompt templates, one per document type. Each prompt text is kept in a
# private constant and then wrapped in a PromptTemplate; the {placeholders}
# are the input variables the matching chain must be given.

# Cold email: recipient_name, position_name, research_interests, reason, resume_text
_EMAIL_PROMPT = """
You are an AI assistant skilled in crafting personalized and engaging cold emails for research positions.
### Instructions:
- Address the recipient warmly and professionally.
- Introduce yourself succinctly, highlighting relevant background.
- Clearly express your interest in the specific position and align it with your research interests.
- Articulate why you are particularly drawn to this professor's work or lab.
- Mention pertinent details from your resume that strengthen your candidacy.
- Conclude with a polite call to action and gratitude.
### Input:
- Recipient Name: {recipient_name}
- Position Title: {position_name}
- Your Research Interests: {research_interests}
- Reason for Choosing This Professor/Lab: {reason}
- Key Resume Highlights: {resume_text}
### Output:
Compose a well-structured, concise cold email with a polite and engaging tone that reflects genuine interest and professionalism.
"""

# Cover letter: job_title, company, key_skills, resume_text
_COVER_LETTER_PROMPT = """
You are an AI assistant proficient in generating personalized and compelling cover letters for job applications.
### Instructions:
- Begin with a formal salutation.
- State the position you are applying for and how you discovered it.
- Highlight your key skills and experiences that make you a strong fit for the role.
- Connect your professional background to the company's mission and values.
- Include specific achievements from your resume that demonstrate your qualifications.
- End with a courteous closing and express enthusiasm for the opportunity.
### Input:
- Job Title: {job_title}
- Company Name: {company}
- Relevant Skills and Experiences: {key_skills}
- Resume Highlights: {resume_text}
### Output:
Draft a polished and formal cover letter that showcases your suitability for the position and aligns with the company's ethos.
"""

# Research statement: research_interests, resume_text, goals
_RESEARCH_STATEMENT_PROMPT = """
You are an AI assistant adept at composing insightful and persuasive research statements for Ph.D. applications.
### Instructions:
- Open with a summary of your research interests and their significance.
- Detail your academic background and any research projects that have prepared you for this field.
- Discuss your future research goals and how they align with the program's strengths.
- Emphasize your passion and commitment to advancing knowledge in this area.
### Input:
- Research Interests: {research_interests}
- Academic Background and Experience: {resume_text}
- Future Research Objectives: {goals}
### Output:
Generate a compelling research statement with a strong academic tone that reflects your expertise and aspirations.
"""

# Statement of Purpose: program_name, university, research_interests, career_goals, resume_text
_SOP_PROMPT = """
You are an AI assistant experienced in crafting detailed and engaging Statements of Purpose for graduate program applications.
### Instructions:
- Introduce yourself and your academic interests.
- Explain why you are interested in the chosen program and university.
- Describe your relevant experiences and how they have prepared you for this program.
- Outline your career goals and how this program will help you achieve them.
- Highlight aspects of your resume that support your application.
### Input:
- Program Name: {program_name}
- University Name: {university}
- Your Research Interests: {research_interests}
- Career Objectives: {career_goals}
- Resume Details: {resume_text}
### Output:
Compose a structured and professional Statement of Purpose that conveys your qualifications, motivations, and fit for the program.
"""

email_template = PromptTemplate.from_template(_EMAIL_PROMPT)
cover_letter_template = PromptTemplate.from_template(_COVER_LETTER_PROMPT)
research_statement_template = PromptTemplate.from_template(_RESEARCH_STATEMENT_PROMPT)
sop_template = PromptTemplate.from_template(_SOP_PROMPT)
# Build one LLMChain per document type, all backed by the same shared LLM.
# The generator is unpacked in the same order as the templates are listed.
email_chain, cover_letter_chain, research_statement_chain, sop_chain = (
    LLMChain(llm=llm, prompt=template)
    for template in (
        email_template,
        cover_letter_template,
        research_statement_template,
        sop_template,
    )
)
# User inputs for document generation: one tab per document type. Each tab
# collects its fields, runs the matching chain when its button is pressed,
# and shows the response after get_final_output has removed any echoed prompt.
st.subheader("📩 Generate Application Documents")
tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])

with tab1:
    recipient = st.text_input("Recipient Name")
    position = st.text_input("Position Name")
    # This value is also read by the Research Statement and SOP tabs below.
    research_interests = st.text_area("Research Interests")
    reason = st.text_area("Why this professor/lab?")
    if st.button("Generate Cold Email"):
        email = email_chain.run(
            {
                "recipient_name": recipient,
                "position_name": position,
                "research_interests": research_interests,
                "reason": reason,
                "resume_text": extracted_text,
            }
        )
        final_email = get_final_output(email)
        # Display the cleaned text, as the other tabs do; the raw response
        # was shown here before, leaving final_email unused.
        st.text_area("Generated Cold Email", final_email, height=250)

with tab2:
    job_title = st.text_input("Job Title")
    company = st.text_input("Company/University")
    key_skills = st.text_area("Key Skills")
    if st.button("Generate Cover Letter"):
        cover_letter = cover_letter_chain.run(
            {
                "job_title": job_title,
                "company": company,
                "key_skills": key_skills,
                "resume_text": extracted_text,
            }
        )
        final_cover_letter = get_final_output(cover_letter)
        st.text_area("Generated Cover Letter", final_cover_letter, height=250)

with tab3:
    research_goals = st.text_area("Future Research Goals")
    if st.button("Generate Research Statement"):
        # research_interests comes from the field on the Cold Email tab.
        research_statement = research_statement_chain.run(
            {
                "research_interests": research_interests,
                "goals": research_goals,
                "resume_text": extracted_text,
            }
        )
        final_rs = get_final_output(research_statement)
        st.text_area("Generated Research Statement", final_rs, height=250)

with tab4:
    program_name = st.text_input("Program Name")
    university = st.text_input("University")
    career_goals = st.text_area("Career Goals")
    if st.button("Generate SOP"):
        # research_interests comes from the field on the Cold Email tab.
        sop = sop_chain.run(
            {
                "program_name": program_name,
                "university": university,
                "research_interests": research_interests,
                "career_goals": career_goals,
                "resume_text": extracted_text,
            }
        )
        final_sop = get_final_output(sop)
        st.text_area("Generated SOP", final_sop, height=250)