# DocuMentorAI / app.py
# Sobit's picture
# Create app.py
# 8349bb4 verified
# raw
# history blame
# 5.24 kB
import streamlit as st
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
import fitz # PyMuPDF for PDF text extraction
import pytesseract
from PIL import Image
import os
# Set Hugging Face API Key (Set this in Hugging Face Secrets)
# Copies the "HF_TOKEN" Streamlit secret into the HUGGINGFACEHUB_API_TOKEN
# environment variable; a missing secret makes this lookup fail at startup.
# Presumably HuggingFaceHub picks the token up from that variable -- confirm.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
# Load Free LLM from Hugging Face
# A single shared LLM instance (temperature 0.5) reused by every chain below.
# NOTE(review): verify this repo_id exists on the Hub -- the published Mistral
# instruct checkpoints appear to carry a version suffix (e.g. "-v0.2"); confirm.
llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct", model_kwargs={"temperature": 0.5})
# Define Streamlit App
# NOTE(review): Streamlit documents set_page_config as needing to be the first
# Streamlit command on the page; confirm the st.secrets access above is fine.
st.set_page_config(page_title="DocuMentorAI", layout="wide")
st.title("📄 DocuMentorAI")
st.write("Upload your CV/Resume and generate professional application documents.")
# File Upload (PDF/Image)
# `uploaded_file` is falsy until the user picks a file; only the listed
# extensions are accepted by the widget.
uploaded_file = st.file_uploader("Upload your CV/Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Either a filesystem path, or a binary file-like object
            exposing ``read()`` (such as the object returned by
            ``st.file_uploader``).

    Returns:
        The concatenation of ``page.get_text()`` for each page, in page order.
    """
    if hasattr(pdf_file, "read"):
        # The first positional argument of fitz.open is a filename, so an
        # in-memory upload has to be passed as raw bytes via `stream=`.
        if hasattr(pdf_file, "seek"):
            # Rewind first: if the stream has been read before, read()
            # would otherwise return nothing.
            pdf_file.seek(0)
        document = fitz.open(stream=pdf_file.read(), filetype="pdf")
    else:
        # Path-like input keeps the original behaviour.
        document = fitz.open(pdf_file)

    with document as doc:
        return "".join(page.get_text() for page in doc)
def extract_text_from_image(image_file):
    """Run OCR on an image and return the recognised text.

    ``image_file`` is handed to ``PIL.Image.open`` unchanged, and the opened
    image is passed to ``pytesseract.image_to_string``.
    """
    return pytesseract.image_to_string(Image.open(image_file))
# Bind the resume text up front so the generation tabs further down can always
# read `extracted_text`, even when no file has been uploaded yet (previously
# the name only existed after an upload, so the buttons raised NameError).
extracted_text = ""
if uploaded_file:
    # Route on the MIME type reported by the uploader: PDFs go through the
    # PDF extractor, every other accepted type is treated as an image for OCR.
    file_type = uploaded_file.type
    if file_type == "application/pdf":
        extracted_text = extract_text_from_pdf(uploaded_file)
    else:
        extracted_text = extract_text_from_image(uploaded_file)
    # Show the user what was extracted.
    st.subheader("Extracted Text from CV/Resume")
    st.text_area("Preview:", extracted_text, height=150)
# Define LLM Prompt Templates
# Each template is plain text with {placeholders}; the names in curly braces
# are the input variables that must be supplied when the template is run.

# Cold email. Placeholders: recipient_name, position_name, research_interests,
# reason, resume_text.
email_template = PromptTemplate.from_template("""
You are an AI assistant helping users craft a professional cold email for a research position.
### Input:
- Recipient: {recipient_name}
- Position: {position_name}
- Research Interests: {research_interests}
- Why this professor/lab: {reason}
- Resume Details: {resume_text}
### Output:
A well-structured, concise cold email with a polite and engaging tone.
""")
# Cover letter. Placeholders: job_title, company, key_skills, resume_text.
cover_letter_template = PromptTemplate.from_template("""
You are an AI assistant generating a professional cover letter.
### Input:
- Job Title: {job_title}
- Company/University: {company}
- Key Skills: {key_skills}
- Resume Details: {resume_text}
### Output:
A polished and formal cover letter.
""")
# Research statement. Placeholders: research_interests, resume_text, goals.
research_statement_template = PromptTemplate.from_template("""
You are an AI assistant generating a research statement for a Ph.D. application.
### Input:
- Research Interests: {research_interests}
- Academic Background: {resume_text}
- Future Research Goals: {goals}
### Output:
A compelling research statement with a strong academic tone.
""")
# Statement of Purpose. Placeholders: program_name, university,
# research_interests, career_goals, resume_text.
sop_template = PromptTemplate.from_template("""
You are an AI assistant writing a Statement of Purpose (SOP) for a master's or Ph.D. program.
### Input:
- Program Name: {program_name}
- University: {university}
- Research Interests: {research_interests}
- Career Goals: {career_goals}
- Resume Details: {resume_text}
### Output:
A structured and professional SOP.
""")
# Create LangChain Chains
# One chain per document type; they all share the same LLM and differ only in
# the prompt template they wrap.
(
    email_chain,
    cover_letter_chain,
    research_statement_chain,
    sop_chain,
) = (
    LLMChain(llm=llm, prompt=template)
    for template in (
        email_template,
        cover_letter_template,
        research_statement_template,
        sop_template,
    )
)
# User Inputs for Document Generation
st.subheader("📩 Generate Application Documents")
# One tab per document type, in display order.
tab_labels = ["Cold Email", "Cover Letter", "Research Statement", "SOP"]
tab1, tab2, tab3, tab4 = st.tabs(tab_labels)
with tab1:
    # Cold Email tab: collect the form fields, then run the email chain when
    # the button is pressed and show the result in a text area.
    recipient = st.text_input("Recipient Name")
    position = st.text_input("Position Name")
    research_interests = st.text_area("Research Interests")
    reason = st.text_area("Why this professor/lab?")
    if st.button("Generate Cold Email"):
        email_inputs = {
            "recipient_name": recipient,
            "position_name": position,
            "research_interests": research_interests,
            "reason": reason,
            "resume_text": extracted_text,
        }
        email = email_chain.run(email_inputs)
        st.text_area("Generated Cold Email", email, height=250)
with tab2:
    # Cover Letter tab: collect the form fields, then run the cover-letter
    # chain when the button is pressed and show the result in a text area.
    job_title = st.text_input("Job Title")
    company = st.text_input("Company/University")
    key_skills = st.text_area("Key Skills")
    if st.button("Generate Cover Letter"):
        cover_letter_inputs = {
            "job_title": job_title,
            "company": company,
            "key_skills": key_skills,
            "resume_text": extracted_text,
        }
        cover_letter = cover_letter_chain.run(cover_letter_inputs)
        st.text_area("Generated Cover Letter", cover_letter, height=250)
with tab3:
    # Research Statement tab. Only the goals are entered here; the
    # `research_interests` value is the one typed on the Cold Email tab.
    research_goals = st.text_area("Future Research Goals")
    if st.button("Generate Research Statement"):
        research_statement_inputs = {
            "research_interests": research_interests,
            "goals": research_goals,
            "resume_text": extracted_text,
        }
        research_statement = research_statement_chain.run(research_statement_inputs)
        st.text_area("Generated Research Statement", research_statement, height=250)
with tab4:
    # SOP tab. Programme details and career goals are entered here; the
    # `research_interests` value is the one typed on the Cold Email tab.
    program_name = st.text_input("Program Name")
    university = st.text_input("University")
    career_goals = st.text_area("Career Goals")
    if st.button("Generate SOP"):
        sop_inputs = {
            "program_name": program_name,
            "university": university,
            "research_interests": research_interests,
            "career_goals": career_goals,
            "resume_text": extracted_text,
        }
        sop = sop_chain.run(sop_inputs)
        st.text_area("Generated SOP", sop, height=250)