Spaces:
Sleeping
Sleeping
import streamlit as st | |
from langchain.chains import LLMChain | |
from langchain.prompts import PromptTemplate | |
from langchain.llms import HuggingFaceHub | |
import fitz # PyMuPDF for PDF text extraction | |
import pytesseract | |
from PIL import Image | |
import os | |
# Hugging Face credentials: copy the `HF_TOKEN` secret (configured in the
# Space's secrets) into the environment variable assigned below.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]

# Hosted LLM shared by every generation chain in this script.
# NOTE(review): confirm this repo_id resolves on the Hugging Face Hub — the
# published Mistral instruct checkpoints appear to carry a version suffix.
llm_settings = {"temperature": 0.5}
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct",
    model_kwargs=llm_settings,
)

# Page configuration and header.
st.set_page_config(page_title="DocuMentorAI", layout="wide")
st.title("📄 DocuMentorAI")
st.write("Upload your CV/Resume and generate professional application documents.")

# Upload widget for the CV/Resume (PDF or image).
accepted_types = ["pdf", "png", "jpg", "jpeg"]
uploaded_file = st.file_uploader(
    "Upload your CV/Resume (PDF or Image)",
    type=accepted_types,
)
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Either a filesystem path to a PDF, or a binary file-like
            object holding the PDF bytes (for example the object returned
            by ``st.file_uploader``).

    Returns:
        The text of all pages, concatenated in page order.
    """
    if hasattr(pdf_file, "read"):
        # An in-memory upload cannot be opened by name, so hand PyMuPDF the
        # raw bytes. Prefer getvalue() because it ignores the current read
        # position of the buffer.
        if hasattr(pdf_file, "getvalue"):
            pdf_bytes = pdf_file.getvalue()
        else:
            pdf_bytes = pdf_file.read()
        document = fitz.open(stream=pdf_bytes, filetype="pdf")
    else:
        # Plain path: keep the original behaviour.
        document = fitz.open(pdf_file)

    with document as doc:
        return "".join(page.get_text() for page in doc)
def extract_text_from_image(image_file):
    """Run OCR over an image and return the recognised text.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    return pytesseract.image_to_string(Image.open(image_file))
# Start from an empty string so the generate buttons further down can always
# reference `extracted_text`, even when no file has been uploaded yet.
extracted_text = ""
if uploaded_file:
    file_type = uploaded_file.type
    # PDFs go through the PDF text extractor; every other accepted upload is
    # treated as an image and sent through OCR.
    if file_type == "application/pdf":
        extracted_text = extract_text_from_pdf(uploaded_file)
    else:
        extracted_text = extract_text_from_image(uploaded_file)
    # Show what was extracted so the user can check it before generating.
    st.subheader("Extracted Text from CV/Resume")
    st.text_area("Preview:", extracted_text, height=150)
# Prompt texts for the four document types. Each `{placeholder}` is filled in
# from the matching key of the dict passed to the chain's `run` call.
_EMAIL_PROMPT = """
You are an AI assistant helping users craft a professional cold email for a research position.
### Input:
- Recipient: {recipient_name}
- Position: {position_name}
- Research Interests: {research_interests}
- Why this professor/lab: {reason}
- Resume Details: {resume_text}
### Output:
A well-structured, concise cold email with a polite and engaging tone.
"""

_COVER_LETTER_PROMPT = """
You are an AI assistant generating a professional cover letter.
### Input:
- Job Title: {job_title}
- Company/University: {company}
- Key Skills: {key_skills}
- Resume Details: {resume_text}
### Output:
A polished and formal cover letter.
"""

_RESEARCH_STATEMENT_PROMPT = """
You are an AI assistant generating a research statement for a Ph.D. application.
### Input:
- Research Interests: {research_interests}
- Academic Background: {resume_text}
- Future Research Goals: {goals}
### Output:
A compelling research statement with a strong academic tone.
"""

_SOP_PROMPT = """
You are an AI assistant writing a Statement of Purpose (SOP) for a master's or Ph.D. program.
### Input:
- Program Name: {program_name}
- University: {university}
- Research Interests: {research_interests}
- Career Goals: {career_goals}
- Resume Details: {resume_text}
### Output:
A structured and professional SOP.
"""

# Wrap each prompt text in a PromptTemplate.
email_template = PromptTemplate.from_template(_EMAIL_PROMPT)
cover_letter_template = PromptTemplate.from_template(_COVER_LETTER_PROMPT)
research_statement_template = PromptTemplate.from_template(_RESEARCH_STATEMENT_PROMPT)
sop_template = PromptTemplate.from_template(_SOP_PROMPT)
# One LLMChain per document type; all four share the same `llm` instance and
# differ only in the prompt template they are bound to.
email_chain = LLMChain(
    llm=llm,
    prompt=email_template,
)
cover_letter_chain = LLMChain(
    llm=llm,
    prompt=cover_letter_template,
)
research_statement_chain = LLMChain(
    llm=llm,
    prompt=research_statement_template,
)
sop_chain = LLMChain(
    llm=llm,
    prompt=sop_template,
)
# Document generation UI: one tab per document type. Each tab collects its
# inputs, and its button runs the matching chain and shows the result.
st.subheader("📩 Generate Application Documents")
tab_labels = ["Cold Email", "Cover Letter", "Research Statement", "SOP"]
tab1, tab2, tab3, tab4 = st.tabs(tab_labels)

# --- Cold email ---
with tab1:
    recipient = st.text_input("Recipient Name")
    position = st.text_input("Position Name")
    research_interests = st.text_area("Research Interests")
    reason = st.text_area("Why this professor/lab?")
    if st.button("Generate Cold Email"):
        email_inputs = {
            "recipient_name": recipient,
            "position_name": position,
            "research_interests": research_interests,
            "reason": reason,
            "resume_text": extracted_text,
        }
        email = email_chain.run(email_inputs)
        st.text_area("Generated Cold Email", email, height=250)

# --- Cover letter ---
with tab2:
    job_title = st.text_input("Job Title")
    company = st.text_input("Company/University")
    key_skills = st.text_area("Key Skills")
    if st.button("Generate Cover Letter"):
        cover_letter_inputs = {
            "job_title": job_title,
            "company": company,
            "key_skills": key_skills,
            "resume_text": extracted_text,
        }
        cover_letter = cover_letter_chain.run(cover_letter_inputs)
        st.text_area("Generated Cover Letter", cover_letter, height=250)

# --- Research statement ---
# Reuses the "Research Interests" value entered on the Cold Email tab.
with tab3:
    research_goals = st.text_area("Future Research Goals")
    if st.button("Generate Research Statement"):
        research_statement_inputs = {
            "research_interests": research_interests,
            "goals": research_goals,
            "resume_text": extracted_text,
        }
        research_statement = research_statement_chain.run(research_statement_inputs)
        st.text_area("Generated Research Statement", research_statement, height=250)

# --- Statement of purpose ---
# Also reuses the "Research Interests" value entered on the Cold Email tab.
with tab4:
    program_name = st.text_input("Program Name")
    university = st.text_input("University")
    career_goals = st.text_area("Career Goals")
    if st.button("Generate SOP"):
        sop_inputs = {
            "program_name": program_name,
            "university": university,
            "research_interests": research_interests,
            "career_goals": career_goals,
            "resume_text": extracted_text,
        }
        sop = sop_chain.run(sop_inputs)
        st.text_area("Generated SOP", sop, height=250)