import streamlit as st from langchain.chains import LLMChain from langchain.prompts import PromptTemplate from langchain.llms import HuggingFaceHub import fitz # PyMuPDF for PDF text extraction import pytesseract from PIL import Image import os # Set Hugging Face API Key (Set this in Hugging Face Secrets) os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"] # Load Free LLM from Hugging Face llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct", model_kwargs={"temperature": 0.5}) # Define Streamlit App st.set_page_config(page_title="DocuMentorAI", layout="wide") st.title("📄 DocuMentorAI") st.write("Upload your CV/Resume and generate professional application documents.") # File Upload (PDF/Image) uploaded_file = st.file_uploader("Upload your CV/Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"]) def extract_text_from_pdf(pdf_file): """Extract text from a PDF file.""" text = "" with fitz.open(pdf_file) as doc: for page in doc: text += page.get_text() return text def extract_text_from_image(image_file): """Extract text from an image using OCR.""" image = Image.open(image_file) return pytesseract.image_to_string(image) if uploaded_file: file_type = uploaded_file.type extracted_text = "" if file_type == "application/pdf": extracted_text = extract_text_from_pdf(uploaded_file) else: extracted_text = extract_text_from_image(uploaded_file) st.subheader("Extracted Text from CV/Resume") st.text_area("Preview:", extracted_text, height=150) # Define LLM Prompt Templates email_template = PromptTemplate.from_template(""" You are an AI assistant helping users craft a professional cold email for a research position. ### Input: - Recipient: {recipient_name} - Position: {position_name} - Research Interests: {research_interests} - Why this professor/lab: {reason} - Resume Details: {resume_text} ### Output: A well-structured, concise cold email with a polite and engaging tone. """) cover_letter_template = PromptTemplate.from_template(""" You are an AI assistant generating a professional cover letter. ### Input: - Job Title: {job_title} - Company/University: {company} - Key Skills: {key_skills} - Resume Details: {resume_text} ### Output: A polished and formal cover letter. """) research_statement_template = PromptTemplate.from_template(""" You are an AI assistant generating a research statement for a Ph.D. application. ### Input: - Research Interests: {research_interests} - Academic Background: {resume_text} - Future Research Goals: {goals} ### Output: A compelling research statement with a strong academic tone. """) sop_template = PromptTemplate.from_template(""" You are an AI assistant writing a Statement of Purpose (SOP) for a master's or Ph.D. program. ### Input: - Program Name: {program_name} - University: {university} - Research Interests: {research_interests} - Career Goals: {career_goals} - Resume Details: {resume_text} ### Output: A structured and professional SOP. """) # Create LangChain Chains email_chain = LLMChain(llm=llm, prompt=email_template) cover_letter_chain = LLMChain(llm=llm, prompt=cover_letter_template) research_statement_chain = LLMChain(llm=llm, prompt=research_statement_template) sop_chain = LLMChain(llm=llm, prompt=sop_template) # User Inputs for Document Generation st.subheader("📩 Generate Application Documents") tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"]) with tab1: recipient = st.text_input("Recipient Name") position = st.text_input("Position Name") research_interests = st.text_area("Research Interests") reason = st.text_area("Why this professor/lab?") if st.button("Generate Cold Email"): email = email_chain.run({"recipient_name": recipient, "position_name": position, "research_interests": research_interests, "reason": reason, "resume_text": extracted_text}) st.text_area("Generated Cold Email", email, height=250) with tab2: job_title = st.text_input("Job Title") company = st.text_input("Company/University") key_skills = st.text_area("Key Skills") if st.button("Generate Cover Letter"): cover_letter = cover_letter_chain.run({"job_title": job_title, "company": company, "key_skills": key_skills, "resume_text": extracted_text}) st.text_area("Generated Cover Letter", cover_letter, height=250) with tab3: research_goals = st.text_area("Future Research Goals") if st.button("Generate Research Statement"): research_statement = research_statement_chain.run({"research_interests": research_interests, "goals": research_goals, "resume_text": extracted_text}) st.text_area("Generated Research Statement", research_statement, height=250) with tab4: program_name = st.text_input("Program Name") university = st.text_input("University") career_goals = st.text_area("Career Goals") if st.button("Generate SOP"): sop = sop_chain.run({"program_name": program_name, "university": university, "research_interests": research_interests, "career_goals": career_goals, "resume_text": extracted_text}) st.text_area("Generated SOP", sop, height=250)