"""DocuMentorAI: Streamlit app that turns an uploaded CV/resume into application documents.

The script extracts text from an uploaded PDF (via PyMuPDF) or image (via
Tesseract OCR), then feeds that text plus user-supplied details into one of
four LangChain prompt chains: cold email, cover letter, research statement,
or statement of purpose.
"""

import os

import fitz  # PyMuPDF for PDF text extraction
import pytesseract
import streamlit as st
from langchain.chains import LLMChain
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from PIL import Image

# Set Hugging Face API Key (Set this in Hugging Face Secrets)
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]

# Load Free LLM from Hugging Face
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    model_kwargs={"temperature": 0.5},
)

# Define Streamlit App
st.set_page_config(page_title="DocuMentorAI", layout="wide")
st.title("📄 DocuMentorAI")
st.write("Upload your CV/Resume and generate professional application documents.")

# File Upload (PDF/Image)
uploaded_file = st.file_uploader(
    "Upload your CV/Resume (PDF or Image)",
    type=["pdf", "png", "jpg", "jpeg"],
)


def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in an uploaded PDF.

    Args:
        pdf_file: A file-like object exposing ``read()`` that yields the
            PDF's raw bytes (here, the Streamlit upload).

    Returns:
        The text of all pages joined in page order, with no separator added.
    """
    pdf_bytes = pdf_file.read()
    # Open the document from the in-memory bytes; the file type must be given
    # explicitly because there is no file name to infer it from.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)


def extract_text_from_image(image_file):
    """Extract text from an image using OCR.

    Args:
        image_file: A path or file-like object accepted by ``PIL.Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    image = Image.open(image_file)
    return pytesseract.image_to_string(image)


def get_final_output(full_text):
    """Strip any echoed prompt from the model output.

    If the model returns text with the prompt details followed by
    "### Output:", only what comes after the last such marker is kept.
    Without the marker the whole text is returned. Surrounding whitespace is
    stripped in both cases.

    Args:
        full_text: Raw string returned by the chain.

    Returns:
        The cleaned text.
    """
    # split() yields a single-element list when the marker is absent, so
    # taking [-1] covers both cases.
    return full_text.split("### Output:")[-1].strip()


# Initialised before the upload check so the generation tabs below can always
# reference it; previously a "Generate" click without an upload raised NameError.
extracted_text = ""

if uploaded_file:
    if uploaded_file.type == "application/pdf":
        extracted_text = extract_text_from_pdf(uploaded_file)
    else:
        # Every non-PDF type allowed by the uploader is an image format.
        extracted_text = extract_text_from_image(uploaded_file)

    st.subheader("Extracted Text from CV/Resume")
    st.text_area("Preview:", extracted_text, height=150)


# Define LLM Prompt Templates
email_template = PromptTemplate.from_template("""
You are an AI assistant skilled in crafting personalized and engaging cold emails for research positions.

### Instructions:
- Address the recipient warmly and professionally.
- Introduce yourself succinctly, highlighting relevant background.
- Clearly express your interest in the specific position and align it with your research interests.
- Articulate why you are particularly drawn to this professor's work or lab.
- Mention pertinent details from your resume that strengthen your candidacy.
- Conclude with a polite call to action and gratitude.

### Input:
- Recipient Name: {recipient_name}
- Position Title: {position_name}
- Your Research Interests: {research_interests}
- Reason for Choosing This Professor/Lab: {reason}
- Key Resume Highlights: {resume_text}

### Output:
Compose a well-structured, concise cold email with a polite and engaging tone that reflects genuine interest and professionalism.
""")

cover_letter_template = PromptTemplate.from_template("""
You are an AI assistant proficient in generating personalized and compelling cover letters for job applications.

### Instructions:
- Begin with a formal salutation.
- State the position you are applying for and how you discovered it.
- Highlight your key skills and experiences that make you a strong fit for the role.
- Connect your professional background to the company's mission and values.
- Include specific achievements from your resume that demonstrate your qualifications.
- End with a courteous closing and express enthusiasm for the opportunity.

### Input:
- Job Title: {job_title}
- Company Name: {company}
- Relevant Skills and Experiences: {key_skills}
- Resume Highlights: {resume_text}

### Output:
Draft a polished and formal cover letter that showcases your suitability for the position and aligns with the company's ethos.
""")

research_statement_template = PromptTemplate.from_template("""
You are an AI assistant adept at composing insightful and persuasive research statements for Ph.D. applications.

### Instructions:
- Open with a summary of your research interests and their significance.
- Detail your academic background and any research projects that have prepared you for this field.
- Discuss your future research goals and how they align with the program's strengths.
- Emphasize your passion and commitment to advancing knowledge in this area.

### Input:
- Research Interests: {research_interests}
- Academic Background and Experience: {resume_text}
- Future Research Objectives: {goals}

### Output:
Generate a compelling research statement with a strong academic tone that reflects your expertise and aspirations.
""")

sop_template = PromptTemplate.from_template("""
You are an AI assistant experienced in crafting detailed and engaging Statements of Purpose for graduate program applications.

### Instructions:
- Introduce yourself and your academic interests.
- Explain why you are interested in the chosen program and university.
- Describe your relevant experiences and how they have prepared you for this program.
- Outline your career goals and how this program will help you achieve them.
- Highlight aspects of your resume that support your application.

### Input:
- Program Name: {program_name}
- University Name: {university}
- Your Research Interests: {research_interests}
- Career Objectives: {career_goals}
- Resume Details: {resume_text}

### Output:
Compose a structured and professional Statement of Purpose that conveys your qualifications, motivations, and fit for the program.
""")

# Create LangChain Chains
email_chain = LLMChain(llm=llm, prompt=email_template)
cover_letter_chain = LLMChain(llm=llm, prompt=cover_letter_template)
research_statement_chain = LLMChain(llm=llm, prompt=research_statement_template)
sop_chain = LLMChain(llm=llm, prompt=sop_template)

# User Inputs for Document Generation
st.subheader("📩 Generate Application Documents")
tab1, tab2, tab3, tab4 = st.tabs(
    ["Cold Email", "Cover Letter", "Research Statement", "SOP"]
)

with tab1:
    recipient = st.text_input("Recipient Name")
    position = st.text_input("Position Name")
    # NOTE: this value is also passed to the Research Statement and SOP
    # chains below; those tabs have no research-interests field of their own.
    research_interests = st.text_area("Research Interests")
    reason = st.text_area("Why this professor/lab?")

    if st.button("Generate Cold Email"):
        email = email_chain.run(
            {
                "recipient_name": recipient,
                "position_name": position,
                "research_interests": research_interests,
                "reason": reason,
                "resume_text": extracted_text,
            }
        )
        final_email = get_final_output(email)
        # Show the cleaned text, consistent with the other tabs (the raw
        # chain output was displayed here before).
        st.text_area("Generated Cold Email", final_email, height=250)

with tab2:
    job_title = st.text_input("Job Title")
    company = st.text_input("Company/University")
    key_skills = st.text_area("Key Skills")

    if st.button("Generate Cover Letter"):
        cover_letter = cover_letter_chain.run(
            {
                "job_title": job_title,
                "company": company,
                "key_skills": key_skills,
                "resume_text": extracted_text,
            }
        )
        final_cover_letter = get_final_output(cover_letter)
        st.text_area("Generated Cover Letter", final_cover_letter, height=250)

with tab3:
    research_goals = st.text_area("Future Research Goals")

    if st.button("Generate Research Statement"):
        research_statement = research_statement_chain.run(
            {
                "research_interests": research_interests,
                "goals": research_goals,
                "resume_text": extracted_text,
            }
        )
        final_rs = get_final_output(research_statement)
        st.text_area("Generated Research Statement", final_rs, height=250)

with tab4:
    program_name = st.text_input("Program Name")
    university = st.text_input("University")
    career_goals = st.text_area("Career Goals")

    if st.button("Generate SOP"):
        sop = sop_chain.run(
            {
                "program_name": program_name,
                "university": university,
                "research_interests": research_interests,
                "career_goals": career_goals,
                "resume_text": extracted_text,
            }
        )
        final_sop = get_final_output(sop)
        st.text_area("Generated SOP", final_sop, height=250)