import streamlit as st from langchain.chains import LLMChain from langchain.prompts import PromptTemplate from langchain.llms import HuggingFaceHub import fitz # PyMuPDF for PDF extraction from PIL import Image import os import pytesseract import re # Set Hugging Face API Key (Set this in Hugging Face Secrets) os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"] # Load Free LLM from Hugging Face llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.3", model_kwargs={"temperature": 0.5}) # Streamlit App Configuration st.set_page_config(page_title="DocuMentorAI", layout="wide", page_icon="📄") st.title("📄 DocuMentorAI") st.write("Generate professional application documents with ease!") # Custom CSS for better UI st.markdown(""" """, unsafe_allow_html=True) # Text Input for Job Opening Details st.subheader("📢 Enter Opening Details") job_opening_text = st.text_area( "Paste the job/research opening details here...", height=150, placeholder="Example: 'We are hiring a Research Assistant at XYZ University. The ideal candidate has experience in machine learning and data analysis...'" ) # Upload CV/Resume st.subheader("📄 Upload CV/Resume") cv_resume_file = st.file_uploader( "Upload your CV/Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"], help="Upload a PDF or image of your CV/Resume for text extraction." ) # Function to extract text from PDF def extract_text_from_pdf(pdf_file): try: pdf_bytes = pdf_file.read() with fitz.open(stream=pdf_bytes, filetype="pdf") as doc: return " ".join([page.get_text() for page in doc]) except Exception as e: st.error(f"Error extracting text from PDF: {e}") return "" # Function to extract text from Image using OCR def extract_text_from_image(image_file): try: image = Image.open(image_file) return pytesseract.image_to_string(image) except Exception as e: st.error(f"Error extracting text from image: {e}") return "" # Function to extract text from uploaded files def extract_text(uploaded_file): if uploaded_file: file_type = uploaded_file.type if file_type == "application/pdf": return extract_text_from_pdf(uploaded_file) else: return extract_text_from_image(uploaded_file) return "" # Extract text from CV/Resume cv_resume_text = extract_text(cv_resume_file) # Display Extracted Text if job_opening_text: with st.expander("🔍 View Entered Opening Details"): st.markdown(f"**Job Opening Details:**\n\n{job_opening_text}") if cv_resume_text: with st.expander("🔍 View Extracted CV/Resume Details"): st.markdown(f"**CV/Resume Details:**\n\n{cv_resume_text}") # Function to extract professor name, designation, and university def extract_professor_details(text): professor_pattern = r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)" university_pattern = r"(University|Institute|College|School of [A-Za-z]+)" professor_match = re.search(professor_pattern, text) university_match = re.search(university_pattern, text) professor_name = professor_match.group(0) if professor_match else "Not Found" university_name = university_match.group(0) if university_match else "Not Found" return professor_name, university_name # Extract professor details if job opening is uploaded professor_name, university_name = extract_professor_details(job_opening_text) # LLM Prompt Templates email_template = PromptTemplate.from_template(""" Write a professional cold email for a research position. - Address the professor formally. - Introduce yourself and academic background. - Express interest in their research. - Highlight key skills from your CV. - Conclude with a polite request. ### Input: - Professor: {professor_name} - University: {university_name} - Research Interests: {research_interests} - Why This Lab: {reason} - CV Highlights: {resume_text} ### Output: A well-structured, professional cold email. """) cover_letter_template = PromptTemplate.from_template(""" Write a compelling job application cover letter. - Address the employer formally. - Mention job title and where you found it. - Highlight key skills and experiences. - Relate background to the company. - Conclude with enthusiasm. ### Input: - Job Title: {job_title} - Company: {company} - Key Skills: {key_skills} - CV Highlights: {resume_text} ### Output: A strong, well-formatted cover letter. """) research_statement_template = PromptTemplate.from_template(""" Write a research statement for Ph.D. applications. - Discuss research background and motivation. - Explain key research experiences and findings. - Outline future research interests and goals. - Highlight contributions to the field. ### Input: - Research Background: {research_background} - Key Research Projects: {key_projects} - Future Goals: {future_goals} ### Output: A well-structured, professional research statement. """) sop_template = PromptTemplate.from_template(""" Write a compelling Statement of Purpose (SOP). - Introduce motivation for graduate studies. - Discuss academic background. - Explain relevant experiences and research. - Outline career goals. - Justify fit for the program. ### Input: - Motivation: {motivation} - Academic Background: {academic_background} - Research & Projects: {research_experiences} - Career Goals: {career_goals} - Why This Program: {why_this_program} ### Output: A well-structured SOP. """) # LangChain Chains email_chain = LLMChain(llm=llm, prompt=email_template) cover_letter_chain = LLMChain(llm=llm, prompt=cover_letter_template) research_statement_chain = LLMChain(llm=llm, prompt=research_statement_template) sop_chain = LLMChain(llm=llm, prompt=sop_template) # User Inputs st.subheader("📩 Generate Application Documents") tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"]) # Cold Email Generation with tab1: st.write(f"🧑‍🏫 **Detected Professor:** {professor_name} at {university_name}") research_interests = st.text_area("Research Interests", placeholder="Example: Machine Learning, Data Analysis, etc.") reason = st.text_area("Why this professor/lab?", placeholder="Example: I am particularly interested in your work on...") if st.button("Generate Cold Email"): if not job_opening_text or not cv_resume_text: st.error("Please provide job opening details and upload your CV/Resume.") else: with st.spinner("Generating Cold Email..."): try: email = email_chain.run({ "professor_name": professor_name, "university_name": university_name, "research_interests": research_interests, "reason": reason, "resume_text": cv_resume_text }) st.markdown("**Generated Cold Email:**") st.markdown(email) st.download_button("Download Email", email, file_name="cold_email.txt") except Exception as e: st.error(f"Error generating cold email: {e}") # Cover Letter Generation with tab2: job_title = st.text_input("Job Title", placeholder="Example: Research Assistant") company_name = university_name if university_name != "Not Found" else st.text_input("Company/University", placeholder="Example: XYZ University") key_skills = st.text_area("Key Skills", placeholder="Example: Python, Machine Learning, Data Analysis") if st.button("Generate Cover Letter"): if not job_opening_text or not cv_resume_text: st.error("Please provide job opening details and upload your CV/Resume.") else: with st.spinner("Generating Cover Letter..."): try: cover_letter = cover_letter_chain.run({ "job_title": job_title, "company": company_name, "key_skills": key_skills, "resume_text": cv_resume_text }) st.markdown("**Generated Cover Letter:**") st.markdown(cover_letter) st.download_button("Download Cover Letter", cover_letter, file_name="cover_letter.txt") except Exception as e: st.error(f"Error generating cover letter: {e}") # Research Statement Generation with tab3: research_background = st.text_area("Research Background", placeholder="Example: My research focuses on...") key_projects = st.text_area("Key Research Projects", placeholder="Example: Developed a machine learning model for...") future_goals = st.text_area("Future Research Goals", placeholder="Example: I aim to explore...") if st.button("Generate Research Statement"): with st.spinner("Generating Research Statement..."): try: research_statement = research_statement_chain.run({ "research_background": research_background, "key_projects": key_projects, "future_goals": future_goals }) st.markdown("**Generated Research Statement:**") st.markdown(research_statement) st.download_button("Download Research Statement", research_statement, file_name="research_statement.txt") except Exception as e: st.error(f"Error generating research statement: {e}") # SOP Generation with tab4: motivation = st.text_area("Motivation for Graduate Studies", placeholder="Example: I have always been passionate about...") academic_background = st.text_area("Academic Background", placeholder="Example: I completed my undergraduate degree in...") research_experiences = st.text_area("Research & Projects", placeholder="Example: During my undergraduate studies, I worked on...") career_goals = st.text_area("Career Goals", placeholder="Example: My long-term goal is to...") why_this_program = st.text_area("Why This Program", placeholder="Example: This program aligns with my research interests because...") if st.button("Generate SOP"): with st.spinner("Generating SOP..."): try: sop = sop_chain.run({ "motivation": motivation, "academic_background": academic_background, "research_experiences": research_experiences, "career_goals": career_goals, "why_this_program": why_this_program }) st.markdown("**Generated SOP:**") st.markdown(sop) st.download_button("Download SOP", sop, file_name="sop.txt") except Exception as e: st.error(f"Error generating SOP: {e}") # Reset Button if st.button("🔄 Reset All Inputs and Outputs"): st.experimental_rerun()