import streamlit as st from langchain.chains import LLMChain from langchain.prompts import PromptTemplate from langchain.llms import HuggingFaceHub import fitz # PyMuPDF for PDF extraction import pytesseract from PIL import Image import os import re # Set Hugging Face API Key (Set this in Hugging Face Secrets) os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"] # Load Free LLM from Hugging Face llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.3", model_kwargs={"temperature": 0.5}) # Streamlit App Configuration st.set_page_config(page_title="DocuMentorAI", layout="wide") st.title("📄 DocuMentorAI") st.write("Upload job openings and your CV/Resume to generate professional application documents.") # Upload Job Opening (PDF/Image/Text) st.subheader("📢 Upload Job Opening Details") job_opening_file = st.file_uploader("Upload Job Opening (PDF, Image, or Text)", type=["pdf", "png", "jpg", "jpeg", "txt"]) # Upload CV/Resume st.subheader("📄 Upload CV/Resume") cv_resume_file = st.file_uploader("Upload your CV/Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"]) # Function to extract text from PDF def extract_text_from_pdf(pdf_file): pdf_bytes = pdf_file.read() with fitz.open(stream=pdf_bytes, filetype="pdf") as doc: return " ".join([page.get_text() for page in doc]) # Function to extract text from Image using OCR def extract_text_from_image(image_file): image = Image.open(image_file) return pytesseract.image_to_string(image) # Function to extract text from uploaded files def extract_text(uploaded_file): if uploaded_file: file_type = uploaded_file.type if file_type == "application/pdf": return extract_text_from_pdf(uploaded_file) else: return extract_text_from_image(uploaded_file) return "" # Extract text from job opening and CV/Resume job_opening_text = extract_text(job_opening_file) cv_resume_text = extract_text(cv_resume_file) # Display Extracted Text if job_opening_text: st.subheader("Extracted Job Opening Details") st.text_area("Preview:", job_opening_text, height=150) if cv_resume_text: st.subheader("Extracted CV/Resume Details") st.text_area("Preview:", cv_resume_text, height=150) # Function to extract professor name, designation, and university def extract_professor_details(text): professor_pattern = r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)" university_pattern = r"(University|Institute|College|School of [A-Za-z]+)" professor_match = re.search(professor_pattern, text) university_match = re.search(university_pattern, text) professor_name = professor_match.group(0) if professor_match else "Not Found" university_name = university_match.group(0) if university_match else "Not Found" return professor_name, university_name # Extract professor details if job opening is uploaded professor_name, university_name = extract_professor_details(job_opening_text) # LLM Prompt Templates email_template = PromptTemplate.from_template(""" Write a professional cold email for a research position. - Address the professor formally. - Introduce yourself and academic background. - Express interest in their research. - Highlight key skills from your CV. - Conclude with a polite request. ### Input: - Professor: {professor_name} - University: {university_name} - Research Interests: {research_interests} - Why This Lab: {reason} - CV Highlights: {resume_text} ### Output: A well-structured, professional cold email. """) cover_letter_template = PromptTemplate.from_template(""" Write a compelling job application cover letter. - Address the employer formally. - Mention job title and where you found it. - Highlight key skills and experiences. - Relate background to the company. - Conclude with enthusiasm. ### Input: - Job Title: {job_title} - Company: {company} - Key Skills: {key_skills} - CV Highlights: {resume_text} ### Output: A strong, well-formatted cover letter. """) research_statement_template = PromptTemplate.from_template(""" Write a research statement for Ph.D. applications. - Discuss research background and motivation. - Explain key research experiences and findings. - Outline future research interests and goals. - Highlight contributions to the field. ### Input: - Research Background: {research_background} - Key Research Projects: {key_projects} - Future Goals: {future_goals} ### Output: A well-structured, professional research statement. """) sop_template = PromptTemplate.from_template(""" Write a compelling Statement of Purpose (SOP). - Introduce motivation for graduate studies. - Discuss academic background. - Explain relevant experiences and research. - Outline career goals. - Justify fit for the program. ### Input: - Motivation: {motivation} - Academic Background: {academic_background} - Research & Projects: {research_experiences} - Career Goals: {career_goals} - Why This Program: {why_this_program} ### Output: A well-structured SOP. """) # LangChain Chains email_chain = LLMChain(llm=llm, prompt=email_template) cover_letter_chain = LLMChain(llm=llm, prompt=cover_letter_template) research_statement_chain = LLMChain(llm=llm, prompt=research_statement_template) sop_chain = LLMChain(llm=llm, prompt=sop_template) # User Inputs st.subheader("📩 Generate Application Documents") tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"]) # Cold Email Generation with tab1: st.write(f"🧑‍🏫 **Detected Professor:** {professor_name} at {university_name}") research_interests = st.text_area("Research Interests") reason = st.text_area("Why this professor/lab?") if st.button("Generate Cold Email"): email = email_chain.run({ "professor_name": professor_name, "university_name": university_name, "research_interests": research_interests, "reason": reason, "resume_text": cv_resume_text }) st.text_area("Generated Cold Email", email, height=250) # Cover Letter Generation with tab2: job_title = st.text_input("Job Title") company_name = university_name if university_name != "Not Found" else st.text_input("Company/University") key_skills = st.text_area("Key Skills") if st.button("Generate Cover Letter"): cover_letter = cover_letter_chain.run({ "job_title": job_title, "company": company_name, "key_skills": key_skills, "resume_text": cv_resume_text }) st.text_area("Generated Cover Letter", cover_letter, height=250) # Research Statement Generation with tab3: research_background = st.text_area("Research Background") key_projects = st.text_area("Key Research Projects") future_goals = st.text_area("Future Research Goals") if st.button("Generate Research Statement"): research_statement = research_statement_chain.run({ "research_background": research_background, "key_projects": key_projects, "future_goals": future_goals }) st.text_area("Generated Research Statement", research_statement, height=250) # SOP Generation with tab4: motivation = st.text_area("Motivation for Graduate Studies") academic_background = st.text_area("Academic Background") research_experiences = st.text_area("Research & Projects") career_goals = st.text_area("Career Goals") why_this_program = st.text_area("Why This Program") if st.button("Generate SOP"): sop = sop_chain.run({ "motivation": motivation, "academic_background": academic_background, "research_experiences": research_experiences, "career_goals": career_goals, "why_this_program": why_this_program }) st.text_area("Generated SOP", sop, height=250)