Spaces:

Sobit
/

DocuMentorAI

Sleeping

App Files Files Community

Sobit committed on Feb 16

Commit

8d56069

verified ·

1 Parent(s): 099e1dd

Update app.py

Browse files

Files changed (1) hide show

app.py +145 -124

app.py CHANGED Viewed

@@ -2,10 +2,11 @@ import streamlit as st
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
 from langchain.llms import HuggingFaceHub
-import fitz  # PyMuPDF for PDF text extraction
 import pytesseract
 from PIL import Image
 import os
 # Set Hugging Face API Key (Set this in Hugging Face Secrets)
 os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
@@ -13,183 +14,203 @@ os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
 # Load Free LLM from Hugging Face
 llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.3", model_kwargs={"temperature": 0.5})
-# Define Streamlit App
 st.set_page_config(page_title="DocuMentorAI", layout="wide")
 st.title("📄 DocuMentorAI")
-st.write("Upload your CV/Resume and generate professional application documents.")
-# File Upload (PDF/Image)
-uploaded_file = st.file_uploader("Upload your CV/Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
 def extract_text_from_pdf(pdf_file):
-    # Read the file's bytes
     pdf_bytes = pdf_file.read()
-    # Open the document from the bytes stream; specify the file type as PDF
     with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
-        text = ""
-        for page in doc:
-            text += page.get_text()
-    return text
 def extract_text_from_image(image_file):
-    """Extract text from an image using OCR."""
     image = Image.open(image_file)
     return pytesseract.image_to_string(image)
-if uploaded_file:
-    file_type = uploaded_file.type
-    extracted_text = ""
-    if file_type == "application/pdf":
-        extracted_text = extract_text_from_pdf(uploaded_file)
-    else:
-        extracted_text = extract_text_from_image(uploaded_file)
-    st.subheader("Extracted Text from CV/Resume")
-    st.text_area("Preview:", extracted_text, height=150)
-def get_final_output(full_text):
-    # Find the last occurrence of "### Output:" and return everything after it
-    if "### Output:" in full_text:
-        output_text = full_text.split("### Output:", 1)[-1].strip()
-        # If there's an instruction-like sentence at the beginning, remove it
-        if "." in output_text:
-            output_text = output_text.split(".", 1)[-1].strip()
-        return output_text
-    return full_text.strip()
-# Define LLM Prompt Templates
-email_template = PromptTemplate.from_template("""
-You are an AI assistant skilled in crafting personalized and engaging cold emails for research positions.
-### Instructions:
-- Address the recipient warmly and professionally.
-- Introduce yourself succinctly, highlighting relevant background.
-- Clearly express your interest in the specific position and align it with your research interests.
-- Articulate why you are particularly drawn to this professor's work or lab.
-- Mention pertinent details from your resume that strengthen your candidacy.
-- Conclude with a polite call to action and gratitude.
-### Input:
-- Recipient Name: {recipient_name}
-- Position Title: {position_name}
-- Your Research Interests: {research_interests}
-- Reason for Choosing This Professor/Lab: {reason}
-- Key Resume Highlights: {resume_text}
 ### Output:
-Compose a well-structured, concise cold email with a polite and engaging tone that reflects genuine interest and professionalism.
 """)
 cover_letter_template = PromptTemplate.from_template("""
-You are an AI assistant proficient in generating personalized and compelling cover letters for job applications.
-### Instructions:
-- Begin with a formal salutation.
-- State the position you are applying for and how you discovered it.
-- Highlight your key skills and experiences that make you a strong fit for the role.
-- Connect your professional background to the company's mission and values.
-- Include specific achievements from your resume that demonstrate your qualifications.
-- End with a courteous closing and express enthusiasm for the opportunity.
 ### Input:
 - Job Title: {job_title}
-- Company Name: {company}
-- Relevant Skills and Experiences: {key_skills}
-- Resume Highlights: {resume_text}
 ### Output:
-Draft a polished and formal cover letter that showcases your suitability for the position and aligns with the company's ethos.
 """)
 research_statement_template = PromptTemplate.from_template("""
-You are an AI assistant adept at composing insightful and persuasive research statements for Ph.D. applications.
-### Instructions:
-- Open with a summary of your research interests and their significance.
-- Detail your academic background and any research projects that have prepared you for this field.
-- Discuss your future research goals and how they align with the program's strengths.
-- Emphasize your passion and commitment to advancing knowledge in this area.
 ### Input:
-- Research Interests: {research_interests}
-- Academic Background and Experience: {resume_text}
-- Future Research Objectives: {goals}
 ### Output:
-Generate a compelling research statement with a strong academic tone that reflects your expertise and aspirations.
 """)
 sop_template = PromptTemplate.from_template("""
-You are an AI assistant experienced in crafting detailed and engaging Statements of Purpose for graduate program applications.
-### Instructions:
-- Introduce yourself and your academic interests.
-- Explain why you are interested in the chosen program and university.
-- Describe your relevant experiences and how they have prepared you for this program.
-- Outline your career goals and how this program will help you achieve them.
-- Highlight aspects of your resume that support your application.
 ### Input:
-- Program Name: {program_name}
-- University Name: {university}
-- Your Research Interests: {research_interests}
-- Career Objectives: {career_goals}
-- Resume Details: {resume_text}
 ### Output:
-Compose a structured and professional Statement of Purpose that conveys your qualifications, motivations, and fit for the program.
 """)
-# Create LangChain Chains
 email_chain = LLMChain(llm=llm, prompt=email_template)
 cover_letter_chain = LLMChain(llm=llm, prompt=cover_letter_template)
 research_statement_chain = LLMChain(llm=llm, prompt=research_statement_template)
 sop_chain = LLMChain(llm=llm, prompt=sop_template)
-# User Inputs for Document Generation
 st.subheader("📩 Generate Application Documents")
 tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])
 with tab1:
-    recipient = st.text_input("Recipient Name")
-    position = st.text_input("Position Name")
     research_interests = st.text_area("Research Interests")
     reason = st.text_area("Why this professor/lab?")
     if st.button("Generate Cold Email"):
-        email = email_chain.run({"recipient_name": recipient, "position_name": position, "research_interests": research_interests, "reason": reason, "resume_text": extracted_text})
-        final_email = get_final_output(email)
-        st.text_area("Generated Cold Email", final_email, height=250)
 with tab2:
     job_title = st.text_input("Job Title")
-    company = st.text_input("Company/University")
     key_skills = st.text_area("Key Skills")
-    if st.button("Generate Cover Letter"):
-        cover_letter = cover_letter_chain.run({"job_title": job_title, "company": company, "key_skills": key_skills, "resume_text": extracted_text})
-        final_cover_letter = get_final_output(cover_letter)
-        st.text_area("Generated Cover Letter", final_cover_letter, height=250)
 with tab3:
-    research_goals = st.text_area("Future Research Goals")
-    if st.button("Generate Research Statement"):
-        research_statement = research_statement_chain.run({"research_interests": research_interests, "goals": research_goals, "resume_text": extracted_text})
-        final_rs = get_final_output(research_statement)
-        st.text_area("Generated Research Statement", final_rs, height=250)
 with tab4:
-    program_name = st.text_input("Program Name")
-    university = st.text_input("University")
     career_goals = st.text_area("Career Goals")
     if st.button("Generate SOP"):
-        sop = sop_chain.run({"program_name": program_name, "university": university, "research_interests": research_interests, "career_goals": career_goals, "resume_text": extracted_text})
-        final_sop = get_final_output(sop)
-        st.text_area("Generated SOP", final_sop, height=250)

 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
 from langchain.llms import HuggingFaceHub
+import fitz  # PyMuPDF for PDF extraction
 import pytesseract
 from PIL import Image
 import os
+import re
 # Set Hugging Face API Key (Set this in Hugging Face Secrets)
 os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
 # Load Free LLM from Hugging Face
 llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.3", model_kwargs={"temperature": 0.5})
+# Streamlit App Configuration
 st.set_page_config(page_title="DocuMentorAI", layout="wide")
 st.title("📄 DocuMentorAI")
+st.write("Upload job openings and your CV/Resume to generate professional application documents.")
+# Upload Job Opening (PDF/Image/Text)
+st.subheader("📢 Upload Job Opening Details")
+job_opening_file = st.file_uploader("Upload Job Opening (PDF, Image, or Text)", type=["pdf", "png", "jpg", "jpeg", "txt"])
+# Upload CV/Resume
+st.subheader("📄 Upload CV/Resume")
+cv_resume_file = st.file_uploader("Upload your CV/Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
+# Function to extract text from PDF
 def extract_text_from_pdf(pdf_file):
+    """Return the text of every page of a PDF, joined with single spaces.
+
+    pdf_file: object whose read() returns the raw PDF content.
+    """
     pdf_bytes = pdf_file.read()
+    # Open the document from the in-memory bytes, declaring the type as PDF.
     with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
+        return " ".join([page.get_text() for page in doc])
+# Function to extract text from Image using OCR
 def extract_text_from_image(image_file):
+    """Open image_file with PIL and return the text pytesseract recognizes in it."""
     image = Image.open(image_file)
     return pytesseract.image_to_string(image)
+# Function to extract text from uploaded files
+def extract_text(uploaded_file):
+    if uploaded_file:
+        file_type = uploaded_file.type
+        if file_type == "application/pdf":
+            return extract_text_from_pdf(uploaded_file)
+        else:
+            return extract_text_from_image(uploaded_file)
+    return ""
+# Extract text from job opening and CV/Resume
+job_opening_text = extract_text(job_opening_file)
+cv_resume_text = extract_text(cv_resume_file)
+# Display Extracted Text
+# Each preview is rendered only when its extraction produced non-empty text.
+# NOTE(review): both text areas use the same label "Preview:" and no key=.
+# If the two extracted texts are ever identical, Streamlit may reject the
+# second widget as a duplicate -- confirm, and add a distinct key= to each if so.
+if job_opening_text:
+    st.subheader("Extracted Job Opening Details")
+    st.text_area("Preview:", job_opening_text, height=150)
+if cv_resume_text:
+    st.subheader("Extracted CV/Resume Details")
+    st.text_area("Preview:", cv_resume_text, height=150)
+# Function to extract professor name, designation, and university
+def extract_professor_details(text):
+    professor_pattern = r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)"
+    university_pattern = r"(University|Institute|College|School of [A-Za-z]+)"
+    professor_match = re.search(professor_pattern, text)
+    university_match = re.search(university_pattern, text)
+    professor_name = professor_match.group(0) if professor_match else "Not Found"
+    university_name = university_match.group(0) if university_match else "Not Found"
+    return professor_name, university_name
+# Extract professor details if job opening is uploaded
+professor_name, university_name = extract_professor_details(job_opening_text)
+# LLM Prompt Templates
+email_template = PromptTemplate.from_template("""
+Write a professional cold email for a research position.
+- Address the professor formally.
+- Introduce yourself and academic background.
+- Express interest in their research.
+- Highlight key skills from your CV.
+- Conclude with a polite request.
+### Input:
+- Professor: {professor_name}
+- University: {university_name}
+- Research Interests: {research_interests}
+- Why This Lab: {reason}
+- CV Highlights: {resume_text}
 ### Output:
+A well-structured, professional cold email.
 """)
 cover_letter_template = PromptTemplate.from_template("""
+Write a compelling job application cover letter.
+- Address the employer formally.
+- Mention job title and where you found it.
+- Highlight key skills and experiences.
+- Relate background to the company.
+- Conclude with enthusiasm.
 ### Input:
 - Job Title: {job_title}
+- Company: {company}
+- Key Skills: {key_skills}
+- CV Highlights: {resume_text}
 ### Output:
+A strong, well-formatted cover letter.
 """)
 research_statement_template = PromptTemplate.from_template("""
+Write a research statement for Ph.D. applications.
+- Discuss research background and motivation.
+- Explain key research experiences and findings.
+- Outline future research interests and goals.
+- Highlight contributions to the field.
 ### Input:
+- Research Background: {research_background}
+- Key Research Projects: {key_projects}
+- Future Goals: {future_goals}
 ### Output:
+A well-structured, professional research statement.
 """)
 sop_template = PromptTemplate.from_template("""
+Write a compelling Statement of Purpose (SOP).
+- Introduce motivation for graduate studies.
+- Discuss academic background.
+- Explain relevant experiences and research.
+- Outline career goals.
+- Justify fit for the program.
 ### Input:
+- Motivation: {motivation}
+- Academic Background: {academic_background}
+- Research & Projects: {research_experiences}
+- Career Goals: {career_goals}
+- Why This Program: {why_this_program}
 ### Output:
+A well-structured SOP.
 """)
+# LangChain Chains
 email_chain = LLMChain(llm=llm, prompt=email_template)
 cover_letter_chain = LLMChain(llm=llm, prompt=cover_letter_template)
 research_statement_chain = LLMChain(llm=llm, prompt=research_statement_template)
 sop_chain = LLMChain(llm=llm, prompt=sop_template)
+# User Inputs
 st.subheader("📩 Generate Application Documents")
 tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])
+# Cold Email Generation
 with tab1:
+    st.write(f"🧑‍🏫 **Detected Professor:** {professor_name} at {university_name}")
     research_interests = st.text_area("Research Interests")
     reason = st.text_area("Why this professor/lab?")
     if st.button("Generate Cold Email"):
+        email = email_chain.run({
+            "professor_name": professor_name,
+            "university_name": university_name,
+            "research_interests": research_interests,
+            "reason": reason,
+            "resume_text": cv_resume_text
+        })
+        st.text_area("Generated Cold Email", email, height=250)
+# Cover Letter Generation
 with tab2:
     job_title = st.text_input("Job Title")
+    # Reuse the institution detected in the job opening; the manual input is
+    # only rendered when detection returned "Not Found".
+    company_name = university_name if university_name != "Not Found" else st.text_input("Company/University")
     key_skills = st.text_area("Key Skills")
+    if st.button("Generate Cover Letter"):
+        # Keys must match the {placeholders} declared in cover_letter_template.
+        cover_letter = cover_letter_chain.run({
+            "job_title": job_title,
+            "company": company_name,
+            "key_skills": key_skills,
+            "resume_text": cv_resume_text
+        })
+        st.text_area("Generated Cover Letter", cover_letter, height=250)
+# Research Statement Generation
 with tab3:
+    research_background = st.text_area("Research Background")
+    key_projects = st.text_area("Key Research Projects")
+    future_goals = st.text_area("Future Research Goals")
+    if st.button("Generate Research Statement"):
+        research_statement = research_statement_chain.run({
+            "research_background": research_background,
+            "key_projects": key_projects,
+            "future_goals": future_goals
+        })
+        st.text_area("Generated Research Statement", research_statement, height=250)
+# SOP Generation
 with tab4:
+    motivation = st.text_area("Motivation for Graduate Studies")
+    academic_background = st.text_area("Academic Background")
+    research_experiences = st.text_area("Research & Projects")
     career_goals = st.text_area("Career Goals")
+    why_this_program = st.text_area("Why This Program")
     if st.button("Generate SOP"):
+        sop = sop_chain.run({
+            "motivation": motivation,
+            "academic_background": academic_background,
+            "research_experiences": research_experiences,
+            "career_goals": career_goals,
+            "why_this_program": why_this_program
+        })
+        st.text_area("Generated SOP", sop, height=250)