Sobit committed on
Commit
8d56069
·
verified ·
1 Parent(s): 099e1dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -124
app.py CHANGED
@@ -2,10 +2,11 @@ import streamlit as st
2
  from langchain.chains import LLMChain
3
  from langchain.prompts import PromptTemplate
4
  from langchain.llms import HuggingFaceHub
5
- import fitz # PyMuPDF for PDF text extraction
6
  import pytesseract
7
  from PIL import Image
8
  import os
 
9
 
10
  # Set Hugging Face API Key (Set this in Hugging Face Secrets)
11
  os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
@@ -13,183 +14,203 @@ os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
13
  # Load Free LLM from Hugging Face
14
  llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.3", model_kwargs={"temperature": 0.5})
15
 
16
- # Define Streamlit App
17
  st.set_page_config(page_title="DocuMentorAI", layout="wide")
18
  st.title("📄 DocuMentorAI")
19
- st.write("Upload your CV/Resume and generate professional application documents.")
20
-
21
- # File Upload (PDF/Image)
22
- uploaded_file = st.file_uploader("Upload your CV/Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
23
 
 
 
 
24
 
 
 
 
25
 
 
26
  def extract_text_from_pdf(pdf_file):
27
- # Read the file's bytes
28
  pdf_bytes = pdf_file.read()
29
- # Open the document from the bytes stream; specify the file type as PDF
30
  with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
31
- text = ""
32
- for page in doc:
33
- text += page.get_text()
34
- return text
35
-
36
 
 
37
  def extract_text_from_image(image_file):
38
- """Extract text from an image using OCR."""
39
  image = Image.open(image_file)
40
  return pytesseract.image_to_string(image)
41
 
42
- if uploaded_file:
43
- file_type = uploaded_file.type
44
- extracted_text = ""
45
-
46
- if file_type == "application/pdf":
47
- extracted_text = extract_text_from_pdf(uploaded_file)
48
- else:
49
- extracted_text = extract_text_from_image(uploaded_file)
50
-
51
- st.subheader("Extracted Text from CV/Resume")
52
- st.text_area("Preview:", extracted_text, height=150)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- def get_final_output(full_text):
55
- # Find the last occurrence of "### Output:" and return everything after it
56
- if "### Output:" in full_text:
57
- output_text = full_text.split("### Output:", 1)[-1].strip()
58
- # If there's an instruction-like sentence at the beginning, remove it
59
- if "." in output_text:
60
- output_text = output_text.split(".", 1)[-1].strip()
61
- return output_text
62
- return full_text.strip()
63
-
64
-
65
- # Define LLM Prompt Templates
66
- email_template = PromptTemplate.from_template("""
67
- You are an AI assistant skilled in crafting personalized and engaging cold emails for research positions.
68
 
69
- ### Instructions:
70
- - Address the recipient warmly and professionally.
71
- - Introduce yourself succinctly, highlighting relevant background.
72
- - Clearly express your interest in the specific position and align it with your research interests.
73
- - Articulate why you are particularly drawn to this professor's work or lab.
74
- - Mention pertinent details from your resume that strengthen your candidacy.
75
- - Conclude with a polite call to action and gratitude.
76
 
77
- ### Input:
78
- - Recipient Name: {recipient_name}
79
- - Position Title: {position_name}
80
- - Your Research Interests: {research_interests}
81
- - Reason for Choosing This Professor/Lab: {reason}
82
- - Key Resume Highlights: {resume_text}
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  ### Output:
85
- Compose a well-structured, concise cold email with a polite and engaging tone that reflects genuine interest and professionalism.
86
  """)
87
 
88
-
89
  cover_letter_template = PromptTemplate.from_template("""
90
- You are an AI assistant proficient in generating personalized and compelling cover letters for job applications.
91
-
92
- ### Instructions:
93
- - Begin with a formal salutation.
94
- - State the position you are applying for and how you discovered it.
95
- - Highlight your key skills and experiences that make you a strong fit for the role.
96
- - Connect your professional background to the company's mission and values.
97
- - Include specific achievements from your resume that demonstrate your qualifications.
98
- - End with a courteous closing and express enthusiasm for the opportunity.
99
-
100
  ### Input:
101
  - Job Title: {job_title}
102
- - Company Name: {company}
103
- - Relevant Skills and Experiences: {key_skills}
104
- - Resume Highlights: {resume_text}
105
-
106
  ### Output:
107
- Draft a polished and formal cover letter that showcases your suitability for the position and aligns with the company's ethos.
108
  """)
109
 
110
-
111
  research_statement_template = PromptTemplate.from_template("""
112
- You are an AI assistant adept at composing insightful and persuasive research statements for Ph.D. applications.
113
-
114
- ### Instructions:
115
- - Open with a summary of your research interests and their significance.
116
- - Detail your academic background and any research projects that have prepared you for this field.
117
- - Discuss your future research goals and how they align with the program's strengths.
118
- - Emphasize your passion and commitment to advancing knowledge in this area.
119
-
120
  ### Input:
121
- - Research Interests: {research_interests}
122
- - Academic Background and Experience: {resume_text}
123
- - Future Research Objectives: {goals}
124
-
125
  ### Output:
126
- Generate a compelling research statement with a strong academic tone that reflects your expertise and aspirations.
127
  """)
128
 
129
  sop_template = PromptTemplate.from_template("""
130
- You are an AI assistant experienced in crafting detailed and engaging Statements of Purpose for graduate program applications.
131
-
132
- ### Instructions:
133
- - Introduce yourself and your academic interests.
134
- - Explain why you are interested in the chosen program and university.
135
- - Describe your relevant experiences and how they have prepared you for this program.
136
- - Outline your career goals and how this program will help you achieve them.
137
- - Highlight aspects of your resume that support your application.
138
-
139
  ### Input:
140
- - Program Name: {program_name}
141
- - University Name: {university}
142
- - Your Research Interests: {research_interests}
143
- - Career Objectives: {career_goals}
144
- - Resume Details: {resume_text}
145
-
146
  ### Output:
147
- Compose a structured and professional Statement of Purpose that conveys your qualifications, motivations, and fit for the program.
148
  """)
149
 
150
-
151
- # Create LangChain Chains
152
  email_chain = LLMChain(llm=llm, prompt=email_template)
153
  cover_letter_chain = LLMChain(llm=llm, prompt=cover_letter_template)
154
  research_statement_chain = LLMChain(llm=llm, prompt=research_statement_template)
155
  sop_chain = LLMChain(llm=llm, prompt=sop_template)
156
 
157
- # User Inputs for Document Generation
158
  st.subheader("📩 Generate Application Documents")
159
-
160
  tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])
161
 
 
162
  with tab1:
163
- recipient = st.text_input("Recipient Name")
164
- position = st.text_input("Position Name")
165
  research_interests = st.text_area("Research Interests")
166
  reason = st.text_area("Why this professor/lab?")
 
167
  if st.button("Generate Cold Email"):
168
- email = email_chain.run({"recipient_name": recipient, "position_name": position, "research_interests": research_interests, "reason": reason, "resume_text": extracted_text})
169
- final_email = get_final_output(email)
170
- st.text_area("Generated Cold Email", final_email, height=250)
171
-
 
 
 
 
 
 
172
  with tab2:
173
  job_title = st.text_input("Job Title")
174
- company = st.text_input("Company/University")
175
  key_skills = st.text_area("Key Skills")
176
- if st.button("Generate Cover Letter"):
177
- cover_letter = cover_letter_chain.run({"job_title": job_title, "company": company, "key_skills": key_skills, "resume_text": extracted_text})
178
- final_cover_letter = get_final_output(cover_letter)
179
- st.text_area("Generated Cover Letter", final_cover_letter, height=250)
180
 
 
 
 
 
 
 
 
 
 
 
181
  with tab3:
182
- research_goals = st.text_area("Future Research Goals")
183
- if st.button("Generate Research Statement"):
184
- research_statement = research_statement_chain.run({"research_interests": research_interests, "goals": research_goals, "resume_text": extracted_text})
185
- final_rs = get_final_output(research_statement)
186
- st.text_area("Generated Research Statement", final_rs, height=250)
187
 
 
 
 
 
 
 
 
 
 
188
  with tab4:
189
- program_name = st.text_input("Program Name")
190
- university = st.text_input("University")
 
191
  career_goals = st.text_area("Career Goals")
 
 
192
  if st.button("Generate SOP"):
193
- sop = sop_chain.run({"program_name": program_name, "university": university, "research_interests": research_interests, "career_goals": career_goals, "resume_text": extracted_text})
194
- final_sop = get_final_output(sop)
195
- st.text_area("Generated SOP", final_sop, height=250)
 
 
 
 
 
 
2
  from langchain.chains import LLMChain
3
  from langchain.prompts import PromptTemplate
4
  from langchain.llms import HuggingFaceHub
5
+ import fitz # PyMuPDF for PDF extraction
6
  import pytesseract
7
  from PIL import Image
8
  import os
9
+ import re
10
 
11
  # Set Hugging Face API Key (Set this in Hugging Face Secrets)
12
  os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
 
14
  # Load Free LLM from Hugging Face
15
  llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.3", model_kwargs={"temperature": 0.5})
16
 
17
# Page chrome: configure the page, then render the title and the intro line.
st.set_page_config(page_title="DocuMentorAI", layout="wide")
st.title("📄 DocuMentorAI")
st.write("Upload job openings and your CV/Resume to generate professional application documents.")

# Extensions accepted by the uploaders; job openings additionally allow plain text.
_RESUME_FILE_TYPES = ["pdf", "png", "jpg", "jpeg"]
_JOB_OPENING_FILE_TYPES = [*_RESUME_FILE_TYPES, "txt"]

# Job opening upload
st.subheader("📢 Upload Job Opening Details")
job_opening_file = st.file_uploader(
    "Upload Job Opening (PDF, Image, or Text)",
    type=_JOB_OPENING_FILE_TYPES,
)

# CV/Resume upload
st.subheader("📄 Upload CV/Resume")
cv_resume_file = st.file_uploader(
    "Upload your CV/Resume (PDF or Image)",
    type=_RESUME_FILE_TYPES,
)
29
 
30
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of an uploaded PDF, joined by single spaces.

    ``pdf_file`` must expose ``read()``; whatever it returns is handed to
    PyMuPDF (``fitz``) as an in-memory stream with file type ``"pdf"``.
    """
    raw_pdf = pdf_file.read()
    with fitz.open(stream=raw_pdf, filetype="pdf") as document:
        page_texts = (page.get_text() for page in document)
        return " ".join(page_texts)
 
 
 
 
35
 
36
def extract_text_from_image(image_file):
    """Open the upload with Pillow and return pytesseract's OCR text for it."""
    return pytesseract.image_to_string(Image.open(image_file))
40
 
41
def extract_text(uploaded_file):
    """Return the text content of an uploaded file, or ``""`` when there is none.

    Dispatch is based on the upload's MIME type (``uploaded_file.type``):

    * ``application/pdf`` -> ``extract_text_from_pdf``
    * ``text/*`` (or a file name ending in ``.txt``) -> bytes decoded as UTF-8
    * anything else -> ``extract_text_from_image`` (OCR)

    The plain-text branch exists because the job-opening uploader accepts
    ``txt`` files; routing those to the OCR path fails, since the bytes are
    not an image Pillow can open.

    Args:
        uploaded_file: The object returned by ``st.file_uploader`` (or a
            falsy value when nothing has been uploaded).

    Returns:
        The extracted text as a string.
    """
    if not uploaded_file:
        return ""

    mime_type = uploaded_file.type or ""
    if mime_type == "application/pdf":
        return extract_text_from_pdf(uploaded_file)

    file_name = getattr(uploaded_file, "name", "") or ""
    if mime_type.startswith("text/") or file_name.lower().endswith(".txt"):
        raw = uploaded_file.read()
        # read() normally yields bytes; tolerate an already-decoded str.
        # Undecodable bytes are replaced rather than aborting the whole app run.
        if isinstance(raw, bytes):
            return raw.decode("utf-8", errors="replace")
        return raw

    return extract_text_from_image(uploaded_file)
50
+
51
# Pull the text out of both uploads; extract_text gives "" when no file was provided.
job_opening_text = extract_text(job_opening_file)
cv_resume_text = extract_text(cv_resume_file)

# Show a preview box for each document that produced any text.
for _heading, _content in (
    ("Extracted Job Opening Details", job_opening_text),
    ("Extracted CV/Resume Details", cv_resume_text),
):
    if _content:
        st.subheader(_heading)
        st.text_area("Preview:", _content, height=150)
63
+
64
def extract_professor_details(text):
    """Pull a professor's name and an institution name out of free text.

    Args:
        text: Text to scan, e.g. the extracted job-opening description.

    Returns:
        A ``(professor_name, university_name)`` tuple of strings. Each item
        is the first match found in ``text``, or the literal ``"Not Found"``
        when there is no match (callers compare against that sentinel).

    The professor match is a title (``Dr.``, ``Prof``/``Prof.`` or
    ``Professor``) followed by two capitalised words; the title is part of
    the returned value.

    The institution match is anchored on one of the keywords University /
    Institute / College / School. It also takes in up to three capitalised
    words directly before the keyword ("Stanford University") and an
    "of ..." tail directly after it ("University of California"), so the
    result is a usable name rather than the bare keyword. Matching stays on
    one line (spaces/tabs only) so it does not run across line breaks.
    """
    if not text:
        return "Not Found", "Not Found"

    professor_pattern = r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)"

    # One capitalised name word; no "." so a match cannot span sentences.
    name_word = r"[A-Z][A-Za-z&'-]*"
    university_pattern = (
        rf"\b(?:{name_word}[ \t]+){{0,3}}"
        r"(?:University|Institute|College|School)\b"
        rf"(?:[ \t]+of[ \t]+(?:the[ \t]+)?{name_word}(?:[ \t]+{name_word})*)?"
    )

    professor_match = re.search(professor_pattern, text)
    university_match = re.search(university_pattern, text)

    professor_name = professor_match.group(0) if professor_match else "Not Found"
    university_name = university_match.group(0) if university_match else "Not Found"

    return professor_name, university_name
 
 
 
 
 
76
 
77
+ # Extract professor details from the job opening text (runs even without an upload; both values are then "Not Found")
78
+ professor_name, university_name = extract_professor_details(job_opening_text)
79
+
80
+ # LLM Prompt Templates
81
+ email_template = PromptTemplate.from_template("""
82
+ Write a professional cold email for a research position.
83
+ - Address the professor formally.
84
+ - Introduce yourself and academic background.
85
+ - Express interest in their research.
86
+ - Highlight key skills from your CV.
87
+ - Conclude with a polite request.
88
+ ### Input:
89
+ - Professor: {professor_name}
90
+ - University: {university_name}
91
+ - Research Interests: {research_interests}
92
+ - Why This Lab: {reason}
93
+ - CV Highlights: {resume_text}
94
  ### Output:
95
+ A well-structured, professional cold email.
96
  """)
97
 
 
98
  cover_letter_template = PromptTemplate.from_template("""
99
+ Write a compelling job application cover letter.
100
+ - Address the employer formally.
101
+ - Mention job title and where you found it.
102
+ - Highlight key skills and experiences.
103
+ - Relate background to the company.
104
+ - Conclude with enthusiasm.
 
 
 
 
105
  ### Input:
106
  - Job Title: {job_title}
107
+ - Company: {company}
108
+ - Key Skills: {key_skills}
109
+ - CV Highlights: {resume_text}
 
110
  ### Output:
111
+ A strong, well-formatted cover letter.
112
  """)
113
 
 
114
  research_statement_template = PromptTemplate.from_template("""
115
+ Write a research statement for Ph.D. applications.
116
+ - Discuss research background and motivation.
117
+ - Explain key research experiences and findings.
118
+ - Outline future research interests and goals.
119
+ - Highlight contributions to the field.
 
 
 
120
  ### Input:
121
+ - Research Background: {research_background}
122
+ - Key Research Projects: {key_projects}
123
+ - Future Goals: {future_goals}
 
124
  ### Output:
125
+ A well-structured, professional research statement.
126
  """)
127
 
128
  sop_template = PromptTemplate.from_template("""
129
+ Write a compelling Statement of Purpose (SOP).
130
+ - Introduce motivation for graduate studies.
131
+ - Discuss academic background.
132
+ - Explain relevant experiences and research.
133
+ - Outline career goals.
134
+ - Justify fit for the program.
 
 
 
135
  ### Input:
136
+ - Motivation: {motivation}
137
+ - Academic Background: {academic_background}
138
+ - Research & Projects: {research_experiences}
139
+ - Career Goals: {career_goals}
140
+ - Why This Program: {why_this_program}
 
141
  ### Output:
142
+ A well-structured SOP.
143
  """)
144
 
145
def _chain_for(prompt):
    """Build an LLMChain that pairs the shared ``llm`` with ``prompt``."""
    return LLMChain(llm=llm, prompt=prompt)


# One chain per document type, all backed by the same model.
email_chain = _chain_for(email_template)
cover_letter_chain = _chain_for(cover_letter_template)
research_statement_chain = _chain_for(research_statement_template)
sop_chain = _chain_for(sop_template)
150
 
151
+ # User Inputs
152
  st.subheader("📩 Generate Application Documents")
 
153
  tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])
154
 
155
# Cold Email tab: collect inputs and generate the email on demand.
with tab1:
    st.write(f"🧑‍🏫 **Detected Professor:** {professor_name} at {university_name}")

    # Detection yields the placeholder "Not Found" when nothing matched.
    # Offer editable fields (prefilled with whatever was detected) so the
    # placeholder is never sent to the model as if it were a real name.
    email_professor = st.text_input(
        "Professor Name",
        value="" if professor_name == "Not Found" else professor_name,
    )
    email_university = st.text_input(
        "University",
        value="" if university_name == "Not Found" else university_name,
    )
    research_interests = st.text_area("Research Interests")
    reason = st.text_area("Why this professor/lab?")

    if st.button("Generate Cold Email"):
        if not email_professor.strip():
            # Without a recipient the prompt cannot address anyone sensibly.
            st.warning("Please enter the professor's name before generating the email.")
        else:
            email = email_chain.run({
                "professor_name": email_professor,
                "university_name": email_university,
                "research_interests": research_interests,
                "reason": reason,
                "resume_text": cv_resume_text,
            })
            st.text_area("Generated Cold Email", email, height=250)
170
+
171
# Cover Letter tab: collect inputs and generate the letter on demand.
with tab2:
    job_title = st.text_input("Job Title")
    # Always show the field, prefilled with the detected institution, so a
    # wrong or partial detection can be corrected instead of being used
    # silently (previously the input was hidden whenever anything was detected).
    company_name = st.text_input(
        "Company/University",
        value="" if university_name == "Not Found" else university_name,
    )
    key_skills = st.text_area("Key Skills")

    if st.button("Generate Cover Letter"):
        cover_letter = cover_letter_chain.run({
            "job_title": job_title,
            "company": company_name,
            "key_skills": key_skills,
            "resume_text": cv_resume_text,
        })
        st.text_area("Generated Cover Letter", cover_letter, height=250)
185
+
186
# Research Statement tab: three free-text inputs feed the research-statement chain.
with tab3:
    research_background = st.text_area("Research Background")
    key_projects = st.text_area("Key Research Projects")
    future_goals = st.text_area("Future Research Goals")

    generate_statement = st.button("Generate Research Statement")
    if generate_statement:
        statement_inputs = dict(
            research_background=research_background,
            key_projects=key_projects,
            future_goals=future_goals,
        )
        research_statement = research_statement_chain.run(statement_inputs)
        st.text_area("Generated Research Statement", research_statement, height=250)
199
+
200
# SOP tab: five free-text inputs feed the statement-of-purpose chain.
with tab4:
    motivation = st.text_area("Motivation for Graduate Studies")
    academic_background = st.text_area("Academic Background")
    research_experiences = st.text_area("Research & Projects")
    career_goals = st.text_area("Career Goals")
    why_this_program = st.text_area("Why This Program")

    generate_sop = st.button("Generate SOP")
    if generate_sop:
        sop_inputs = dict(
            motivation=motivation,
            academic_background=academic_background,
            research_experiences=research_experiences,
            career_goals=career_goals,
            why_this_program=why_this_program,
        )
        sop = sop_chain.run(sop_inputs)
        st.text_area("Generated SOP", sop, height=250)