Sobit commited on
Commit
8349bb4
·
verified ·
1 Parent(s): c3f0756

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -0
app.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.chains import LLMChain
3
+ from langchain.prompts import PromptTemplate
4
+ from langchain.llms import HuggingFaceHub
5
+ import fitz # PyMuPDF for PDF text extraction
6
+ import pytesseract
7
+ from PIL import Image
8
+ import os
9
+
10
+ # Set Hugging Face API Key (Set this in Hugging Face Secrets)
11
+ os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
12
+
13
+ # Load Free LLM from Hugging Face
14
+ llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct", model_kwargs={"temperature": 0.5})
15
+
16
+ # Define Streamlit App
17
+ st.set_page_config(page_title="DocuMentorAI", layout="wide")
18
+ st.title("📄 DocuMentorAI")
19
+ st.write("Upload your CV/Resume and generate professional application documents.")
20
+
21
+ # File Upload (PDF/Image)
22
+ uploaded_file = st.file_uploader("Upload your CV/Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
23
+
24
+ def extract_text_from_pdf(pdf_file):
25
+ """Extract text from a PDF file."""
26
+ text = ""
27
+ with fitz.open(pdf_file) as doc:
28
+ for page in doc:
29
+ text += page.get_text()
30
+ return text
31
+
32
+ def extract_text_from_image(image_file):
33
+ """Extract text from an image using OCR."""
34
+ image = Image.open(image_file)
35
+ return pytesseract.image_to_string(image)
36
+
37
+ if uploaded_file:
38
+ file_type = uploaded_file.type
39
+ extracted_text = ""
40
+
41
+ if file_type == "application/pdf":
42
+ extracted_text = extract_text_from_pdf(uploaded_file)
43
+ else:
44
+ extracted_text = extract_text_from_image(uploaded_file)
45
+
46
+ st.subheader("Extracted Text from CV/Resume")
47
+ st.text_area("Preview:", extracted_text, height=150)
48
+
49
+ # Define LLM Prompt Templates
50
+ email_template = PromptTemplate.from_template("""
51
+ You are an AI assistant helping users craft a professional cold email for a research position.
52
+
53
+ ### Input:
54
+ - Recipient: {recipient_name}
55
+ - Position: {position_name}
56
+ - Research Interests: {research_interests}
57
+ - Why this professor/lab: {reason}
58
+ - Resume Details: {resume_text}
59
+
60
+ ### Output:
61
+ A well-structured, concise cold email with a polite and engaging tone.
62
+ """)
63
+
64
+ cover_letter_template = PromptTemplate.from_template("""
65
+ You are an AI assistant generating a professional cover letter.
66
+
67
+ ### Input:
68
+ - Job Title: {job_title}
69
+ - Company/University: {company}
70
+ - Key Skills: {key_skills}
71
+ - Resume Details: {resume_text}
72
+
73
+ ### Output:
74
+ A polished and formal cover letter.
75
+ """)
76
+
77
+ research_statement_template = PromptTemplate.from_template("""
78
+ You are an AI assistant generating a research statement for a Ph.D. application.
79
+
80
+ ### Input:
81
+ - Research Interests: {research_interests}
82
+ - Academic Background: {resume_text}
83
+ - Future Research Goals: {goals}
84
+
85
+ ### Output:
86
+ A compelling research statement with a strong academic tone.
87
+ """)
88
+
89
+ sop_template = PromptTemplate.from_template("""
90
+ You are an AI assistant writing a Statement of Purpose (SOP) for a master's or Ph.D. program.
91
+
92
+ ### Input:
93
+ - Program Name: {program_name}
94
+ - University: {university}
95
+ - Research Interests: {research_interests}
96
+ - Career Goals: {career_goals}
97
+ - Resume Details: {resume_text}
98
+
99
+ ### Output:
100
+ A structured and professional SOP.
101
+ """)
102
+
103
+ # Create LangChain Chains
104
+ email_chain = LLMChain(llm=llm, prompt=email_template)
105
+ cover_letter_chain = LLMChain(llm=llm, prompt=cover_letter_template)
106
+ research_statement_chain = LLMChain(llm=llm, prompt=research_statement_template)
107
+ sop_chain = LLMChain(llm=llm, prompt=sop_template)
108
+
109
+ # User Inputs for Document Generation
110
+ st.subheader("📩 Generate Application Documents")
111
+
112
+ tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])
113
+
114
+ with tab1:
115
+ recipient = st.text_input("Recipient Name")
116
+ position = st.text_input("Position Name")
117
+ research_interests = st.text_area("Research Interests")
118
+ reason = st.text_area("Why this professor/lab?")
119
+ if st.button("Generate Cold Email"):
120
+ email = email_chain.run({"recipient_name": recipient, "position_name": position, "research_interests": research_interests, "reason": reason, "resume_text": extracted_text})
121
+ st.text_area("Generated Cold Email", email, height=250)
122
+
123
+ with tab2:
124
+ job_title = st.text_input("Job Title")
125
+ company = st.text_input("Company/University")
126
+ key_skills = st.text_area("Key Skills")
127
+ if st.button("Generate Cover Letter"):
128
+ cover_letter = cover_letter_chain.run({"job_title": job_title, "company": company, "key_skills": key_skills, "resume_text": extracted_text})
129
+ st.text_area("Generated Cover Letter", cover_letter, height=250)
130
+
131
+ with tab3:
132
+ research_goals = st.text_area("Future Research Goals")
133
+ if st.button("Generate Research Statement"):
134
+ research_statement = research_statement_chain.run({"research_interests": research_interests, "goals": research_goals, "resume_text": extracted_text})
135
+ st.text_area("Generated Research Statement", research_statement, height=250)
136
+
137
+ with tab4:
138
+ program_name = st.text_input("Program Name")
139
+ university = st.text_input("University")
140
+ career_goals = st.text_area("Career Goals")
141
+ if st.button("Generate SOP"):
142
+ sop = sop_chain.run({"program_name": program_name, "university": university, "research_interests": research_interests, "career_goals": career_goals, "resume_text": extracted_text})
143
+ st.text_area("Generated SOP", sop, height=250)