Spaces:

Sobit
/

DocuMentorAI

Sleeping

App Files Files Community

DocuMentorAI / app.py

Sobit

Update app.py

1ad6ea2 verified 6 months ago

raw

history blame

11.4 kB

	import streamlit as st
	from langchain.chains import LLMChain
	from langchain.prompts import PromptTemplate
	from langchain.llms import HuggingFaceHub
	import fitz # PyMuPDF for PDF extraction
	from PIL import Image
	import os
	import pytesseract
	import re

	# Publish the Hugging Face token (read from the app's secrets under HF_TOKEN)
	# through the environment before the LLM client below is constructed.
	os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]

	# Hosted model used by the app, addressed by its Hugging Face repo id.
	_LLM_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.3"
	_LLM_MODEL_KWARGS = {"temperature": 0.5}
	llm = HuggingFaceHub(repo_id=_LLM_REPO_ID, model_kwargs=_LLM_MODEL_KWARGS)

	# Page chrome: browser title/icon, wide layout, heading and tagline.
	st.set_page_config(page_title="DocuMentorAI", layout="wide", page_icon="📄")
	st.title("📄 DocuMentorAI")
	st.write("Generate professional application documents with ease!")

	# Style overrides for text areas, buttons, markdown text and spinners.
	# Injected as raw HTML, hence unsafe_allow_html=True on the call below.
	_CUSTOM_CSS = """
	<style>
	.stTextArea textarea { font-size: 16px !important; }
	.stButton button { width: 100%; background-color: #4CAF50; color: white; }
	.stDownloadButton button { width: 100%; background-color: #008CBA; color: white; }
	.stMarkdown { font-size: 18px; }
	.stSpinner div { margin: auto; }
	</style>
	"""
	st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

	# Free-text description of the opening the user wants to apply to.
	st.subheader("📢 Enter Opening Details")
	_OPENING_PLACEHOLDER = "Example: 'We are hiring a Research Assistant at XYZ University. The ideal candidate has experience in machine learning and data analysis...'"
	job_opening_text = st.text_area(
	    "Paste the job/research opening details here...",
	    height=150,
	    placeholder=_OPENING_PLACEHOLDER,
	)

	# CV/Resume upload; accepts a PDF or one of the listed image formats.
	st.subheader("📄 Upload CV/Resume")
	_CV_FILE_TYPES = ["pdf", "png", "jpg", "jpeg"]
	cv_resume_file = st.file_uploader(
	    "Upload your CV/Resume (PDF or Image)",
	    type=_CV_FILE_TYPES,
	    help="Upload a PDF or image of your CV/Resume for text extraction.",
	)

	# Function to extract text from PDF
	def extract_text_from_pdf(pdf_file):
	    """Return the text of all pages of a PDF, joined with single spaces.

	    ``pdf_file`` is read in full and its bytes are opened with PyMuPDF as
	    an in-memory stream. On any failure the error is reported through
	    ``st.error`` and an empty string is returned instead of raising.
	    """
	    try:
	        raw_pdf = pdf_file.read()
	        page_texts = []
	        with fitz.open(stream=raw_pdf, filetype="pdf") as document:
	            for page in document:
	                page_texts.append(page.get_text())
	        return " ".join(page_texts)
	    except Exception as e:
	        st.error(f"Error extracting text from PDF: {e}")
	        return ""

	# Function to extract text from Image using OCR
	def extract_text_from_image(image_file):
	    """Run OCR on an image file and return the recognised text.

	    The image is opened with PIL and passed to pytesseract. On any
	    failure the error is reported through ``st.error`` and an empty
	    string is returned instead of raising.
	    """
	    try:
	        picture = Image.open(image_file)
	        ocr_text = pytesseract.image_to_string(picture)
	    except Exception as e:
	        st.error(f"Error extracting text from image: {e}")
	        return ""
	    return ocr_text

	# Function to extract text from uploaded files
	def extract_text(uploaded_file):
	    """Extract text from an uploaded file, choosing the extractor by MIME type.

	    A falsy ``uploaded_file`` (nothing uploaded) yields an empty string.
	    Files whose ``type`` is ``application/pdf`` go through the PDF
	    extractor; every other type is handed to the image OCR extractor.
	    """
	    if not uploaded_file:
	        return ""
	    if uploaded_file.type == "application/pdf":
	        return extract_text_from_pdf(uploaded_file)
	    return extract_text_from_image(uploaded_file)

	# Pull the text out of whatever was uploaded (runs on every script execution).
	cv_resume_text = extract_text(cv_resume_file)

	# Echo both inputs back in collapsible panels so the user can check them.
	if job_opening_text:
	    opening_panel = st.expander("🔍 View Entered Opening Details")
	    with opening_panel:
	        st.markdown(f"Job Opening Details:\n\n{job_opening_text}")

	if cv_resume_text:
	    cv_panel = st.expander("🔍 View Extracted CV/Resume Details")
	    with cv_panel:
	        st.markdown(f"CV/Resume Details:\n\n{cv_resume_text}")

	# Function to extract professor name, designation, and university
	def extract_professor_details(text):
	    """Find a professor name and an institution keyword in free text.

	    Args:
	        text: Text to scan (for example a pasted opening description).
	            ``None`` or an empty string is treated as nothing to find.

	    Returns:
	        A ``(professor_name, university_name)`` tuple of strings.
	        ``professor_name`` is the whole match of a title ("Dr.",
	        "Professor", "Prof" or "Prof.") followed by two capitalised
	        words. ``university_name`` is the first occurrence of
	        "University", "Institute", "College" or "School of <word>".
	        Either element is the literal string "Not Found" when its
	        pattern does not match.
	    """
	    if not text:
	        return "Not Found", "Not Found"

	    # The alternatives must be separated by a bare "|"; a backslash-escaped
	    # pipe matches a literal "|" character, which never occurs in real text.
	    professor_pattern = r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)"
	    university_pattern = r"(University|Institute|College|School of [A-Za-z]+)"

	    professor_match = re.search(professor_pattern, text)
	    university_match = re.search(university_pattern, text)

	    professor_name = professor_match.group(0) if professor_match else "Not Found"
	    university_name = university_match.group(0) if university_match else "Not Found"

	    return professor_name, university_name

	# Scan the pasted opening text for a professor name and an institution keyword.
	# The call is unconditional, so it also runs while the text area is still empty.
	professor_name, university_name = extract_professor_details(job_opening_text)

	# LLM Prompt Templates
	# Cold-email prompt. Placeholders: professor_name, university_name,
	# research_interests, reason, resume_text.
	_EMAIL_PROMPT_TEXT = """
	Write a professional cold email for a research position.
	- Address the professor formally.
	- Introduce yourself and academic background.
	- Express interest in their research.
	- Highlight key skills from your CV.
	- Conclude with a polite request.
	### Input:
	- Professor: {professor_name}
	- University: {university_name}
	- Research Interests: {research_interests}
	- Why This Lab: {reason}
	- CV Highlights: {resume_text}
	### Output:
	A well-structured, professional cold email.
	"""
	email_template = PromptTemplate.from_template(_EMAIL_PROMPT_TEXT)

	# Cover-letter prompt. Placeholders: job_title, company, key_skills,
	# resume_text.
	_COVER_LETTER_PROMPT_TEXT = """
	Write a compelling job application cover letter.
	- Address the employer formally.
	- Mention job title and where you found it.
	- Highlight key skills and experiences.
	- Relate background to the company.
	- Conclude with enthusiasm.
	### Input:
	- Job Title: {job_title}
	- Company: {company}
	- Key Skills: {key_skills}
	- CV Highlights: {resume_text}
	### Output:
	A strong, well-formatted cover letter.
	"""
	cover_letter_template = PromptTemplate.from_template(_COVER_LETTER_PROMPT_TEXT)

	# Research-statement prompt. Placeholders: research_background,
	# key_projects, future_goals.
	_RESEARCH_STATEMENT_PROMPT_TEXT = """
	Write a research statement for Ph.D. applications.
	- Discuss research background and motivation.
	- Explain key research experiences and findings.
	- Outline future research interests and goals.
	- Highlight contributions to the field.
	### Input:
	- Research Background: {research_background}
	- Key Research Projects: {key_projects}
	- Future Goals: {future_goals}
	### Output:
	A well-structured, professional research statement.
	"""
	research_statement_template = PromptTemplate.from_template(_RESEARCH_STATEMENT_PROMPT_TEXT)

	# Statement-of-Purpose prompt. Placeholders: motivation,
	# academic_background, research_experiences, career_goals, why_this_program.
	_SOP_PROMPT_TEXT = """
	Write a compelling Statement of Purpose (SOP).
	- Introduce motivation for graduate studies.
	- Discuss academic background.
	- Explain relevant experiences and research.
	- Outline career goals.
	- Justify fit for the program.
	### Input:
	- Motivation: {motivation}
	- Academic Background: {academic_background}
	- Research & Projects: {research_experiences}
	- Career Goals: {career_goals}
	- Why This Program: {why_this_program}
	### Output:
	A well-structured SOP.
	"""
	sop_template = PromptTemplate.from_template(_SOP_PROMPT_TEXT)

	# One LLMChain per document type, all sharing the same LLM client; the
	# targets on the left line up positionally with the templates on the right.
	email_chain, cover_letter_chain, research_statement_chain, sop_chain = (
	    LLMChain(llm=llm, prompt=prompt_template)
	    for prompt_template in (
	        email_template,
	        cover_letter_template,
	        research_statement_template,
	        sop_template,
	    )
	)

	# Generation area: one tab per document type, in the order listed here.
	st.subheader("📩 Generate Application Documents")
	_TAB_LABELS = ["Cold Email", "Cover Letter", "Research Statement", "SOP"]
	tab1, tab2, tab3, tab4 = st.tabs(_TAB_LABELS)

	# Cold Email Generation
	with tab1:
	    st.write(f"🧑‍🏫 Detected Professor: {professor_name} at {university_name}")

	    # Detection reports the literal "Not Found" when it fails. Ask the user
	    # for the value instead of writing that placeholder into the prompt,
	    # the same fallback the cover-letter tab uses for the company name.
	    # Explicit keys keep these widgets distinct from other text inputs.
	    if professor_name != "Not Found":
	        email_professor = professor_name
	    else:
	        email_professor = st.text_input(
	            "Professor Name",
	            placeholder="Example: Dr. Jane Smith",
	            key="cold_email_professor",
	        )
	    if university_name != "Not Found":
	        email_university = university_name
	    else:
	        email_university = st.text_input(
	            "University/Institute",
	            placeholder="Example: XYZ University",
	            key="cold_email_university",
	        )

	    research_interests = st.text_area("Research Interests", placeholder="Example: Machine Learning, Data Analysis, etc.")
	    reason = st.text_area("Why this professor/lab?", placeholder="Example: I am particularly interested in your work on...")

	    if st.button("Generate Cold Email"):
	        # Both the opening text and the extracted CV text are required.
	        if not job_opening_text or not cv_resume_text:
	            st.error("Please provide job opening details and upload your CV/Resume.")
	        else:
	            with st.spinner("Generating Cold Email..."):
	                try:
	                    email = email_chain.run({
	                        "professor_name": email_professor,
	                        "university_name": email_university,
	                        "research_interests": research_interests,
	                        "reason": reason,
	                        "resume_text": cv_resume_text
	                    })
	                    st.markdown("Generated Cold Email:")
	                    st.markdown(email)
	                    st.download_button("Download Email", email, file_name="cold_email.txt")
	                except Exception as e:
	                    # Surface LLM/API failures in the UI rather than crashing the app.
	                    st.error(f"Error generating cold email: {e}")

	# Cover letter tab: collects job title, company and skills, then generates.
	with tab2:
	    job_title = st.text_input("Job Title", placeholder="Example: Research Assistant")
	    # Reuse the detected institution; only ask when detection found nothing.
	    if university_name != "Not Found":
	        company_name = university_name
	    else:
	        company_name = st.text_input("Company/University", placeholder="Example: XYZ University")
	    key_skills = st.text_area("Key Skills", placeholder="Example: Python, Machine Learning, Data Analysis")

	    if st.button("Generate Cover Letter"):
	        if job_opening_text and cv_resume_text:
	            with st.spinner("Generating Cover Letter..."):
	                cover_letter_inputs = {
	                    "job_title": job_title,
	                    "company": company_name,
	                    "key_skills": key_skills,
	                    "resume_text": cv_resume_text,
	                }
	                try:
	                    cover_letter = cover_letter_chain.run(cover_letter_inputs)
	                    st.markdown("Generated Cover Letter:")
	                    st.markdown(cover_letter)
	                    st.download_button("Download Cover Letter", cover_letter, file_name="cover_letter.txt")
	                except Exception as e:
	                    st.error(f"Error generating cover letter: {e}")
	        else:
	            # Either the opening text or the extracted CV text is missing.
	            st.error("Please provide job opening details and upload your CV/Resume.")

	# Research statement tab: three free-text inputs feed the prompt directly.
	with tab3:
	    research_background = st.text_area("Research Background", placeholder="Example: My research focuses on...")
	    key_projects = st.text_area("Key Research Projects", placeholder="Example: Developed a machine learning model for...")
	    future_goals = st.text_area("Future Research Goals", placeholder="Example: I aim to explore...")

	    if st.button("Generate Research Statement"):
	        with st.spinner("Generating Research Statement..."):
	            statement_inputs = {
	                "research_background": research_background,
	                "key_projects": key_projects,
	                "future_goals": future_goals,
	            }
	            try:
	                research_statement = research_statement_chain.run(statement_inputs)
	                st.markdown("Generated Research Statement:")
	                st.markdown(research_statement)
	                st.download_button("Download Research Statement", research_statement, file_name="research_statement.txt")
	            except Exception as e:
	                st.error(f"Error generating research statement: {e}")

	# Statement of Purpose tab: five free-text inputs feed the prompt directly.
	with tab4:
	    motivation = st.text_area("Motivation for Graduate Studies", placeholder="Example: I have always been passionate about...")
	    academic_background = st.text_area("Academic Background", placeholder="Example: I completed my undergraduate degree in...")
	    research_experiences = st.text_area("Research & Projects", placeholder="Example: During my undergraduate studies, I worked on...")
	    career_goals = st.text_area("Career Goals", placeholder="Example: My long-term goal is to...")
	    why_this_program = st.text_area("Why This Program", placeholder="Example: This program aligns with my research interests because...")

	    if st.button("Generate SOP"):
	        with st.spinner("Generating SOP..."):
	            sop_inputs = {
	                "motivation": motivation,
	                "academic_background": academic_background,
	                "research_experiences": research_experiences,
	                "career_goals": career_goals,
	                "why_this_program": why_this_program,
	            }
	            try:
	                sop = sop_chain.run(sop_inputs)
	                st.markdown("Generated SOP:")
	                st.markdown(sop)
	                st.download_button("Download SOP", sop, file_name="sop.txt")
	            except Exception as e:
	                st.error(f"Error generating SOP: {e}")

	# Reset Button
	if st.button("🔄 Reset All Inputs and Outputs"):
	    # st.experimental_rerun is deprecated in favour of st.rerun and is absent
	    # from newer Streamlit releases; use st.rerun when the installed version
	    # has it and fall back to the old name otherwise.
	    # NOTE(review): this only re-executes the script; confirm whether widget
	    # values are actually cleared by a rerun.
	    _rerun = getattr(st, "rerun", None)
	    if _rerun is None:
	        _rerun = st.experimental_rerun
	    _rerun()