Spaces:

Sobit
/

DocuMentorAI

Sleeping

App Files Community

DocuMentorAI / app.py

Sobit

Create app.py

8349bb4 verified 6 months ago

raw

history blame

5.24 kB

	import streamlit as st
	from langchain.chains import LLMChain
	from langchain.prompts import PromptTemplate
	from langchain.llms import HuggingFaceHub
	import fitz # PyMuPDF for PDF text extraction
	import pytesseract
	from PIL import Image
	import os

	# Page configuration is issued before anything else touches Streamlit, because
	# Streamlit requires set_page_config to be the first Streamlit command in a script.
	st.set_page_config(page_title="DocuMentorAI", layout="wide")

	# Set Hugging Face API Key (Set this in Hugging Face Secrets).
	# LangChain's HuggingFaceHub wrapper reads the token from this environment variable.
	os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]

	# Load Free LLM from Hugging Face.
	# The Mistral instruct checkpoints are published under versioned repository ids
	# ("...-Instruct-v0.1", "...-Instruct-v0.2", ...); the unversioned id
	# "mistralai/Mistral-7B-Instruct" does not name a repository on the Hub.
	# NOTE(review): confirm v0.2 is the revision you want and that the token's
	# account has access to it.
	llm = HuggingFaceHub(
	    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
	    model_kwargs={"temperature": 0.5},
	)

	# Define Streamlit App
	st.title("📄 DocuMentorAI")
	st.write("Upload your CV/Resume and generate professional application documents.")

	# File Upload (PDF/Image)
	uploaded_file = st.file_uploader(
	    "Upload your CV/Resume (PDF or Image)",
	    type=["pdf", "png", "jpg", "jpeg"],
	)

	def extract_text_from_pdf(pdf_file):
	    """Extract the text of every page of a PDF.

	    Args:
	        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``), the raw
	            PDF bytes, or a binary file-like object exposing ``read()`` (for
	            example the object returned by ``st.file_uploader``).

	    Returns:
	        The text of all pages concatenated in page order.
	    """
	    if isinstance(pdf_file, (str, os.PathLike)):
	        doc = fitz.open(os.fspath(pdf_file))
	    else:
	        # An uploaded file lives in memory, not on disk. PyMuPDF must be given
	        # its bytes through `stream=`; passing the object positionally makes
	        # PyMuPDF treat it as a filename and the open fails.
	        if isinstance(pdf_file, (bytes, bytearray)):
	            data = pdf_file
	        else:
	            data = pdf_file.read()
	        doc = fitz.open(stream=data, filetype="pdf")

	    with doc:
	        return "".join(page.get_text() for page in doc)

	def extract_text_from_image(image_file):
	    """Return the text that Tesseract OCR recognises in an image.

	    ``image_file`` is passed straight to ``PIL.Image.open`` and the opened
	    image is handed to ``pytesseract.image_to_string``.
	    """
	    return pytesseract.image_to_string(Image.open(image_file))

	# Start from an empty resume so `extracted_text` is always bound, even on the
	# first run of the script when nothing has been uploaded yet.
	extracted_text = ""

	if uploaded_file:
	    file_type = uploaded_file.type

	    # PDFs go through the PDF text extractor; every other accepted upload
	    # is treated as an image and sent through OCR.
	    if file_type == "application/pdf":
	        extracted_text = extract_text_from_pdf(uploaded_file)
	    else:
	        extracted_text = extract_text_from_image(uploaded_file)

	    st.subheader("Extracted Text from CV/Resume")
	    st.text_area("Preview:", extracted_text, height=150)

	# Define LLM Prompt Templates
	# The raw prompt text lives in named constants; each one is then turned into a
	# PromptTemplate whose {placeholders} are filled in when a chain is run.
	_EMAIL_PROMPT_TEXT = """
	You are an AI assistant helping users craft a professional cold email for a research position.

	### Input:
	- Recipient: {recipient_name}
	- Position: {position_name}
	- Research Interests: {research_interests}
	- Why this professor/lab: {reason}
	- Resume Details: {resume_text}

	### Output:
	A well-structured, concise cold email with a polite and engaging tone.
	"""

	_COVER_LETTER_PROMPT_TEXT = """
	You are an AI assistant generating a professional cover letter.

	### Input:
	- Job Title: {job_title}
	- Company/University: {company}
	- Key Skills: {key_skills}
	- Resume Details: {resume_text}

	### Output:
	A polished and formal cover letter.
	"""

	_RESEARCH_STATEMENT_PROMPT_TEXT = """
	You are an AI assistant generating a research statement for a Ph.D. application.

	### Input:
	- Research Interests: {research_interests}
	- Academic Background: {resume_text}
	- Future Research Goals: {goals}

	### Output:
	A compelling research statement with a strong academic tone.
	"""

	_SOP_PROMPT_TEXT = """
	You are an AI assistant writing a Statement of Purpose (SOP) for a master's or Ph.D. program.

	### Input:
	- Program Name: {program_name}
	- University: {university}
	- Research Interests: {research_interests}
	- Career Goals: {career_goals}
	- Resume Details: {resume_text}

	### Output:
	A structured and professional SOP.
	"""

	email_template = PromptTemplate.from_template(_EMAIL_PROMPT_TEXT)
	cover_letter_template = PromptTemplate.from_template(_COVER_LETTER_PROMPT_TEXT)
	research_statement_template = PromptTemplate.from_template(_RESEARCH_STATEMENT_PROMPT_TEXT)
	sop_template = PromptTemplate.from_template(_SOP_PROMPT_TEXT)

	# Create LangChain Chains
	# One LLMChain per prompt template, built in the same order and all sharing `llm`.
	email_chain, cover_letter_chain, research_statement_chain, sop_chain = (
	    LLMChain(llm=llm, prompt=template)
	    for template in (
	        email_template,
	        cover_letter_template,
	        research_statement_template,
	        sop_template,
	    )
	)

	# User Inputs for Document Generation
	st.subheader("📩 Generate Application Documents")

	# `extracted_text` is only assigned after a file has been uploaded and parsed;
	# the conditional short-circuits so it is never read before it exists.
	resume_text = extracted_text if uploaded_file else ""


	def _generate_and_show(chain, inputs, label):
	    """Run ``chain`` with ``inputs`` and display the output in a text area.

	    Args:
	        chain: The LLMChain to execute.
	        inputs: Mapping of prompt placeholder names to their values.
	        label: Title of the text area that shows the generated document.

	    Nothing is generated when no CV/Resume has been uploaded; a warning is
	    shown instead.
	    """
	    if not uploaded_file:
	        st.warning("Please upload your CV/Resume before generating a document.")
	        return
	    with st.spinner("Generating..."):
	        generated = chain.run(inputs)
	    st.text_area(label, generated, height=250)


	tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])

	with tab1:
	    recipient = st.text_input("Recipient Name")
	    position = st.text_input("Position Name")
	    research_interests = st.text_area("Research Interests")
	    reason = st.text_area("Why this professor/lab?")
	    if st.button("Generate Cold Email"):
	        _generate_and_show(
	            email_chain,
	            {
	                "recipient_name": recipient,
	                "position_name": position,
	                "research_interests": research_interests,
	                "reason": reason,
	                "resume_text": resume_text,
	            },
	            "Generated Cold Email",
	        )

	with tab2:
	    job_title = st.text_input("Job Title")
	    company = st.text_input("Company/University")
	    key_skills = st.text_area("Key Skills")
	    if st.button("Generate Cover Letter"):
	        _generate_and_show(
	            cover_letter_chain,
	            {
	                "job_title": job_title,
	                "company": company,
	                "key_skills": key_skills,
	                "resume_text": resume_text,
	            },
	            "Generated Cover Letter",
	        )

	with tab3:
	    # This tab has its own interests field so the statement does not depend on
	    # a value typed into the Cold Email tab. The explicit key keeps the widget
	    # distinct from the other "Research Interests" text areas.
	    statement_interests = st.text_area("Research Interests", key="research_statement_interests")
	    research_goals = st.text_area("Future Research Goals")
	    if st.button("Generate Research Statement"):
	        _generate_and_show(
	            research_statement_chain,
	            {
	                "research_interests": statement_interests,
	                "goals": research_goals,
	                "resume_text": resume_text,
	            },
	            "Generated Research Statement",
	        )

	with tab4:
	    program_name = st.text_input("Program Name")
	    university = st.text_input("University")
	    # Separate, keyed interests field for the same reason as in the previous tab.
	    sop_interests = st.text_area("Research Interests", key="sop_research_interests")
	    career_goals = st.text_area("Career Goals")
	    if st.button("Generate SOP"):
	        _generate_and_show(
	            sop_chain,
	            {
	                "program_name": program_name,
	                "university": university,
	                "research_interests": sop_interests,
	                "career_goals": career_goals,
	                "resume_text": resume_text,
	            },
	            "Generated SOP",
	        )