Spaces:

Sobit
/

DocuMentorAI

Sleeping

File size: 5,239 Bytes

8349bb4

import streamlit as st
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
import fitz  # PyMuPDF for PDF text extraction
import pytesseract
from PIL import Image
import os

# Copy the Hugging Face token from the app's secrets (key "HF_TOKEN") into the
# HUGGINGFACEHUB_API_TOKEN environment variable. Indexing st.secrets raises if
# the secret has not been configured.
# NOTE(review): presumably HuggingFaceHub reads the token from this variable —
# confirm against the installed langchain version.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]

# Hosted LLM shared by every chain created further down; temperature is 0.5.
# NOTE(review): Mistral instruct checkpoints on the Hub appear to carry a
# version suffix (e.g. "-v0.2") — confirm that this unversioned repo_id
# actually resolves.
llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct", model_kwargs={"temperature": 0.5})

# Page chrome: browser-tab title, wide layout, heading and a one-line intro.
st.set_page_config(page_title="DocuMentorAI", layout="wide")
st.title("📄 DocuMentorAI")
st.write("Upload your CV/Resume and generate professional application documents.")

# Resume upload, restricted to PDF and common image extensions. The result is
# falsy until the user has picked a file (it is tested with `if uploaded_file`).
uploaded_file = st.file_uploader("Upload your CV/Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])

def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` or path-like) or a binary
            file-like object exposing ``read()``, such as the in-memory
            object returned by ``st.file_uploader``.

    Returns:
        The text of all pages concatenated in page order ("" for a PDF with
        no extractable text).
    """
    if hasattr(pdf_file, "read"):
        # An uploaded file only exists in memory. PyMuPDF treats its first
        # positional argument as a filename, so the raw bytes have to be
        # passed through ``stream=`` together with an explicit file type.
        if hasattr(pdf_file, "seek"):
            # Rewind in case the buffer was already consumed.
            pdf_file.seek(0)
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    else:
        doc = fitz.open(pdf_file)

    with doc:
        return "".join(page.get_text() for page in doc)

def extract_text_from_image(image_file):
    """Return the text that Tesseract OCR recognises in ``image_file``.

    ``image_file`` is passed straight to ``PIL.Image.open``.
    """
    return pytesseract.image_to_string(Image.open(image_file))

# Bind the resume text unconditionally: the "Generate ..." handlers further
# down read `extracted_text` even when nothing has been uploaded, and would
# otherwise fail with a NameError.
extracted_text = ""

if uploaded_file:
    file_type = uploaded_file.type

    # PDFs go through PyMuPDF; every other accepted upload type is an image
    # and goes through OCR.
    if file_type == "application/pdf":
        extracted_text = extract_text_from_pdf(uploaded_file)
    else:
        extracted_text = extract_text_from_image(uploaded_file)

    # Let the user check what was actually read from the document.
    st.subheader("Extracted Text from CV/Resume")
    st.text_area("Preview:", extracted_text, height=150)

# Define LLM Prompt Templates

# Cold-email prompt. Placeholders that must be supplied when the prompt is
# formatted: recipient_name, position_name, research_interests, reason,
# resume_text.
email_template = PromptTemplate.from_template("""
You are an AI assistant helping users craft a professional cold email for a research position.

### Input:
- Recipient: {recipient_name}
- Position: {position_name}
- Research Interests: {research_interests}
- Why this professor/lab: {reason}
- Resume Details: {resume_text}

### Output:
A well-structured, concise cold email with a polite and engaging tone.
""")

# Cover-letter prompt. Placeholders that must be supplied when the prompt is
# formatted: job_title, company, key_skills, resume_text.
cover_letter_template = PromptTemplate.from_template("""
You are an AI assistant generating a professional cover letter.

### Input:
- Job Title: {job_title}
- Company/University: {company}
- Key Skills: {key_skills}
- Resume Details: {resume_text}

### Output:
A polished and formal cover letter.
""")

# Research-statement prompt. Placeholders that must be supplied when the
# prompt is formatted: research_interests, resume_text (rendered under
# "Academic Background"), goals.
research_statement_template = PromptTemplate.from_template("""
You are an AI assistant generating a research statement for a Ph.D. application.

### Input:
- Research Interests: {research_interests}
- Academic Background: {resume_text}
- Future Research Goals: {goals}

### Output:
A compelling research statement with a strong academic tone.
""")

# Statement-of-Purpose prompt. Placeholders that must be supplied when the
# prompt is formatted: program_name, university, research_interests,
# career_goals, resume_text.
sop_template = PromptTemplate.from_template("""
You are an AI assistant writing a Statement of Purpose (SOP) for a master's or Ph.D. program.

### Input:
- Program Name: {program_name}
- University: {university}
- Research Interests: {research_interests}
- Career Goals: {career_goals}
- Resume Details: {resume_text}

### Output:
A structured and professional SOP.
""")

# Build one LLMChain per document type, all backed by the same `llm`.
# The order of the unpacked names matches the order of the templates.
email_chain, cover_letter_chain, research_statement_chain, sop_chain = (
    LLMChain(llm=llm, prompt=prompt_template)
    for prompt_template in (
        email_template,
        cover_letter_template,
        research_statement_template,
        sop_template,
    )
)

# User Inputs for Document Generation
st.subheader("📩 Generate Application Documents")


def _generate_and_show(chain, inputs, label):
    """Run ``chain`` on ``inputs`` and show the generated text.

    Args:
        chain: Object exposing ``run(inputs)`` that returns the generated text.
        inputs: Dict mapping the prompt's placeholder names to their values.
        label: Label of the text area in which the result is displayed.
    """
    st.text_area(label, chain.run(inputs), height=250)


tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])

with tab1:
    recipient = st.text_input("Recipient Name")
    position = st.text_input("Position Name")
    research_interests = st.text_area("Research Interests")
    reason = st.text_area("Why this professor/lab?")
    if st.button("Generate Cold Email"):
        _generate_and_show(
            email_chain,
            {
                "recipient_name": recipient,
                "position_name": position,
                "research_interests": research_interests,
                "reason": reason,
                "resume_text": extracted_text,
            },
            "Generated Cold Email",
        )

with tab2:
    job_title = st.text_input("Job Title")
    company = st.text_input("Company/University")
    key_skills = st.text_area("Key Skills")
    if st.button("Generate Cover Letter"):
        _generate_and_show(
            cover_letter_chain,
            {
                "job_title": job_title,
                "company": company,
                "key_skills": key_skills,
                "resume_text": extracted_text,
            },
            "Generated Cover Letter",
        )

with tab3:
    # The prompt needs research interests, so this tab gets its own field
    # instead of relying solely on what was typed in the Cold Email tab.
    # The explicit key keeps this widget distinct from the same-labelled one
    # in the other tabs. An empty field falls back to the Cold Email value.
    statement_interests = st.text_area("Research Interests", key="research_statement_interests")
    research_goals = st.text_area("Future Research Goals")
    if st.button("Generate Research Statement"):
        _generate_and_show(
            research_statement_chain,
            {
                "research_interests": statement_interests or research_interests,
                "goals": research_goals,
                "resume_text": extracted_text,
            },
            "Generated Research Statement",
        )

with tab4:
    program_name = st.text_input("Program Name")
    university = st.text_input("University")
    # Same reasoning as in the Research Statement tab: dedicated, uniquely
    # keyed field with a fallback to the Cold Email value.
    sop_interests = st.text_area("Research Interests", key="sop_research_interests")
    career_goals = st.text_area("Career Goals")
    if st.button("Generate SOP"):
        _generate_and_show(
            sop_chain,
            {
                "program_name": program_name,
                "university": university,
                "research_interests": sop_interests or research_interests,
                "career_goals": career_goals,
                "resume_text": extracted_text,
            },
            "Generated SOP",
        )