Nasma committed on
Commit
292ea38
·
verified ·
1 Parent(s): a11b122

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdfplumber
2
+ import docx
3
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
4
+ import gradio as gr
5
+
6
+ # Function to extract text from PDF using pdfplumber
7
+ def extract_text_from_pdf(pdf_file):
8
+ with pdfplumber.open(pdf_file) as pdf:
9
+ text = ''
10
+ for page in pdf.pages:
11
+ text += page.extract_text()
12
+ return text
13
+
14
+ # Function to extract text from DOCX
15
+ def extract_text_from_docx(docx_file):
16
+ doc = docx.Document(docx_file)
17
+ full_text = []
18
+ for paragraph in doc.paragraphs:
19
+ full_text.append(paragraph.text)
20
+ return '\n'.join(full_text)
21
+
22
+ # Function to generate roast based on resume text
23
+ def generate_roast(resume_text):
24
+ tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
25
+ model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
26
+
27
+ # Define the prompt
28
+ prompt_text = "Roast this resume:\n\n"
29
+
30
+ # Tokenize the prompt
31
+ prompt_tokenized = tokenizer(prompt_text, return_tensors="pt")
32
+ prompt_tokens = prompt_tokenized['input_ids'].shape[1]
33
+
34
+ # Calculate remaining tokens for resume text
35
+ max_resume_tokens = 2048 - prompt_tokens
36
+
37
+ # Tokenize and truncate resume text
38
+ resume_tokenized = tokenizer(resume_text, truncation=True, max_length=max_resume_tokens, return_tensors="pt")
39
+
40
+ # Decode the truncated resume text back into a string
41
+ truncated_resume_text = tokenizer.decode(resume_tokenized['input_ids'][0], skip_special_tokens=True)
42
+
43
+ # Combine prompt and truncated resume text
44
+ final_prompt = f"{prompt_text}{truncated_resume_text}\n\nRoast:"
45
+
46
+ # Generate roast
47
+ generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
48
+ roast = generator(final_prompt, max_new_tokens=50, num_return_sequences=1)
49
+
50
+ return roast[0]['generated_text']
51
+
52
+ # Function to handle file uploads and extract text from resume files
53
+ def roast_resume(file=None, resume_text=None):
54
+ if file:
55
+ # Handle file uploads for PDF or DOCX
56
+ if file.name.endswith('.pdf'):
57
+ resume_text = extract_text_from_pdf(file)
58
+ elif file.name.endswith('.docx'):
59
+ resume_text = extract_text_from_docx(file)
60
+ else:
61
+ return "Unsupported file format. Please upload a PDF or DOCX file."
62
+ elif resume_text:
63
+ # Use pasted resume text
64
+ pass
65
+ else:
66
+ return "No resume provided."
67
+
68
+ # Generate the roast based on extracted or pasted resume text
69
+ roast = generate_roast(resume_text)
70
+ return roast
71
+
72
+ # Gradio interface with file upload or text input options
73
+ interface = gr.Interface(
74
+ fn=roast_resume,
75
+ inputs=[gr.File(label="Upload Resume (PDF/DOCX)"), gr.Textbox(label="Or Paste Your Resume")],
76
+ outputs="text",
77
+ title="Resume Roaster",
78
+ description="Upload your resume in PDF/DOCX format or paste your resume text, and let the AI roast it!"
79
+ )
80
+
81
+ # Launch Gradio app
82
+ interface.launch()