Samay42 committed on
Commit
369caf5
·
verified ·
1 Parent(s): 5f89d45

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -15
app.py CHANGED
@@ -7,22 +7,12 @@ from langchain.schema import StrOutputParser
7
  from docx import Document
8
  import fitz # PyMuPDF
9
 
10
- def extract_text_from_pdf_or_docx(file):
11
- """Extract text from PDF or Word document."""
12
- filename = file.name
13
  text = ""
14
- if filename.endswith('.pdf'):
15
- # Extract text from PDF
16
- with fitz.open(file) as doc:
17
- for page in doc:
18
- text += page.get_text()
19
- elif filename.endswith('.docx'):
20
- # Extract text from Word document
21
- doc = Document(file)
22
- for paragraph in doc.paragraphs:
23
- text += paragraph.text + "\n"
24
- else:
25
- text = "Unsupported file format. Please upload a PDF or Word document."
26
  return text
27
 
28
  def create_multiple_choice_prompt(num_questions, quiz_context, expertise):
 
7
  from docx import Document
8
  import fitz # PyMuPDF
9
 
10
+ def extract_text_from_pdf(file):
11
+ """Extract text from PDF."""
 
12
  text = ""
13
+ doc = fitz.open(stream=file.read(), filetype="pdf")
14
+ for page in doc:
15
+ text += page.get_text()
 
 
 
 
 
 
 
 
 
16
  return text
17
 
18
  def create_multiple_choice_prompt(num_questions, quiz_context, expertise):