Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,22 +7,12 @@ from langchain.schema import StrOutputParser
|
|
7 |
from docx import Document
|
8 |
import fitz # PyMuPDF
|
9 |
|
10 |
-
def
|
11 |
-
"""Extract text from PDF
|
12 |
-
filename = file.name
|
13 |
text = ""
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
for page in doc:
|
18 |
-
text += page.get_text()
|
19 |
-
elif filename.endswith('.docx'):
|
20 |
-
# Extract text from Word document
|
21 |
-
doc = Document(file)
|
22 |
-
for paragraph in doc.paragraphs:
|
23 |
-
text += paragraph.text + "\n"
|
24 |
-
else:
|
25 |
-
text = "Unsupported file format. Please upload a PDF or Word document."
|
26 |
return text
|
27 |
|
28 |
def create_multiple_choice_prompt(num_questions, quiz_context, expertise):
|
|
|
7 |
from docx import Document
|
8 |
import fitz # PyMuPDF
|
9 |
|
10 |
+
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF.

    Args:
        file: Binary file-like object exposing ``read()``; its entire
            contents are read and parsed as a PDF.

    Returns:
        str: The text of all pages concatenated in page order (an empty
        string when the document yields no text).
    """
    # Parse from the in-memory bytes; ``filetype`` tells PyMuPDF how to
    # interpret the stream since there is no filename to infer it from.
    doc = fitz.open(stream=file.read(), filetype="pdf")
    try:
        return "".join(page.get_text() for page in doc)
    finally:
        # Always release the document, even if a page fails to extract.
        doc.close()
|
17 |
|
18 |
def create_multiple_choice_prompt(num_questions, quiz_context, expertise):
|