Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,7 @@ import PyPDF2
|
|
| 4 |
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor, pipeline
|
| 5 |
from gtts import gTTS
|
| 6 |
from PIL import Image
|
| 7 |
-
|
| 8 |
|
| 9 |
# Function to extract text from a PDF
|
| 10 |
def extract_text_from_pdf(pdf_file):
|
|
@@ -28,12 +28,18 @@ def text_to_speech(text):
|
|
| 28 |
|
| 29 |
# Function for document question answering
|
| 30 |
def answer_questions(pdf_file, question):
|
| 31 |
-
|
|
|
|
| 32 |
processor = Pix2StructProcessor.from_pretrained("google/pix2struct-docvqa-large")
|
| 33 |
model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-docvqa-large")
|
| 34 |
|
| 35 |
answers = []
|
| 36 |
-
for
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
inputs = processor(images=img, text=question, return_tensors="pt")
|
| 38 |
outputs = model.generate(**inputs)
|
| 39 |
answer = processor.decode(outputs[0], skip_special_tokens=True)
|
|
|
|
| 4 |
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor, pipeline
|
| 5 |
from gtts import gTTS
|
| 6 |
from PIL import Image
|
| 7 |
+
import fitz # PyMuPDF
|
| 8 |
|
| 9 |
# Function to extract text from a PDF
|
| 10 |
def extract_text_from_pdf(pdf_file):
|
|
|
|
| 28 |
|
| 29 |
# Function for document question answering
|
| 30 |
def answer_questions(pdf_file, question):
|
| 31 |
+
# Open PDF using PyMuPDF
|
| 32 |
+
doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
|
| 33 |
processor = Pix2StructProcessor.from_pretrained("google/pix2struct-docvqa-large")
|
| 34 |
model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-docvqa-large")
|
| 35 |
|
| 36 |
answers = []
|
| 37 |
+
for page in doc:
|
| 38 |
+
# Convert page to an image
|
| 39 |
+
pix = page.get_pixmap()
|
| 40 |
+
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 41 |
+
|
| 42 |
+
# Process the image for Q&A
|
| 43 |
inputs = processor(images=img, text=question, return_tensors="pt")
|
| 44 |
outputs = model.generate(**inputs)
|
| 45 |
answer = processor.decode(outputs[0], skip_special_tokens=True)
|