Spaces:
Running
Update app.py: replace easyocr with pytesseract for OCR text extraction
Browse files
app.py
CHANGED
@@ -7,14 +7,14 @@ from transformers import pipeline
|
|
7 |
from gtts import gTTS
|
8 |
import tempfile
|
9 |
import os
|
10 |
-
import easyocr
|
11 |
|
12 |
app = FastAPI()
|
13 |
|
14 |
# Models
|
15 |
caption_model = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
|
16 |
vqa_model = pipeline("visual-question-answering", model="dandelin/vilt-b32-finetuned-vqa")
|
17 |
-
|
18 |
|
19 |
def process_image_question(image: Image.Image, question: str):
|
20 |
if image is None:
|
@@ -24,9 +24,8 @@ def process_image_question(image: Image.Image, question: str):
|
|
24 |
# Convert image to numpy
|
25 |
np_image = np.array(image)
|
26 |
|
27 |
-
# OCR text
|
28 |
-
|
29 |
-
extracted_text = "\n".join(ocr_texts)
|
30 |
|
31 |
# Caption
|
32 |
caption = caption_model(image)[0]['generated_text']
|
|
|
7 |
from gtts import gTTS
|
8 |
import tempfile
|
9 |
import os
|
10 |
+
import pytesseract # ✅ Replacing easyocr
|
11 |
|
12 |
app = FastAPI()
|
13 |
|
14 |
# Models
|
15 |
caption_model = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
|
16 |
vqa_model = pipeline("visual-question-answering", model="dandelin/vilt-b32-finetuned-vqa")
|
17 |
+
|
18 |
|
19 |
def process_image_question(image: Image.Image, question: str):
|
20 |
if image is None:
|
|
|
24 |
# Convert image to numpy
|
25 |
np_image = np.array(image)
|
26 |
|
27 |
+
# OCR text using pytesseract
|
28 |
+
extracted_text = pytesseract.image_to_string(image)
|
|
|
29 |
|
30 |
# Caption
|
31 |
caption = caption_model(image)[0]['generated_text']
|