Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,52 +1,48 @@
|
|
1 |
-
from fastapi import FastAPI
|
2 |
-
from fastapi.responses import RedirectResponse
|
3 |
-
import
|
|
|
4 |
from PIL import Image
|
5 |
-
import
|
6 |
-
from transformers import pipeline
|
7 |
from gtts import gTTS
|
|
|
8 |
import tempfile
|
9 |
-
import
|
10 |
-
import pytesseract # ✅ Replacing easyocr
|
11 |
-
|
12 |
-
|
13 |
|
14 |
app = FastAPI()
|
15 |
|
16 |
-
#
|
17 |
-
|
18 |
-
vqa_model =
|
19 |
-
|
20 |
|
21 |
-
def process_image_question(image: Image.Image, question: str):
|
22 |
-
if image is None:
|
23 |
-
return "No image uploaded.", None
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
31 |
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
# Visual QA
|
36 |
-
vqa_result = vqa_model(image=image, question=question)
|
37 |
-
answer = vqa_result[0]['answer']
|
38 |
-
|
39 |
-
# Answer as speech
|
40 |
tts = gTTS(text=answer)
|
41 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
|
42 |
tts.save(tmp.name)
|
43 |
audio_path = tmp.name
|
|
|
|
|
44 |
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
|
|
47 |
|
48 |
-
except Exception as e:
|
49 |
-
return f"❌ Error: {e}", None
|
50 |
|
51 |
gui = gr.Interface(
|
52 |
fn=process_image_question,
|
@@ -55,15 +51,15 @@ gui = gr.Interface(
|
|
55 |
gr.Textbox(lines=2, placeholder="Ask a question about the image...", label="Question")
|
56 |
],
|
57 |
outputs=[
|
58 |
-
gr.Textbox(label="
|
59 |
gr.Audio(label="Answer (Audio)", type="filepath")
|
60 |
],
|
61 |
title="🧠 Image QA with Voice",
|
62 |
-
description="Upload an image and ask
|
63 |
)
|
64 |
|
65 |
app = gr.mount_gradio_app(app, gui, path="/")
|
66 |
|
67 |
@app.get("/")
|
68 |
def home():
|
69 |
-
return RedirectResponse(url="/")
|
|
|
1 |
+
from fastapi import FastAPI, UploadFile, Form
|
2 |
+
from fastapi.responses import RedirectResponse, FileResponse, JSONResponse
|
3 |
+
import os
|
4 |
+
import shutil
|
5 |
from PIL import Image
|
6 |
+
from transformers import ViltProcessor, ViltForQuestionAnswering
|
|
|
7 |
from gtts import gTTS
|
8 |
+
import torch
|
9 |
import tempfile
|
10 |
+
import gradio as gr
|
|
|
|
|
|
|
11 |
|
12 |
app = FastAPI()
|
13 |
|
14 |
+
# Load VQA Model
|
15 |
+
vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
|
16 |
+
vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
|
|
|
17 |
|
|
|
|
|
|
|
18 |
|
19 |
+
def answer_question_from_image(image, question):
|
20 |
+
if image is None or not question.strip():
|
21 |
+
return "Please upload an image and ask a question.", None
|
22 |
|
23 |
+
# Process with model
|
24 |
+
inputs = vqa_processor(image, question, return_tensors="pt")
|
25 |
+
with torch.no_grad():
|
26 |
+
outputs = vqa_model(**inputs)
|
27 |
+
predicted_id = outputs.logits.argmax(-1).item()
|
28 |
+
answer = vqa_model.config.id2label[predicted_id]
|
29 |
|
30 |
+
# Generate TTS audio
|
31 |
+
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
tts = gTTS(text=answer)
|
33 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
|
34 |
tts.save(tmp.name)
|
35 |
audio_path = tmp.name
|
36 |
+
except Exception as e:
|
37 |
+
return f"Answer: {answer}\n\n⚠️ Audio generation error: {e}", None
|
38 |
|
39 |
+
return answer, audio_path
|
40 |
+
|
41 |
+
|
42 |
+
def process_image_question(image: Image.Image, question: str):
|
43 |
+
answer, audio_path = answer_question_from_image(image, question)
|
44 |
+
return answer, audio_path
|
45 |
|
|
|
|
|
46 |
|
47 |
gui = gr.Interface(
|
48 |
fn=process_image_question,
|
|
|
51 |
gr.Textbox(lines=2, placeholder="Ask a question about the image...", label="Question")
|
52 |
],
|
53 |
outputs=[
|
54 |
+
gr.Textbox(label="Answer", lines=5),
|
55 |
gr.Audio(label="Answer (Audio)", type="filepath")
|
56 |
],
|
57 |
title="🧠 Image QA with Voice",
|
58 |
+
description="Upload an image and ask a question. You'll get a text + spoken answer."
|
59 |
)
|
60 |
|
61 |
app = gr.mount_gradio_app(app, gui, path="/")
|
62 |
|
63 |
@app.get("/")
|
64 |
def home():
|
65 |
+
return RedirectResponse(url="/")
|