File size: 940 Bytes
236cd54
 
 
 
 
 
 
 
c541eea
 
236cd54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b7ffc7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import mimetypes
import os

import google.generativeai as genai
import gradio as gr

# The Gemini API key is read from the environment; on Hugging Face, define
# GEMINI_API_KEY as a secret so it never lands in the repository.
API_KEY = os.environ.get("GEMINI_API_KEY")
genai.configure(api_key=API_KEY)

# Gemini 2.0 Flash (free-tier accessible) handles every VQA request.
model = genai.GenerativeModel(model_name="gemini-2.0-flash")

# Image formats whose extensions are missing from the ``mimetypes`` table on
# some Python versions / platforms; used as a fallback lookup by extension.
_EXTRA_IMAGE_MIME_TYPES = {
    ".webp": "image/webp",
    ".heic": "image/heic",
    ".heif": "image/heif",
}


def _as_image_part(image):
    """Convert a file path into an inline image blob; pass other objects through."""
    if not isinstance(image, (str, os.PathLike)):
        # Already an image object (or blob); let the SDK handle it as-is.
        return image

    path = os.fspath(image)
    mime_type, _ = mimetypes.guess_type(path)
    if mime_type is None:
        extension = os.path.splitext(path)[1].lower()
        # NOTE(review): "image/png" is a best-effort default for unknown
        # extensions -- confirm it suits the files this app receives.
        mime_type = _EXTRA_IMAGE_MIME_TYPES.get(extension, "image/png")

    with open(path, "rb") as image_file:
        return {"mime_type": mime_type, "data": image_file.read()}


def vqa(image, question):
    """Answer *question* about *image* using the Gemini API.

    A bare string handed to ``generate_content`` is sent as text, so an image
    given as a file path is read from disk and sent as inline image bytes
    instead; otherwise the model would only see the path, not the picture.

    Args:
        image: Path to an image file, or any other object to be forwarded
            unchanged as the image part. ``None`` means no image was given.
        question: The question to ask about the image.

    Returns:
        The model's answer text, or a message starting with ``"Error: "``
        when an input is missing or the request fails.
    """
    if image is None:
        return "Error: Please upload an image."
    if not (question and question.strip()):
        return "Error: Please enter a question."

    try:
        image_part = _as_image_part(image)
        response = model.generate_content([question, image_part])
        return response.text
    except Exception as e:  # UI boundary: report the failure instead of crashing
        return f"Error: {e}"

# Create the Gradio interface.
#
# The image input uses type="pil" so the handler receives an image object.
# The Gemini SDK treats a bare string (such as a file path) as text, so a
# "filepath" input would never deliver the actual picture to the model.
vqa_interface = gr.Interface(
    fn=vqa,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
    outputs=gr.Textbox(label="Answer"),
    title="Visual Question Answering (VQA)",
    description="Upload an image and ask a question about it.",
)


# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    vqa_interface.launch()