Spaces:
Running
Running
File size: 3,070 Bytes
f156c0f 7c3b331 f156c0f 62f1b80 f156c0f 62f1b80 f156c0f f172c98 f156c0f 62f1b80 f156c0f 62f1b80 f156c0f 62f1b80 f156c0f f172c98 f156c0f 62f1b80 f156c0f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import base64
import io
import json
import os

import requests
from gradio import Interface, File
# API credential. Read from the GOOGLE_API_KEY environment variable so the
# secret does not have to be committed to the source; when the variable is
# unset or empty the original placeholder string is kept.
# NOTE(review): the original comment calls this a Google AI Studio API key,
# but the request below sends it as a Bearer token to an aiplatform host --
# confirm which credential type this endpoint actually accepts.
API_KEY = os.environ.get("GOOGLE_API_KEY") or "GOOGLE_API_KEY"

# Values substituted into the ENDPOINT template below.
project_id = "genai-test-396922"
location = "us-central1"
model_id = "gemini-2.0-flash-exp"

# URL template for the text-generation call; the placeholders are filled in
# with str.format() at request time.
# NOTE(review): adapt the path/method if a specific multimodal endpoint is
# required -- the ":textGenerate" method name has not been verified here.
ENDPOINT = "https://aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/models/{model_id}:textGenerate"
def encode_file(file_obj, file_type):
    """Return the contents of *file_obj* as a base64-encoded UTF-8 string.

    Args:
        file_obj: A binary source. In-memory buffers exposing ``getvalue()``
            and ordinary file handles exposing only ``read()`` are both
            accepted.
        file_type: Lower-case file extension (e.g. ``"pdf"``, ``"png"``).
            PDFs are always consumed with ``read()``.

    Returns:
        The base64 text, or ``None`` if the content could not be read or
        encoded (the error is printed).
    """
    try:
        # Handles returned by open(..., "rb") have no getvalue(); only use it
        # when the object really provides it, otherwise fall back to read().
        if file_type != "pdf" and hasattr(file_obj, "getvalue"):
            content = file_obj.getvalue()
        else:
            content = file_obj.read()
        return base64.b64encode(content).decode("utf-8")
    except Exception as e:
        # Best-effort: report the problem and let the caller skip this file.
        print(f"Error encoding file: {e}")
        return None
def generate_response(text_prompt, files):
    """Send a text prompt plus optional uploaded files to the model endpoint.

    Each file is base64-encoded with ``encode_file`` and appended to the
    request inputs under a key equal to its lower-cased extension.

    Args:
        text_prompt: Prompt text; sent as the first ``{"text": ...}`` input.
        files: Iterable of uploaded files, or ``None`` when nothing was
            uploaded. Each entry is either an object whose ``.name`` is the
            file path or a plain path string; ``None`` entries are skipped.

    Returns:
        The ``"generated_text"`` field of the JSON response, or a
        human-readable error string if a file could not be processed or the
        request failed. This function reports failures through its return
        value rather than by raising.
    """
    url = ENDPOINT.format(project_id=project_id, location=location, model_id=model_id)
    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
    inputs = [{"text": text_prompt}]

    # No upload arrives as None rather than an empty list.
    for file_data in files or []:
        if file_data is None:
            continue
        # Accept both objects carrying a .name path and bare path strings.
        file_path = getattr(file_data, "name", file_data)
        file_type = file_path.split(".")[-1].lower()
        try:
            with open(file_path, "rb") as file_obj:
                encoded_content = encode_file(file_obj, file_type)
            # encode_file returns None on failure; such files are skipped.
            if encoded_content:
                inputs.append({file_type: encoded_content})  # Use file type as key
        except Exception as e:
            return f"Error processing file {file_path}: {e}"

    request_body = {"inputs": inputs}

    # Must be bound before the try: if requests.post() itself raises (e.g. a
    # connection error) the handler below still needs a defined name.
    response = None
    try:
        response = requests.post(url, headers=headers, json=request_body)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        return response.json().get("generated_text", "No generated text found in response.")  # Handle missing key
    except requests.exceptions.RequestException as e:
        if response is None:
            return f"Request Exception: {e}"
        try:
            error_message = response.json().get("error", {}).get("message", "Unknown error")
        except (ValueError, AttributeError):
            # Body is not JSON, or not shaped like {"error": {"message": ...}};
            # fall back to the raw text so the caller still sees something.
            error_message = response.text
        return f"API Error: {response.status_code} - {error_message}"
    except Exception as e:
        return f"An unexpected error occurred: {e}"
# Define the Gradio interface: one text prompt plus any number of uploaded
# files, with the model's reply shown as text.
interface = Interface(
    fn=generate_response,
    inputs=[
        "text",
        # Each entry of `inputs` must be a component (or its string
        # shortcut), so the File component is listed directly rather than
        # wrapped in a nested list. file_count="multiple" already makes it
        # deliver a list of files to generate_response.
        File(
            label="Upload Files (Images, Videos, PDFs)",
            file_types=[".png", ".jpg", ".jpeg", ".mp4", ".mov", ".pdf"],
            file_count="multiple",
        ),
    ],
    outputs="text",
    title="Multimodal Prompt with Gemini 2.0 (Experimental)",
    description="Enter a text prompt and upload one or more files (images, videos, PDFs).",
)

# Start the web app; share=True requests a public share link.
interface.launch(share=True)