File size: 3,070 Bytes
f156c0f
 
 
 
 
 
 
7c3b331
f156c0f
 
 
 
 
 
 
 
62f1b80
f156c0f
 
 
 
 
 
62f1b80
f156c0f
 
 
 
 
f172c98
f156c0f
 
 
 
 
 
 
 
 
62f1b80
f156c0f
 
 
 
 
 
 
62f1b80
f156c0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62f1b80
 
f156c0f
 
 
 
 
f172c98
f156c0f
 
 
 
 
62f1b80
f156c0f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from gradio import Interface, File
import requests
import io
import base64
import json

# --- Request configuration -------------------------------------------------
# Placeholder credential: substitute a real key before running. It is sent
# verbatim as the Bearer token of every request.
# NOTE(review): the original comment calls this a Google AI Studio key, while
# ENDPOINT below targets aiplatform.googleapis.com -- confirm which credential
# type that host actually accepts.
API_KEY = "GOOGLE_API_KEY"

# Values substituted into the ENDPOINT template.
project_id = "genai-test-396922"
location = "us-central1"
model_id = "gemini-2.0-flash-exp"

# URL template; the {project_id}, {location} and {model_id} placeholders are
# filled in with str.format() at request time.
# NOTE(review): adapt the path if a different (e.g. multimodal) method is needed.
ENDPOINT = (
    "https://aiplatform.googleapis.com/v1"
    "/projects/{project_id}"
    "/locations/{location}"
    "/models/{model_id}:textGenerate"
)

def encode_file(file_obj, file_type):
    """Return the content of a binary file-like object as base64 text.

    Args:
        file_obj: Binary file-like object. It must provide ``read()``;
            in-memory buffers that also provide ``getvalue()`` are supported.
        file_type: Type tag of the file (the caller passes the lower-cased
            file extension). ``"pdf"`` is always read with ``read()``; any
            other type uses ``getvalue()`` when the object has it and falls
            back to ``read()`` otherwise.

    Returns:
        The base64-encoded content as a ``str``, or ``None`` when the content
        could not be read or encoded (the error is printed).
    """
    try:
        get_value = getattr(file_obj, "getvalue", None)
        if file_type != "pdf" and callable(get_value):
            # In-memory buffers: take the whole buffer regardless of position.
            content = get_value()
        else:
            # Handles returned by open(..., "rb") have no getvalue(); calling
            # it used to fail and silently drop every image and video.
            content = file_obj.read()
        return base64.b64encode(content).decode("utf-8")
    except Exception as e:
        # Best-effort contract: report the problem and let the caller skip
        # this file instead of aborting the whole request.
        print(f"Error encoding file: {e}")
        return None

def generate_response(text_prompt, files):
    """Send a text prompt plus optional uploaded files to the model endpoint.

    Every uploaded file is opened in binary mode, base64-encoded with
    ``encode_file`` and appended to the request as
    ``{<lower-cased extension>: <base64 text>}``. Files whose encoding fails
    are skipped.

    Args:
        text_prompt: Prompt text; sent as the first input item.
        files: Iterable of uploaded files, or ``None`` when nothing was
            uploaded. Each item is either a path string or an object whose
            ``name`` attribute is the path on disk; ``None`` items are skipped.

    Returns:
        The ``generated_text`` field of the JSON response (or a fallback
        message when the key is missing). When a file cannot be opened or the
        request fails, a human-readable error string is returned instead of
        raising.
    """
    # NOTE(review): the payload shape ({"inputs": [...]}) and the
    # "generated_text" response key are kept as-is; confirm them against the
    # API that ENDPOINT actually exposes.
    url = ENDPOINT.format(project_id=project_id, location=location, model_id=model_id)
    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}

    inputs = [{"text": text_prompt}]
    # `files` may be None (nothing uploaded); treat that as "no files".
    for file_data in files or []:
        if file_data is None:
            continue

        # Accept both wrapper objects exposing .name and plain path strings.
        file_path = getattr(file_data, "name", file_data)
        file_type = file_path.split(".")[-1].lower()

        try:
            with open(file_path, "rb") as file_obj:
                encoded_content = encode_file(file_obj, file_type)
        except Exception as e:
            return f"Error processing file {file_path}: {e}"

        if encoded_content:
            inputs.append({file_type: encoded_content})  # Use file type as key

    request_body = {"inputs": inputs}

    # Bound before the try so the handler below can tell "no response at all"
    # (connection failure) apart from "error response received".
    response = None
    try:
        response = requests.post(url, headers=headers, json=request_body)
        response.raise_for_status()  # Raises for 4xx / 5xx status codes
        return response.json().get("generated_text", "No generated text found in response.")
    except requests.exceptions.RequestException as e:
        if response is None:
            return f"Request Exception: {e}"
        try:
            error_message = response.json().get("error", {}).get("message", "Unknown error")
        except (ValueError, AttributeError):
            # Body is not JSON, or not shaped like {"error": {"message": ...}}.
            error_message = response.text
        return f"API Error: {response.status_code} - {error_message}"
    except Exception as e:
        return f"An unexpected error occurred: {e}"


# Define the Gradio interface: one text prompt plus any number of uploaded
# files, with the reply from generate_response shown as text.
interface = Interface(
    fn=generate_response,
    inputs=[
        "text",
        # Every entry of `inputs` must be a component (or its string shortcut);
        # the File component is therefore listed directly, not inside a nested
        # list. file_count="multiple" is what allows several uploads at once.
        File(
            label="Upload Files (Images, Videos, PDFs)",
            file_types=[".png", ".jpg", ".jpeg", ".mp4", ".mov", ".pdf"],
            file_count="multiple",
        ),
    ],
    outputs="text",
    title="Multimodal Prompt with Gemini 2.0 (Experimental)",
    description="Enter a text prompt and upload one or more files (images, videos, PDFs).",
)

# share=True asks Gradio to also create a public share link.
interface.launch(share=True)