import os
import time
import uuid
from PIL import Image
import google.generativeai as genai
import gradio as gr
from dotenv import load_dotenv
from typing import List, Tuple, Optional, Union
# Load environment variables
load_dotenv()
API_KEY = os.getenv("GOOGLE_API_KEY")
if not API_KEY:
    raise ValueError("The 'GOOGLE_API_KEY' API key is not set in the .env file")
# Gemini model configuration
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}
genai.configure(api_key=API_KEY)
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    generation_config=generation_config,
)
# Initialize the chat session
chat = model.start_chat(history=[])
# Transform Gradio's chat history into the format Gemini expects
def transform_history(history):
    new_history = []
    for user_msg, model_msg in history:
        # Skip non-text entries (e.g. image tuples) and pending turns (None)
        if isinstance(user_msg, str):
            new_history.append({"parts": [{"text": user_msg}], "role": "user"})
        if isinstance(model_msg, str):
            new_history.append({"parts": [{"text": model_msg}], "role": "model"})
    return new_history
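# Illustrative sanity check for transform_history (defined but never called):
# one Gradio turn ("Hi", "Hello!") maps to two Gemini history entries.
def _demo_transform_history():
    history = [("Hi", "Hello!")]
    expected = [
        {"parts": [{"text": "Hi"}], "role": "user"},
        {"parts": [{"text": "Hello!"}], "role": "model"},
    ]
    assert transform_history(history) == expected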
# Response function that handles text from the multimodal textbox
def response(message, history):
    global chat
    # Transform the history into the format Gemini expects
    chat.history = transform_history(history)
    # Send the message to the model and return its reply
    response = chat.send_message(message["text"])
    return response.text
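# Illustrative: with multimodal=True, gr.ChatInterface passes `message` as a
# dict shaped like {"text": "describe this", "files": ["/tmp/xyz.png"]};
# `response` above only forwards the "text" part to Gemini.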
# Constants and configuration
IMAGE_CACHE_DIRECTORY = "/tmp"
IMAGE_WIDTH = 512
CHAT_HISTORY = List[Tuple[Optional[Union[Tuple[str], str]], Optional[str]]]
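# Illustrative CHAT_HISTORY entries: a text turn looks like
# ("What is in this picture?", "A cat."), while an uploaded image is stored
# as (("/tmp/<uuid>.jpeg",), None) so Gradio renders the file inline.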
# Resize the image to a fixed width while preserving its aspect ratio
def preprocess_image(image: Image.Image) -> Optional[Image.Image]:
    if image:
        image_height = int(image.height * IMAGE_WIDTH / image.width)
        return image.resize((IMAGE_WIDTH, image_height))
# Cache the image on disk under a unique filename
def cache_pil_image(image: Image.Image) -> str:
    image_filename = f"{uuid.uuid4()}.jpeg"
    os.makedirs(IMAGE_CACHE_DIRECTORY, exist_ok=True)
    image_path = os.path.join(IMAGE_CACHE_DIRECTORY, image_filename)
    image.save(image_path, "JPEG")
    return image_path
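# Illustrative sanity check for the image helpers (defined but never called):
# a 1024x768 input resizes to 512x384 and the cached file lands under
# IMAGE_CACHE_DIRECTORY with a .jpeg suffix.
def _demo_image_helpers():
    img = Image.new("RGB", (1024, 768))
    resized = preprocess_image(img)
    assert resized.size == (512, 384)
    path = cache_pil_image(resized)
    assert path.startswith(IMAGE_CACHE_DIRECTORY) and path.endswith(".jpeg")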
# Load uploaded images into the chat history
def upload(files: Optional[List[str]], chatbot: CHAT_HISTORY) -> CHAT_HISTORY:
    for file in files:
        image = Image.open(file).convert('RGB')
        image_preview = preprocess_image(image)
        if image_preview:
            # Appending the cached path as a tuple makes Gradio's Chatbot
            # render the image inline; no extra gr.Image component is needed
            image_path = cache_pil_image(image)
            chatbot.append(((image_path,), None))
    return chatbot
# Append the user's text message to the chat history
def user(text_prompt: str, chatbot: CHAT_HISTORY):
    if text_prompt:
        chatbot.append((text_prompt, None))
    return "", chatbot
# Generate the bot's streamed reply from text and/or image prompts
def bot(
    files: Optional[List[str]],
    model_choice: str,
    system_instruction: Optional[str],  # Optional system instruction
    chatbot: CHAT_HISTORY,
):
    if not API_KEY:
        raise ValueError("GOOGLE_API_KEY is not set.")
    genai.configure(api_key=API_KEY)
    generation_config = genai.types.GenerationConfig(
        temperature=0.7,
        max_output_tokens=8192,
        top_k=10,
        top_p=0.9,
    )
    # Fall back to no system instruction when the field is left empty
    if not system_instruction:
        system_instruction = None
    text_prompt = [chatbot[-1][0]] if chatbot and chatbot[-1][0] and isinstance(chatbot[-1][0], str) else []
    image_prompt = [preprocess_image(Image.open(file).convert('RGB')) for file in files] if files else []
    model = genai.GenerativeModel(
        model_name=model_choice,
        generation_config=generation_config,
        system_instruction=system_instruction,
    )
    # Multimodal content generation handles images and text together
    response = model.generate_content(text_prompt + image_prompt, stream=True)
    # Replace the pending tuple with a mutable list so the reply can be
    # appended to incrementally while streaming
    chatbot[-1] = [chatbot[-1][0], ""]
    for chunk in response:
        # Stream the reply in 10-character slices for a typing effect
        for i in range(0, len(chunk.text), 10):
            section = chunk.text[i:i + 10]
            chatbot[-1][1] += section
            time.sleep(0.01)
            yield chatbot
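# Illustrative wiring for `upload`, `user`, and `bot` in a gr.Blocks layout
# (hypothetical component names; the ChatInterface below does not use them):
#
#     with gr.Blocks() as blocks_demo:
#         chatbot_ui = gr.Chatbot()
#         model_dropdown = gr.Dropdown(["gemini-1.5-flash"], value="gemini-1.5-flash")
#         system_box = gr.Textbox(label="System instruction")
#         files_box = gr.File(file_count="multiple", file_types=["image"])
#         text_box = gr.Textbox()
#         text_box.submit(user, [text_box, chatbot_ui], [text_box, chatbot_ui]).then(
#             bot, [files_box, model_dropdown, system_box, chatbot_ui], chatbot_ui
#         )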
# Build the Gradio interface; `response` matches the (message, history)
# signature that gr.ChatInterface expects
demo = gr.ChatInterface(
    response,  # Chat function handling text and attached files
    examples=[  # Initial example messages
        {"text": "No files", "files": []}
    ],
    multimodal=True,  # Enable multimodal input
    textbox=gr.MultimodalTextbox(  # Multimodal textbox configuration
        file_count="multiple",  # Allow multiple files
        file_types=["image"],  # Accept images only
        sources=["upload", "microphone"]  # Input sources: file upload and microphone
    )
)
# Launch the interface
demo.launch()