DHEIVER's picture
Update app.py
3d2d4c8 verified
raw
history blame
2.38 kB
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
import gradio as gr
# Initial setup: seed PyTorch's random number generator with a fixed value.
torch.manual_seed(100)

# The model weights and the tokenizer are both loaded from the same Hub repo.
# NOTE(review): the "-gguf" suffix suggests llama.cpp-format weights; confirm
# that AutoModel.from_pretrained can load from this repo (the non-GGUF
# 'openbmb/MiniCPM-V-2_6' repo may be what was intended).
MODEL_ID = 'openbmb/MiniCPM-V-2_6-gguf'

model = AutoModel.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    attn_implementation='sdpa',
    torch_dtype=torch.bfloat16,
)
# Switch to evaluation mode and move the model to the CUDA device.
model = model.eval().cuda()
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
# Core chat helper: sends one question (plus any earlier turns) to the model.
def chat_with_model(image, question, chat_history=None):
    """Ask the model a question, optionally about an image, and return its answer.

    Args:
        image: A PIL image, a string path to an image file, or ``None`` when
            the turn carries no image (e.g. nothing was uploaded in the UI).
        question: The user's question text.
        chat_history: Earlier turns as a list of ``{"role", "content"}``
            dicts, oldest first, or ``None`` to start a new conversation.
            The list passed in is not modified.

    Returns:
        A ``(answer, history)`` tuple, where ``history`` is a new list holding
        the earlier turns followed by this user turn and the model's reply.
    """
    # Work on a copy so the caller's list is never mutated behind its back.
    history = list(chat_history) if chat_history else []

    # Convert the image to RGB; a string is treated as a file path.
    if isinstance(image, str):
        image = Image.open(image).convert('RGB')
    elif image is not None:
        image = image.convert('RGB')

    # With no image the turn is text-only; previously a missing image crashed
    # on ``None.convert``.
    content = [question] if image is None else [image, question]
    user_turn = {'role': 'user', 'content': content}

    # Earlier turns go first so the conversation is in chronological order and
    # the final message is the new user question (the original put the new
    # question ahead of the history).
    msgs = history + [user_turn]

    # NOTE(review): the MiniCPM-V-2_6 model card calls
    # chat(image=None, msgs=..., tokenizer=...) — confirm whether ``image``
    # must be passed explicitly for the model that is actually loaded.
    answer = model.chat(
        msgs=msgs,
        tokenizer=tokenizer
    )

    # Record this exchange for the next call.
    history.append(user_turn)
    history.append({"role": "assistant", "content": [answer]})

    return answer, history
# Gradio callback: the function bound to the Submit button's click event.
def gradio_interface(image, question, chat_history=None):
    """Forward the UI inputs to ``chat_with_model`` and return its results.

    Returns:
        A two-item tuple: the model's reply and the updated conversation
        history, in that order.
    """
    reply, history = chat_with_model(image, question, chat_history)
    return reply, history
# Build the Gradio interface.
with gr.Blocks() as demo:
    # Page heading and a short usage hint.
    gr.Markdown("# MiniCPM-o-2_6 Chat with Images")
    gr.Markdown("Envie uma imagem e faça perguntas sobre ela.")

    # NOTE(review): assumes the row holds only the two inputs — confirm layout.
    with gr.Row():
        image_input = gr.Image(label="Upload Image", type="pil")
        question_input = gr.Textbox(
            label="Your Question",
            placeholder="What is in the image?",
        )

    # Holds the conversation history between button clicks.
    chat_history = gr.State([])
    output_text = gr.Textbox(label="Model Response", interactive=False)
    submit_button = gr.Button("Submit")

    # On click, pass the image, question and stored history to the callback,
    # then show the reply and store the updated history.
    callback_inputs = [image_input, question_input, chat_history]
    callback_outputs = [output_text, chat_history]
    submit_button.click(
        fn=gradio_interface,
        inputs=callback_inputs,
        outputs=callback_outputs,
    )

# Start the interface.
demo.launch()