import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import gradio as gr

# Load the multimodal model; trust_remote_code is required because the checkpoint
# ships custom code (e.g. image_preprocess and an image-aware generate).
model = AutoModelForCausalLM.from_pretrained(
    "dd360-v1-3b",
    torch_dtype=torch.float32,
    device_map="auto",
    trust_remote_code=True,
)

# Load the matching tokenizer (tokenizers do not use a torch_dtype argument).
tokenizer = AutoTokenizer.from_pretrained("tokenizer-dd360")


def generate_answer(text, image):
    # Tokenize the prompt and place it on the same device as the model.
    input_ids = tokenizer(text, return_tensors='pt').input_ids.to(model.device)
    # Run the model's own preprocessing on the uploaded PIL image.
    image_tensor = model.image_preprocess(image)

    output_ids = model.generate(
        input_ids,
        max_new_tokens=100,
        images=image_tensor,
        use_cache=True,
    )[0]

    # Decode only the newly generated tokens, dropping the prompt tokens.
    return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
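
# Optional sanity check, kept commented out: call the handler directly before
# launching the UI. "example.jpg" is a placeholder path, not part of the original demo.
# sample_image = Image.open("example.jpg")
# print(generate_answer("Describe this image.", sample_image))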


text_input = gr.Textbox(lines=5, label="Enter text")
image_input = gr.Image(type="pil", label="Upload Image")

iface = gr.Interface(
    fn=generate_answer,
    inputs=[text_input, image_input],
    outputs="text",
    title="DD360-Bot-Multimodal",
    description="Enter text and upload an image to receive a response from the chatbot.",
)

iface.launch()
|