"""Gradio demo: multimodal (text + image) chat with the dd360-v1-3b model."""

import torch
import gradio as gr
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the multimodal causal LM. trust_remote_code is required because the
# checkpoint ships custom modeling code (e.g. the image_preprocess helper
# used below). device_map="auto" lets accelerate place layers on GPU/CPU.
model = AutoModelForCausalLM.from_pretrained(
    "dd360-v1-3b",
    torch_dtype=torch.float32,
    device_map="auto",
    trust_remote_code=True,
)
# NOTE: the original passed torch_dtype here too, but that is not a tokenizer
# argument and was silently ignored — dropped.
tokenizer = AutoTokenizer.from_pretrained("tokenizer-dd360")


def generate_answer(text: str, image: Image.Image) -> str:
    """Generate a model response conditioned on a text prompt and an image.

    Args:
        text: The user's prompt.
        image: PIL image to condition generation on.

    Returns:
        The decoded continuation only (prompt tokens stripped), whitespace-trimmed.
    """
    input_ids = tokenizer(text, return_tensors="pt").input_ids
    # With device_map="auto" the model may live on GPU while tokenizer output
    # is on CPU; move the ids to the model's device to avoid mismatch errors.
    input_ids = input_ids.to(model.device)
    # image_preprocess is provided by the checkpoint's remote code —
    # presumably returns a batched pixel tensor; confirm against the model card.
    image_tensor = model.image_preprocess(image)
    output_ids = model.generate(
        input_ids,
        max_new_tokens=100,
        images=image_tensor,
        use_cache=True,
    )[0]
    # Slice off the prompt so only newly generated tokens are decoded.
    return tokenizer.decode(
        output_ids[input_ids.shape[1]:], skip_special_tokens=True
    ).strip()


# UI widgets: a multiline prompt box and a PIL-typed image upload.
text_input = gr.Textbox(lines=5, label="Enter text")
image_input = gr.Image(type="pil", label="Upload Image")

iface = gr.Interface(
    fn=generate_answer,
    inputs=[text_input, image_input],
    outputs="text",
    title="DD360-Bot-Multimodal",
    description="Enter text and upload an image to receive a response from the chatbot.",
)

# Guard the server launch so importing this module does not start Gradio.
if __name__ == "__main__":
    iface.launch()