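"""Hugging Face Space: chat about an uploaded image with a 4-bit quantized
Molmo-7B model.

The app installs flash-attn and bitsandbytes at startup, loads the
cyan2k/molmo-7B-D-bnb-4bit checkpoint with trust_remote_code, and exposes a
simple Gradio image-plus-chat interface.
"""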
import gradio as gr
from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
from PIL import Image
import torch
import os
import subprocess
# Install GPU-only extras at runtime (a common Spaces workaround: flash-attn
# cannot be compiled in the default build step, so the CUDA build is skipped)
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
# 4-bit loading needs a current bitsandbytes; the CUDA-11.7-pinned
# bitsandbytes-cuda117 package is stale and predates 4-bit support
os.system('pip install -U bitsandbytes')
# Repository hosting the pre-quantized (bitsandbytes 4-bit) Molmo checkpoint
repo_name = "cyan2k/molmo-7B-D-bnb-4bit"

# Molmo ships custom modeling/processing code, so trust_remote_code is required
processor = AutoProcessor.from_pretrained(repo_name, trust_remote_code=True)

# Load the model in 4-bit. device_map="auto" lets accelerate place the weights;
# a quantized, dispatched model must not be moved afterwards with .to().
model = AutoModelForCausalLM.from_pretrained(
    repo_name,
    device_map="auto",
    torch_dtype=torch.float16,
    load_in_4bit=True,
    trust_remote_code=True,
)
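# ZeroGPU ("Running on Zero"): a GPU slice is attached only while a function
# decorated with @spaces.GPU executes; the `spaces` package is preinstalled on
# Hugging Face Spaces. Assumed ZeroGPU setup, per the Space's header.
import spaces

@spaces.GPU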
def process_image_and_text(image, text):
    # Gradio hands us a numpy array; convert to an RGB PIL image for Molmo
    pil_image = Image.fromarray(image).convert("RGB")
    # Molmo's remote-code processor exposes process() rather than __call__()
    inputs = processor.process(images=[pil_image], text=text)
    # Move tensors to the model's device and add a batch dimension
    inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
    # Generate with Molmo's generate_from_batch helper
    output = model.generate_from_batch(
        inputs,
        GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
        tokenizer=processor.tokenizer,
    )
    # Decode only the newly generated tokens, skipping the prompt
    generated_tokens = output[0, inputs["input_ids"].size(1):]
    return processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
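# For reference, the helper can be exercised outside the UI (hypothetical
# file name and prompt):
#   import numpy as np
#   img = np.array(Image.open("example.jpg"))
#   print(process_image_and_text(img, "What objects are in this image?"))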
def chatbot(image, text, history):
    # Require an image before answering
    if image is None:
        history = history + [(text, "Please upload an image first.")]
        return history, history
    # Run the model on the image/question pair
    response = process_image_and_text(image, text)
    # Record the exchange as a (user, assistant) tuple for gr.Chatbot
    history = history + [(text, response)]
    return history, history
# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Image Chatbot with Molmo-7B (4-bit Quantized)")
    with gr.Row():
        image_input = gr.Image(type="numpy")
        chatbot_output = gr.Chatbot()
    text_input = gr.Textbox(placeholder="Ask a question about the image...")
    submit_button = gr.Button("Submit")
    state = gr.State([])

    # Update both the visible chat and the stored history on each turn
    submit_button.click(fn=chatbot, inputs=[image_input, text_input, state], outputs=[chatbot_output, state])
    text_input.submit(fn=chatbot, inputs=[image_input, text_input, state], outputs=[chatbot_output, state])

# Launch the Gradio app (share=True is ignored on Spaces but handy locally)
demo.launch(share=True)
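# To run locally instead of on Spaces (assumed environment with a CUDA GPU):
#   pip install torch gradio transformers accelerate bitsandbytes
#   python app.py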