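"""Gradio demo for cognitivecomputations/dolphin-vision-72b.

Loads the vision-language model in fp16 across all available GPUs via
device_map="auto" and serves a simple image + prompt interface.
"""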
import gradio as gr
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import warnings
from accelerate import Accelerator
# Silence library warnings and progress bars
transformers.logging.set_verbosity_error()
transformers.logging.disable_progress_bar()
warnings.filterwarnings('ignore')
# Initialize Accelerator
accelerator = Accelerator()
model_name = 'cognitivecomputations/dolphin-vision-72b'
# Determine the number of GPUs available
num_gpus = torch.cuda.device_count()
print(f"Number of GPUs available: {num_gpus}")
# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)
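# Note: 72B parameters in fp16 take roughly 144 GB for the weights alone,
# so device_map="auto" is relied on to shard them across all visible GPUs.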
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)
# Hand the model to Accelerate. With device_map="auto" the weights are
# already dispatched across GPUs, so prepare() mostly registers the model
# with the Accelerator rather than moving it.
model = accelerator.prepare(model)
def inference(prompt, image, temperature, beam_size):
    if image is None:
        return "Please upload an image."

    messages = [
        {"role": "user", "content": f'<image>\n{prompt}'}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Tokenize around the '<image>' placeholder and splice in -200, the
    # image-token index expected by this model's custom generation code.
    text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)

    # Preprocess the image with the model's own vision pipeline; unwrap first
    # so this also works if prepare() wrapped the model.
    unwrapped_model = accelerator.unwrap_model(model)
    image_tensor = unwrapped_model.process_images([image], unwrapped_model.config)

    # Move tensors to the appropriate device and match the fp16 weights
    input_ids = input_ids.to(accelerator.device)
    image_tensor = image_tensor.to(accelerator.device, dtype=torch.float16)

    # Generate. do_sample=True is needed for the temperature slider to take
    # effect; combined with num_beams > 1 this performs beam-sample decoding.
    with torch.cuda.amp.autocast():
        output_ids = unwrapped_model.generate(
            input_ids,
            images=image_tensor,
            max_new_tokens=1024,
            temperature=temperature,
            do_sample=True,
            num_beams=beam_size,
            use_cache=True
        )[0]

    # Decode only the newly generated tokens, skipping the prompt.
    return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
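# Quick smoke test (assumes a local file example.jpg exists; name is
# illustrative only):
#   from PIL import Image
#   print(inference("Describe this image in detail", Image.open("example.jpg"), 0.7, 4))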
# Create Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(label="Prompt", placeholder="Describe this image in detail")
            image_input = gr.Image(label="Image", type="pil")
            temperature_input = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
            beam_size_input = gr.Slider(minimum=1, maximum=10, value=4, step=1, label="Beam Size")
            submit_button = gr.Button("Submit")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    submit_button.click(
        fn=inference,
        inputs=[prompt_input, image_input, temperature_input, beam_size_input],
        outputs=output_text
    )
# Launch the app
demo.launch()
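# When running outside Spaces, a temporary public URL can be requested with:
#   demo.launch(share=True)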