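# Gradio demo for cognitivecomputations/dolphin-vision-72b, loaded with 8-bit
# quantization; the weights are cached under /data after the first download so
# later restarts can load them from disk.
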
import gradio as gr
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from PIL import Image
import warnings
import os
# disable some warnings
transformers.logging.set_verbosity_error()
transformers.logging.disable_progress_bar()
warnings.filterwarnings('ignore')
# Set device to GPU if available, else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model_name = 'cognitivecomputations/dolphin-vision-72b'
model_path = '/data/dolphin-vision-72b'
# Configure 8-bit quantization
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_threshold=6.0,
    llm_int8_has_fp16_weight=False
)
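# Note: load_in_8bit=True requires the bitsandbytes package, and
# device_map="auto" relies on accelerate to place the quantized weights on the
# available GPU(s).
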
# Check if the model is already downloaded
if not os.path.exists(model_path):
    print(f"Downloading model to {model_path}")
    # create model and save it to the specified path
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        device_map="auto",
        trust_remote_code=True
    )
    model.save_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.save_pretrained(model_path)
else:
    print(f"Loading model from {model_path}")
    # Load the model from the saved path
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        quantization_config=quantization_config,
        device_map="auto",
        trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
def inference(prompt, image):
    messages = [
        {"role": "user", "content": f'<image>\n{prompt}'}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Tokenize the prompt around the <image> placeholder and splice in the
    # image token id (-200), which the model replaces with the image features
    # during generation
    text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0).to(device)

    image_tensor = model.process_images([image], model.config).to(device)

    # Debug prints: confirm the model and both inputs are on the same device
    print(f"Device of model: {next(model.parameters()).device}")
    print(f"Device of input_ids: {input_ids.device}")
    print(f"Device of image_tensor: {image_tensor.device}")

    # Generate, then decode only the newly generated tokens
    with torch.inference_mode():
        output_ids = model.generate(
            input_ids,
            images=image_tensor,
            max_new_tokens=1024,
            use_cache=True
        )[0]

    return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
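
# A minimal sketch of calling inference() directly, outside Gradio, assuming an
# illustrative local file "example.jpg" exists:
#   img = Image.open("example.jpg").convert("RGB")
#   print(inference("Describe this image in detail", img))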
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(label="Prompt", placeholder="Describe this image in detail")
            image_input = gr.Image(label="Image", type="pil")
            submit_button = gr.Button("Submit")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    submit_button.click(fn=inference, inputs=[prompt_input, image_input], outputs=output_text)

demo.launch(share=True)