# Hugging Face Spaces file-viewer residue (status banner, byte count, git blame
# hashes, and line-number gutter) — commented out so this module parses as Python.
import os
import subprocess

import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
# flash-attn normally compiles CUDA kernels at install time; Spaces build
# machines have no nvcc, so FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE tells the
# package to install without building them. Run at import time because a
# Space has no separate setup step.
# NOTE(review): the original passed env={...} alone, which replaced the whole
# process environment (dropping PATH/HOME); merge with os.environ so the
# shell can still resolve `pip`.
subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True,
)
# Pre-load every supported model once at startup (module import) so requests
# don't pay the load cost. Keyed by model id to match the UI dropdown values.
models = {
    "microsoft/Phi-3.5-vision-instruct": AutoModelForCausalLM.from_pretrained("microsoft/Phi-3.5-vision-instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
}
# Matching processors (tokenizer + image preprocessor), same keys as `models`.
processors = {
    "microsoft/Phi-3.5-vision-instruct": AutoProcessor.from_pretrained("microsoft/Phi-3.5-vision-instruct", trust_remote_code=True)
}
# NOTE(review): `kwargs` is built but never referenced anywhere in this file —
# presumably leftover from an earlier from_pretrained call; confirm and remove.
kwargs = {}
kwargs['torch_dtype'] = torch.bfloat16
# Phi-3.5 chat-template delimiters, used to assemble the prompt by hand
# in run_example.
user_prompt = '<|user|>\n'
assistant_prompt = '<|assistant|>\n'
prompt_suffix = "<|end|>\n"
# Question sent to the model; the UI textbox holding it is hidden, so this is
# effectively the fixed instruction for every request.
default_question = "highly detailed caption"
@spaces.GPU
def run_example(image, text_input=default_question, model_id="microsoft/Phi-3.5-vision-instruct"):
    """Caption an image with a Phi-3.5-vision model.

    Args:
        image: numpy array delivered by the gr.Image component
            (assumed H x W x C uint8 — TODO confirm against gradio version).
        text_input: question/instruction inserted into the user turn.
        model_id: key into the module-level ``models``/``processors`` dicts.

    Returns:
        The decoded model response with special tokens stripped.

    Raises:
        gr.Error: if no image was provided.
    """
    # Guard: gr.Image yields None when Submit is clicked with no upload;
    # Image.fromarray(None) would raise an opaque AttributeError instead.
    if image is None:
        raise gr.Error("Please upload an image first.")
    model = models[model_id]
    processor = processors[model_id]
    # Phi-3.5-vision expects the <|image_1|> placeholder inside the user turn.
    prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
    image = Image.fromarray(image).convert("RGB")
    inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
    generate_ids = model.generate(
        **inputs,
        max_new_tokens=1000,
        eos_token_id=processor.tokenizer.eos_token_id,
    )
    # Drop the echoed prompt tokens; keep only the newly generated ones.
    generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
    response = processor.batch_decode(
        generate_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]
    return response
# Inline CSS for the Blocks UI: pin the output box height, force-hide the
# fixed-value model selector and question textbox, and frame the main
# container. (#output_text / #main_container match elem ids set at build time;
# the `display: none` rule backs up the components' visible=False flags.)
css = """
#output_text {
height: 500px;
overflow: auto;
border: 1px solid #333;
}
#model_selector, #text_input {
display: none !important;
}
#main_container {
border: 2px solid black;
padding: 20px;
border-radius: 10px;
}
"""
# Build the UI: image input on the left, generated caption on the right.
# The model dropdown and question textbox exist only to feed fixed values
# into run_example, so they are created with visible=False.
with gr.Blocks(css=css) as demo:
    with gr.Row():
        with gr.Column():
            input_img = gr.Image(label="Input Picture", interactive=True)
            model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="microsoft/Phi-3.5-vision-instruct", visible=False)
            text_input = gr.Textbox(label="Question", value=default_question, visible=False)
            submit_btn = gr.Button(value="Submit")
        with gr.Column():
            output_text = gr.Textbox(label="Output Text")
    submit_btn.click(run_example, [input_img, text_input, model_selector], [output_text])
demo.queue(api_open=False)  # serialize GPU requests; keep the API closed
# NOTE(review): the original line ended with a stray " |" (page-scrape gutter
# artifact) which is a SyntaxError in Python; removed.
demo.launch(debug=True, show_api=False)