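# app.py for a Hugging Face Space: an image-to-prompt converter that asks
# microsoft/Phi-3.5-vision-instruct to describe an uploaded picture as a
# reusable generation prompt.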
import os
import subprocess

import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

# Install flash-attn at startup (the Space image ships without it). Merging
# os.environ keeps pip on PATH; the skip flag pulls the prebuilt wheel
# instead of compiling the CUDA kernels.
subprocess.run('pip install flash-attn --no-build-isolation',
               env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
               shell=True)
# Load the model and processor once at startup, keyed by model id so more
# checkpoints can be added later.
models = {
    "microsoft/Phi-3.5-vision-instruct": AutoModelForCausalLM.from_pretrained(
        "microsoft/Phi-3.5-vision-instruct", trust_remote_code=True,
        torch_dtype="auto", _attn_implementation="flash_attention_2",
    ).cuda().eval()
}
processors = {
    "microsoft/Phi-3.5-vision-instruct": AutoProcessor.from_pretrained(
        "microsoft/Phi-3.5-vision-instruct", trust_remote_code=True)
}
# Fixed instruction sent with every image.
default_question = (
    "You are an image to prompt converter. Your work is to observe each and "
    "every detail of the image and craft a detailed prompt under 100 words in "
    "this format: [image content/subject, description of action, state, and "
    "mood], [art form, style], [artist/photographer reference if needed], "
    "[additional settings such as camera and lens settings, lighting, colors, "
    "effects, texture, background, rendering]."
)

# Chat-template markers for Phi-3.5-vision.
user_prompt = '<|user|>\n'
assistant_prompt = '<|assistant|>\n'
prompt_suffix = "<|end|>\n"
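# Assembled with the markers above, one user turn looks like:
#   <|user|>\n<|image_1|>\n{question}<|end|>\n<|assistant|>\n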
@spaces.GPU
def run_example(image, text_input=default_question, model_id="microsoft/Phi-3.5-vision-instruct"):
    if image is None:
        return "Please upload an image first."
    model = models[model_id]
    processor = processors[model_id]
    # <|image_1|> marks where the processor splices the image tokens in.
    prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
    image = Image.fromarray(image).convert("RGB")
    inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
    generate_ids = model.generate(
        **inputs,
        max_new_tokens=1000,
        eos_token_id=processor.tokenizer.eos_token_id,
    )
    # Strip the prompt tokens so only newly generated text is decoded.
    generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
    response = processor.batch_decode(
        generate_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]
    return response
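# Hypothetical local smoke test (the file name here is illustrative, not
# part of the app):
#   import numpy as np
#   arr = np.array(Image.open("photo.jpg"))
#   print(run_example(arr))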
css = """
#container {
border: 2px solid #333;
padding: 20px;
max-width: 400px;
margin: auto;
}
#input_img, #output_text {
border: 1px solid #444;
border-radius: 5px;
}
#input_img {
height: 200px;
overflow: hidden;
}
#output_text {
height: 150px;
overflow-y: auto;
}
/* The button sets elem_id="copy-btn", so match on the id. */
#copy-btn {
display: inline-block;
padding: 5px 10px;
font-size: 14px;
background-color: #333;
color: #fff;
border: none;
border-radius: 3px;
cursor: pointer;
margin-top: 10px;
}
"""
with gr.Blocks(css=css) as demo:
    # gr.Box was removed in Gradio 4; gr.Column is the closest container.
    with gr.Column(elem_id="container"):
        input_img = gr.Image(label="Input Picture", elem_id="input_img")
        # The instruction never changes, so keep it in a hidden textbox.
        text_input = gr.Textbox(value=default_question, visible=False)
        submit_btn = gr.Button(value="Generate")
        output_text = gr.Textbox(label="Output Text", elem_id="output_text")
        submit_btn.click(run_example, [input_img, text_input], [output_text])
        # pyperclip would copy on the *server*, which is headless on a Space;
        # run a small JS handler in the visitor's browser instead (fn=None
        # executes only the js; the keyword was `_js` in Gradio 3.x).
        copy_button = gr.Button("Copy Text", elem_id="copy-btn")
        copy_button.click(
            None,
            inputs=output_text,
            outputs=None,
            js="(text) => navigator.clipboard.writeText(text)",
        )
demo.queue(api_open=False)
demo.launch(debug=True, show_api=False)