# NOTE: extraction artifact removed here — git-blame metadata (file size,
# commit hashes, line-number gutter) that was not part of the program.
import gradio as gr
from transformers import AutoProcessor, AutoModelForCausalLM
import spaces
import io
from PIL import Image
import os
import subprocess

# Install flash-attn at startup (Hugging Face Spaces pattern: the wheel must be
# built/installed in the running container before the models are loaded).
# Fixes vs. original:
#  - list-form argv with shell=False instead of a shell string (no shell parsing),
#  - extend os.environ instead of replacing the whole environment (the original
#    passed only FLASH_ATTENTION_SKIP_CUDA_BUILD, which drops PATH/HOME and can
#    break pip).
subprocess.run(
    ['pip', 'install', 'flash-attn', '--no-build-isolation'],
    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=False,
    check=False,  # best-effort: keep app startup alive even if the build fails
)
# Both checkpoints are loaded eagerly at import time so the first request
# does not pay the download/initialization cost.
models = {
    repo: AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True).to("cuda").eval()
    for repo in (
        'J-LAB/Florence_2_B_FluxiAI_Product_Caption',
        'J-LAB/Florence_2_L_FluxiAI_Product_Caption',
    )
}
# One processor per checkpoint, keyed identically to `models`.
processors = {
    repo: AutoProcessor.from_pretrained(repo, trust_remote_code=True)
    for repo in (
        'J-LAB/Florence_2_B_FluxiAI_Product_Caption',
        'J-LAB/Florence_2_L_FluxiAI_Product_Caption',
    )
}
DESCRIPTION = "# [Florence-2 Product Describe by Fluxi IA](https://huggingface.co/microsoft/Florence-2-large)"
@spaces.GPU
def process_image(image, task_prompt, text_input=None, model_id='J-LAB/Florence_2_B_FluxiAI_Product_Caption'):
    """Run a Florence-2 captioning task on an image and return HTML text.

    Parameters:
        image: NumPy array from the gr.Image input (converted to PIL here).
        task_prompt: UI label selecting the task ('Product Caption' or
            'More Detailed Caption').
        text_input: unused; kept for interface compatibility with the
            gr.Textbox wired into the click handler.
        model_id: key into the module-level `models`/`processors` dicts.

    Returns:
        The caption as an HTML string (newlines converted to <br>), or ""
        for an unrecognized task_prompt.
    """
    # Map UI labels to Florence-2 task tokens.
    prompt_tokens = {
        'Product Caption': '<PC>',
        'More Detailed Caption': '<MORE_DETAILED_CAPTION>',
    }
    token = prompt_tokens.get(task_prompt)
    if token is None:
        # Bug fix: the original returned ("", None) — a 2-tuple — but the click
        # handler has a single output component, so every path must return one
        # string.
        return ""

    image = Image.fromarray(image)  # gr.Image delivers a NumPy array
    results = run_example(token, image, model_id=model_id)

    # `run_example` returns a dict keyed by the task token.
    output_text = results.get(token, "") if results else ""

    # Convert newline characters to HTML line breaks for the gr.HTML output.
    return output_text.replace("\n\n", "<br><br>").replace("\n", "<br>")
css = """
#output {
height: 500px;
overflow: auto;
border: 1px solid #ccc;
}
"""
single_task_list = [
'Product Caption', 'More Detailed Caption'
]
# Gradio UI: one tab with an image + controls column and an HTML output column.
with gr.Blocks(css=css) as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Tab(label="Florence-2 Image Captioning"):
        with gr.Row():
            with gr.Column():
                input_img = gr.Image(label="Input Picture")
                model_selector = gr.Dropdown(
                    choices=list(models.keys()),
                    label="Model",
                    value='J-LAB/Florence_2_B_FluxiAI_Product_Caption',
                )
                # Typo fix: 'Cascased task' -> 'Cascaded task' (label only; this
                # radio is not read by process_image).
                task_type = gr.Radio(
                    choices=['Single task', 'Cascaded task'],
                    label='Task type selector',
                    value='Single task',
                )
                # Bug fix: the original default value was "Caption", which is not
                # in single_task_list, so the initial selection sent an unknown
                # prompt to process_image (which then returns empty output).
                task_prompt = gr.Dropdown(
                    choices=single_task_list,
                    label="Task Prompt",
                    value="Product Caption",
                )
                text_input = gr.Textbox(label="Text Input (optional)")
                submit_btn = gr.Button(value="Submit")
            with gr.Column():
                output_text = gr.HTML(label="Output Text")
        submit_btn.click(process_image, [input_img, task_prompt, text_input, model_selector], [output_text])

demo.launch(debug=True)