import subprocess
# Hugging Face Spaces workaround: install flash-attn at startup. Skipping the
# CUDA build avoids a long compile step; prebuilt kernels are used at runtime.
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)

import gradio as gr
import spaces
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

# Load both fine-tuned Florence-2 checkpoints and their processors once at
# startup, so switching models in the UI does not trigger a reload.
models = {
    'J-LAB/Florence_2_B_FluxiAI_Product_Caption': AutoModelForCausalLM.from_pretrained('J-LAB/Florence_2_B_FluxiAI_Product_Caption', trust_remote_code=True).to("cuda").eval(),
    'J-LAB/Florence_2_L_FluxiAI_Product_Caption': AutoModelForCausalLM.from_pretrained('J-LAB/Florence_2_L_FluxiAI_Product_Caption', trust_remote_code=True).to("cuda").eval()
}

processors = {
    'J-LAB/Florence_2_B_FluxiAI_Product_Caption': AutoProcessor.from_pretrained('J-LAB/Florence_2_B_FluxiAI_Product_Caption', trust_remote_code=True),
    'J-LAB/Florence_2_L_FluxiAI_Product_Caption': AutoProcessor.from_pretrained('J-LAB/Florence_2_L_FluxiAI_Product_Caption', trust_remote_code=True)
}

DESCRIPTION = "# [Florence-2 Product Describe by Fluxi IA](https://huggingface.co/microsoft/Florence-2-large)"

@spaces.GPU
def process_image(image, task_prompt, text_input=None, model_id='J-LAB/Florence_2_B_FluxiAI_Product_Caption'):
    image = Image.fromarray(image)  # Gradio hands the image over as a NumPy array
    # Map the human-readable task name to the model's task token.
    if task_prompt == 'Product Caption':
        task_prompt = '<PC>'
    elif task_prompt == 'More Detailed Caption':
        task_prompt = '<MORE_DETAILED_CAPTION>'
    else:
        # The UI has a single HTML output component, so always return one value.
        return ""

    results = run_example(task_prompt, image, model_id=model_id)

    # run_example returns a {task_token: text} dict; extract the text value.
    output_text = results.get(task_prompt, "") if results else ""

    # Convert newline characters to HTML line breaks for the HTML component.
    output_text = output_text.replace("\n\n", "<br><br>").replace("\n", "<br>")

    return output_text

css = """
  #output {
    height: 500px; 
    overflow: auto; 
    border: 1px solid #ccc; 
  }
"""

single_task_list = [
    'Product Caption', 'More Detailed Caption'
]

with gr.Blocks(css=css) as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Tab(label="Florence-2 Image Captioning"):
        with gr.Row():
            with gr.Column():
                input_img = gr.Image(label="Input Picture")
                model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value='J-LAB/Florence_2_B_FluxiAI_Product_Caption')
                # Note: this selector is not wired to any handler; only single tasks run.
                task_type = gr.Radio(choices=['Single task', 'Cascaded task'], label='Task type selector', value='Single task')
                task_prompt = gr.Dropdown(choices=single_task_list, label="Task Prompt", value="Product Caption")
                text_input = gr.Textbox(label="Text Input (optional)")
                submit_btn = gr.Button(value="Submit")
            with gr.Column():
                output_text = gr.HTML(label="Output Text")

        submit_btn.click(process_image, [input_img, task_prompt, text_input, model_selector], [output_text])

demo.launch(debug=True)