File size: 3,755 Bytes
01ba43f
 
 
 
 
 
17e2f81
01ba43f
 
 
afbfe97
8356f77
01ba43f
 
 
 
 
3f2b2d5
 
 
 
 
 
 
17e2f81
1e01463
17e2f81
1e01463
 
 
 
fe57642
 
 
17e2f81
1e01463
675c8e9
3f2b2d5
84db50f
1e01463
01ba43f
 
 
 
 
 
 
 
 
 
 
 
 
 
4d0bb56
 
01ba43f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
afbfe97
01ba43f
 
 
 
 
 
 
2f70e48
01ba43f
 
 
9cdfe23
 
01ba43f
 
 
 
2f70e48
 
 
71cd062
 
 
01ba43f
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gradio as gr
from PIL import Image
from transformers import BitsAndBytesConfig, PaliGemmaForConditionalGeneration, PaliGemmaProcessor
import spaces
import torch
import os


# Hub checkpoint that both the model weights and the processor are loaded from.
# Alternative checkpoint id left here for reference: "selamw/bird-Identifier"
model_id = "selamw/BirdWatcher"

# Quantization settings handed to from_pretrained: request 8-bit loading.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

# Hub access token read from the HF_token environment variable; the value is
# None when that variable is not set.
access_token = os.environ.get('HF_token')


def convert_to_markdown(input_text):
    """Format a "<title>: <details>" bird description as Markdown.

    The text is split at its first ":"; the part before it becomes a bold
    title and the part after it is the content. Inside the content, every
    occurrence of a known section keyword is moved onto its own paragraph
    and prefixed with a ``**`` marker.

    Args:
        input_text (str): The raw description text.

    Returns:
        str: The formatted Markdown, stripped of surrounding whitespace. When
        the text contains no ":" there is no title to extract, so the stripped
        text is returned unchanged instead of raising ``ValueError``.
    """
    bold_words = ('Look:', 'Cool Fact!:', 'Habitat:', 'Food:', 'Birdie Behaviors:')

    # partition() never raises: an empty separator means no ":" was found.
    title, separator, content = input_text.partition(":")
    if not separator:
        return input_text.strip()

    title = title.strip()
    content = content.strip()

    # Start each section on a new paragraph and open a bold marker before it.
    # NOTE(review): the marker is opened but never closed here; presumably the
    # closing "**" is expected to come from elsewhere -- confirm the rendering.
    for word in bold_words:
        content = content.replace(word, f'\n\n**{word}')

    return f"**{title}**{content}".strip()

@spaces.GPU
def infer_fin_pali(image, question):
    """Describe the bird in ``image`` and return the answer as Markdown.

    Args:
        image: The picture supplied by the Gradio image input; ``None`` when
            the user has not provided one.
        question (str): The text prompt passed to the processor with the image.

    Returns:
        str: The generated answer, formatted by ``convert_to_markdown``.

    Raises:
        gr.Error: If no image was provided, so the UI shows a clear message
            instead of an internal processor failure.
    """
    if image is None:
        raise gr.Error("Please upload a bird image before running the model.")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # NOTE(review): the model and processor are loaded on every call.
    # Presumably this is because the quantized load needs the GPU that is
    # only attached inside the @spaces.GPU function -- confirm before hoisting.
    model = PaliGemmaForConditionalGeneration.from_pretrained(
        model_id, quantization_config=bnb_config, token=access_token
    )
    processor = PaliGemmaProcessor.from_pretrained(model_id, token=access_token)

    inputs = processor(images=image, text=question, return_tensors="pt").to(device)
    # Number of prompt tokens (image placeholders + text) fed to the model.
    prompt_len = inputs["input_ids"].shape[-1]

    predictions = model.generate(**inputs, max_new_tokens=512)

    # generate() returns prompt + continuation; keep only the continuation.
    # Slicing by token count avoids relying on the decoded prompt matching the
    # raw question string character-for-character.
    generated_tokens = predictions[0][prompt_len:]
    decoded_output = processor.decode(generated_tokens, skip_special_tokens=True).lstrip("\n")

    return convert_to_markdown(decoded_output)


# Custom stylesheet passed to gr.Blocks(css=...) when the UI is built.
#   #mkd            -> fixed-height (500px), scrollable, bordered box for the
#                      element whose id is "mkd"
#   h1 / h3         -> centered headings (used by the gr.HTML banner lines)
#   h2              -> left-aligned headings
#   span.gray-text  -> gray text for spans carrying the "gray-text" class
css = """
    #mkd {
        height: 500px; 
        overflow: auto; 
        border: 1px solid #ccc; 
    }
    h1 {
        text-align: center;
    }
    h3 {
        text-align: center;
    }
    h2 {
        text-align: left;
    }
    span.gray-text {
        color: gray;
    }
"""

# Build the Gradio UI: a banner, one tab with the image/prompt inputs, a Run
# button, a Markdown response panel, and a set of clickable examples.
with gr.Blocks(css=css) as demo:
    gr.HTML("<h1>🦩  BirdWatcher  🦜</h1>")
    gr.HTML("<h3>Upload an image of a bird, and the model will generate a detailed description of its species.</h3>")

    with gr.Tab(label="Bird Identification"):
        with gr.Row():
            input_img = gr.Image(label="Input Bird Image")
            with gr.Column():
                with gr.Row():
                    question = gr.Text(label="Default Prompt", value="Describe this bird species", elem_id="default-prompt")
                with gr.Row():
                    submit_btn = gr.Button(value="Run")
                with gr.Row():
                    # elem_id="mkd" ties this panel to the "#mkd" rule in the
                    # stylesheet (fixed height, scrollbar, border); without
                    # the id that rule matches nothing.
                    output = gr.Markdown(label="Response", elem_id="mkd")

        # Run inference on the uploaded image and prompt when "Run" is clicked.
        submit_btn.click(infer_fin_pali, [input_img, question], [output])

        # Example rows: [image file, prompt]. The image paths are relative
        # file names.
        gr.Examples(
            [["01.jpg", "Describe this bird species"],
             ["02.jpg", "Describe this bird species"],
             ["03.jpg", "Describe this bird species"],
             ["04.jpg", "Describe this bird species"],
             ["05.jpg", "Describe this bird species"],
             ["06.jpg", "Describe this bird species"]],
            inputs=[input_img, question],
            outputs=[output],
            fn=infer_fin_pali,
            label='Examples 👇'
        )

demo.launch(debug=True)