File size: 4,593 Bytes
01ba43f
 
 
 
 
 
17e2f81
01ba43f
 
afbfe97
01ba43f
 
 
aeaeda9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01ba43f
aeaeda9
 
 
 
3f2b2d5
 
aeaeda9
3f2b2d5
 
 
aeaeda9
 
 
 
 
 
 
 
 
 
 
1e01463
aeaeda9
3f2b2d5
aeaeda9
 
1e01463
aeaeda9
01ba43f
 
 
 
 
 
 
 
 
 
 
 
 
4d0bb56
01ba43f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
849dd99
01ba43f
 
 
 
 
 
 
849dd99
027ee2f
01ba43f
 
 
 
 
 
 
63a76b3
01ba43f
 
 
9cdfe23
01ba43f
 
 
 
2f70e48
 
 
71cd062
 
 
01ba43f
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import gradio as gr
from PIL import Image
from transformers import BitsAndBytesConfig, PaliGemmaForConditionalGeneration, PaliGemmaProcessor
import spaces
import torch
import os


# Access token read from the ``HF_token`` environment variable (``None`` when
# the variable is unset); handed to both ``from_pretrained`` calls below.
access_token = os.getenv('HF_token')
# Identifier passed to ``from_pretrained`` for the model and its processor.
model_id = "selamw/BirdWatcher"
# Quantization settings for model loading: 8-bit loading is enabled.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)


# def convert_to_markdown(input_text):
#     """Converts bird information text to Markdown format, 
#        making specific keywords bold and adding headings.
#     Args:
#         input_text (str): The input text containing bird information.
#     Returns:
#         str: The formatted Markdown text.
#     """
    
#     bold_words = ['Look:', 'Cool Fact!:', 'Habitat:', 'Food:', 'Birdie Behaviors:']

#     # Split into title and content based on the first ":", handling extra whitespace
#     if len(input_text.split(":", 1)) > 1:
#         title, content = map(str.strip, input_text.split(":", 1))
        
#         # Bold the keywords 
#         for word in bold_words:
#             content = content.replace(word, f'\n\n**{word}')
        
#         # Construct the Markdown output with headings
#         formatted_output = f"**{title}**{content}" 
#     else:
#         formatted_output = input_text
#     return formatted_output.strip()

import re

# Section labels to emphasise; they are matched case-insensitively.
_BOLD_KEYWORDS = ('look:', 'cool fact!:', 'habitat:', 'food:', 'birdie behaviors:')

# Single alternation covering every label.  ``(?<!\w)`` keeps a label from
# matching inside a longer word (e.g. "food:" in "seafood:").  There is
# deliberately no trailing ``\b``: each label ends in ":" and is usually
# followed by a space, and no word boundary exists between two non-word
# characters, so a trailing ``\b`` would prevent the match.
_KEYWORD_RE = re.compile(
    r"(?<!\w)(" + "|".join(re.escape(word) for word in _BOLD_KEYWORDS) + r")",
    re.IGNORECASE,
)


def convert_to_markdown(input_text):
    """
    Convert bird information text to Markdown.

    The text before the first ":" becomes a bold title; in the remainder,
    the known section labels ("Look:", "Cool Fact!:", "Habitat:", "Food:",
    "Birdie Behaviors:") are wrapped in ``**`` regardless of their case.
    The original casing of the text is preserved.

    Args:
        input_text (str): The input text containing bird information.

    Returns:
        str: The formatted Markdown text; an empty string when the input is
        empty or contains only whitespace.
    """
    # Nothing to format: avoid emitting a bare "****" title.
    if not input_text or not input_text.strip():
        return ""

    # Without a ":" the whole text is the title and the content is empty.
    title, _, content = input_text.partition(":")
    title = title.strip()
    content = content.strip()

    # ``\1`` re-inserts the label exactly as it appeared in the text.
    content = _KEYWORD_RE.sub(r"**\1**", content)

    # The outer strip removes the trailing newline when there is no content.
    return f"**{title}**\n{content}".strip()
    
@spaces.GPU
def infer_fin_pali(image, question):
    """Describe the bird in ``image`` and return the answer as Markdown.

    The model and processor are loaded from ``model_id`` on every call. The
    image and prompt are run through ``generate`` (at most 512 new tokens),
    and the decoded text is formatted with ``convert_to_markdown``.

    Args:
        image: Image supplied by the ``gr.Image`` input; ``None`` when the
            user has not provided one.
        question (str): Prompt text sent to the model alongside the image.

    Returns:
        str: The model's answer formatted as Markdown.

    Raises:
        gr.Error: If no image was provided.
    """
    # Fail early with a readable message instead of letting the processor
    # raise on a missing image.
    if image is None:
        raise gr.Error("Please upload an image of a bird before running the model.")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = PaliGemmaForConditionalGeneration.from_pretrained(
        model_id, quantization_config=bnb_config, token=access_token
    )
    processor = PaliGemmaProcessor.from_pretrained(model_id, token=access_token)

    inputs = processor(images=image, text=question, return_tensors="pt").to(device)

    predictions = model.generate(**inputs, max_new_tokens=512)
    decoded_output = processor.decode(predictions[0], skip_special_tokens=True)
    # Drop the leading prompt and the newline(s) after it.
    # NOTE(review): assumes the decoded text starts with ``question`` verbatim.
    answer = decoded_output[len(question):].lstrip("\n")

    # Ensure proper Markdown formatting
    return convert_to_markdown(answer)


# Custom stylesheet handed to ``gr.Blocks(css=...)`` below: a fixed-height,
# scrollable, bordered box for the element with id ``mkd``, centred h1/h2/h3
# headings, and a gray ``span.gray-text`` class.
# NOTE(review): no component in the visible layout sets ``elem_id="mkd"`` or
# uses ``gray-text`` - confirm whether the output Markdown was meant to carry
# that id.
css = """
    #mkd {
        height: 500px; 
        overflow: auto; 
        border: 1px solid #ccc; 
    }
    h1 {
        text-align: center;
    }
    h3 {
        text-align: center;
    }
    h2 {
        text-align: center;
    }
    span.gray-text {
        color: gray;
    }
"""

# Build the interface: three banner lines, then a single tab holding the image
# input, the prompt box, the Run button, the Markdown output and the examples.
with gr.Blocks(css=css) as demo:
    for banner_html in (
        "<h1>🦩  BirdWatcher  🦜</h1>",
        "<h3>[Powered by Fine-tuned PaliGemma]</h3>",
        "<h3>Upload an image of a bird, and the model will generate a detailed description of its species.</h3>",
    ):
        gr.HTML(banner_html)

    with gr.Tab(label="Bird Identification"):
        with gr.Row():
            input_img = gr.Image(label="Input Bird Image")
            with gr.Column():
                with gr.Row():
                    question = gr.Text(
                        label="Default Prompt",
                        value="Describe this bird species",
                        elem_id="default-prompt",
                        interactive=True,
                    )
                with gr.Row():
                    submit_btn = gr.Button(value="Run")
                with gr.Row():
                    # The model's answer is rendered as Markdown.
                    output = gr.Markdown(label="Response")

        # Clicking Run sends the image and the prompt to the model.
        submit_btn.click(
            fn=infer_fin_pali,
            inputs=[input_img, question],
            outputs=[output],
        )

        # Six bundled sample images (01.jpg .. 06.jpg), all with the same prompt.
        gr.Examples(
            examples=[
                [f"{index:02d}.jpg", "Describe this bird species"]
                for index in range(1, 7)
            ],
            inputs=[input_img, question],
            outputs=[output],
            fn=infer_fin_pali,
            label='Examples 👇',
        )

demo.launch(debug=True)