File size: 4,323 Bytes
01ba43f
 
 
 
 
 
17e2f81
01ba43f
 
 
8356f77
 
01ba43f
 
 
 
 
3f2b2d5
 
 
 
 
 
 
 
 
17e2f81
84db50f
17e2f81
84db50f
 
 
 
 
 
17e2f81
84db50f
 
d496f7c
3f2b2d5
84db50f
 
01ba43f
 
 
 
 
 
 
 
 
 
 
 
 
 
771aa35
 
01ba43f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91f0ca9
01ba43f
 
 
 
 
 
 
2f70e48
01ba43f
 
 
9cdfe23
 
01ba43f
 
 
 
2f70e48
 
 
 
01ba43f
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gradio as gr
from PIL import Image
from transformers import BitsAndBytesConfig, PaliGemmaForConditionalGeneration, PaliGemmaProcessor
import spaces
import torch
import os


# Hugging Face access token read from the `HF_token` environment variable.
# os.getenv returns None when the variable is not set.
access_token = os.getenv('HF_token')

# Hub repository id of the checkpoint passed to from_pretrained at inference time.
model_id = "selamw/BirdWatcher-AI"
# Alternative checkpoint id, currently disabled:
# model_id = "selamw/bird-Identifier"

# Quantization settings handed to from_pretrained: load the weights in 8-bit.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)


def convert_to_markdown(input_text):
    """Format a plain-text bird description as Markdown.

    Everything before the first recognised section label is treated as the
    title and rendered in bold. Each recognised label ('Look:',
    'Cool Fact!:', 'Habitat:', 'Food:', 'Birdie Behaviors:') is rendered in
    bold on its own line, followed by the text that belongs to it. Blocks are
    separated by blank lines.

    Text that is already wrapped in ``**`` is not wrapped a second time, so
    running the function on its own output returns the same string.

    Args:
        input_text (str): The input text containing bird information.
            ``None`` and empty/whitespace-only strings are accepted.

    Returns:
        str: The formatted Markdown text. An empty string for empty input;
        the stripped input unchanged when no section label is present.
    """
    # NOTE(review): the label set is taken from the sample output that used to
    # be hard-coded here -- confirm it matches what the model really emits.
    section_labels = ('Look:', 'Cool Fact!:', 'Habitat:', 'Food:', 'Birdie Behaviors:')
    # Characters trimmed around every fragment: whitespace plus '*' so that
    # pre-existing bold markers do not end up doubled.
    padding = " \t\r\n*"

    if not input_text:
        return ""
    text = input_text.strip()
    if not text:
        return ""

    # (position, label) for every label present, in order of appearance.
    found = sorted(
        (text.find(label), label) for label in section_labels if label in text
    )
    if not found:
        # Nothing to structure; show the raw description as-is.
        return text

    # The title may be followed by a ':' separator, which is dropped.
    title = text[:found[0][0]].strip(padding + ":")
    blocks = [f"**{title}**"] if title else []

    # Each section runs up to the next label, or to the end of the text.
    boundaries = [pos for pos, _ in found[1:]] + [len(text)]
    for (start, label), end in zip(found, boundaries):
        body = text[start + len(label):end].strip(padding)
        blocks.append(f"**{label}**\n{body}" if body else f"**{label}**")

    return "\n\n".join(blocks)

@spaces.GPU
def infer_fin_pali(image, question):
    """Describe the bird in ``image`` using the fine-tuned PaliGemma model.

    Loads the model and processor for ``model_id``, generates up to 512 new
    tokens for the (image, question) pair, decodes only the newly generated
    tokens, and passes the text through ``convert_to_markdown``.

    Args:
        image: The image supplied by the Gradio ``Image`` component.
        question (str): The text prompt sent to the model with the image.

    Returns:
        str: The value returned by ``convert_to_markdown`` for the decoded
        model output.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Surface a readable message in the UI instead of a processor failure.
        raise gr.Error("Please upload a bird image before running the model.")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # NOTE(review): model and processor are loaded on every call; left as-is
    # because the loading strategy is outside the scope of this change.
    model = PaliGemmaForConditionalGeneration.from_pretrained(
        model_id, quantization_config=bnb_config, token=access_token
    )
    processor = PaliGemmaProcessor.from_pretrained(model_id, token=access_token)

    inputs = processor(images=image, text=question, return_tensors="pt").to(device)
    # Number of prompt tokens (image placeholders + text) fed to the model.
    prompt_len = inputs["input_ids"].shape[-1]

    predictions = model.generate(**inputs, max_new_tokens=512)
    # Drop the echoed prompt at token level rather than slicing the decoded
    # string by len(question), which breaks if decoding does not reproduce
    # the prompt character-for-character.
    new_tokens = predictions[0][prompt_len:]
    decoded_output = processor.decode(new_tokens, skip_special_tokens=True).lstrip("\n")

    # Ensure proper Markdown formatting
    return convert_to_markdown(decoded_output)


# Custom stylesheet passed to gr.Blocks(css=...).
# - `#mkd` gives the element whose id is "mkd" a fixed 500px height, a
#   scrollbar on overflow, and a light grey border.
# - h1 / h3 are centred, h2 is left-aligned.
# - `span.gray-text` renders matching spans in grey.
css = """
    #mkd {
        height: 500px; 
        overflow: auto; 
        border: 1px solid #ccc; 
    }
    h1 {
        text-align: center;
    }
    h3 {
        text-align: center;
    }
    h2 {
        text-align: left;
    }
    span.gray-text {
        color: gray;
    }
"""

# Gradio UI: a single "Bird Identification" tab with an image input, an
# editable prompt, a Run button, and a Markdown panel for the model response.
with gr.Blocks(css=css) as demo:
    gr.HTML("<h1>🦩 BirdWatcher AI 🦜</h1>")
    gr.HTML("<h3>Upload an image of a bird, and the model will generate a detailed description of its species.</h3>")

    with gr.Tab(label="Bird Identification"):
        with gr.Row():
            input_img = gr.Image(label="Input Bird Image")
            with gr.Column():
                with gr.Row():
                    question = gr.Text(label="Default Prompt", value="Describe this bird species", elem_id="default-prompt")
                with gr.Row():
                    submit_btn = gr.Button(value="Run")
                with gr.Row():
                    # elem_id="mkd" connects this panel to the `#mkd` rule in
                    # `css`; without it that rule matches nothing on the page.
                    output = gr.Markdown(label="Response", elem_id="mkd")

        # Run inference on click and render the returned Markdown.
        submit_btn.click(infer_fin_pali, [input_img, question], [output])

        # Clickable sample inputs; the image paths are relative to the app's
        # working directory.
        gr.Examples(
            [["01.jpg", "Describe this bird species"],
             ["02.jpg", "Describe this bird species"],
             ["03.jpg", "Describe this bird species"],
             ["04.jpeg", "Describe this bird species"]],
            inputs=[input_img, question],
            outputs=[output],
            fn=infer_fin_pali,
            label='Examples 👇'
        )

demo.launch(debug=True)