Spaces:

selamw
/

BirdWatcher

Runtime error

File size: 4,593 Bytes

01ba43f
 
 
 
 
 
17e2f81
01ba43f
 
afbfe97
01ba43f
 
 
aeaeda9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01ba43f
aeaeda9
 
 
 
3f2b2d5
 
aeaeda9
3f2b2d5
 
 
aeaeda9
 
 
 
 
 
 
 
 
 
 
1e01463
aeaeda9
3f2b2d5
aeaeda9
 
1e01463
aeaeda9
01ba43f
 
 
 
 
 
 
 
 
 
 
 
 
4d0bb56
01ba43f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
849dd99
01ba43f
 
 
 
 
 
 
849dd99
027ee2f
01ba43f
 
 
 
 
 
 
63a76b3
01ba43f
 
 
9cdfe23
01ba43f
 
 
 
2f70e48
 
 
71cd062
 
 
01ba43f

import gradio as gr
from PIL import Image
from transformers import BitsAndBytesConfig, PaliGemmaForConditionalGeneration, PaliGemmaProcessor
import spaces
import torch
import os


# Hugging Face access token, read from the ``HF_token`` environment variable;
# it is None when the variable is not set.
access_token = os.environ.get('HF_token')

# Hub repository id handed to ``from_pretrained`` for the model and processor.
model_id = "selamw/BirdWatcher"

# Quantization settings: load the model weights in 8-bit.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)


# def convert_to_markdown(input_text):
#     """Converts bird information text to Markdown format, 
#        making specific keywords bold and adding headings.
#     Args:
#         input_text (str): The input text containing bird information.
#     Returns:
#         str: The formatted Markdown text.
#     """
    
#     bold_words = ['Look:', 'Cool Fact!:', 'Habitat:', 'Food:', 'Birdie Behaviors:']

#     # Split into title and content based on the first ":", handling extra whitespace
#     if len(input_text.split(":", 1)) > 1:
#         title, content = map(str.strip, input_text.split(":", 1))
        
#         # Bold the keywords 
#         for word in bold_words:
#             content = content.replace(word, f'\n\n**{word}')
        
#         # Construct the Markdown output with headings
#         formatted_output = f"**{title}**{content}" 
#     else:
#         formatted_output = input_text
#     return formatted_output.strip()

import re

# Section labels to render in bold; matched case-insensitively.
_BOLD_KEYWORDS = ('look:', 'cool fact!:', 'habitat:', 'food:', 'birdie behaviors:')

# One alternation covering every label.
# - The lookbehind rejects a label that is the tail of a longer word
#   ("seafood:") or that is already preceded by "*" (already bold).
# - There is deliberately no trailing "\b": every label ends in ":" and is
#   normally followed by a space, a position where "\b" never matches.
_BOLD_KEYWORD_RE = re.compile(
    r"(?<![\w*])(" + "|".join(re.escape(word) for word in _BOLD_KEYWORDS) + r")",
    re.IGNORECASE,
)


def convert_to_markdown(input_text):
    """
    Convert bird information text to Markdown.

    The text before the first ":" becomes a bold title on its own line; in
    the remainder, each known section label (e.g. "Look:", "Habitat:") is
    wrapped in ``**...**``. The original capitalisation of the text is kept.

    Args:
        input_text (str): The input text containing bird information.

    Returns:
        str: The formatted Markdown text. Empty or whitespace-only input
        yields an empty string. Text without any ":" is returned as a bold
        title only.
    """
    if not input_text or not input_text.strip():
        return ""

    # Split on the first ":" only; later colons belong to section labels.
    # Without a ":" the whole text ends up in ``title`` and ``content`` is "".
    title, _, content = input_text.partition(":")
    title = title.strip()
    content = content.strip()

    # Bold every section label in a single pass, preserving the matched case.
    content = _BOLD_KEYWORD_RE.sub(r"**\1**", content)

    # Skip the heading entirely when nothing precedes the first ":", so the
    # output never contains an empty "****" marker.
    heading = f"**{title}**" if title else ""
    return f"{heading}\n{content}".strip()
    
@spaces.GPU
def infer_fin_pali(image, question):
    """
    Run the model on an image and a text prompt and return Markdown.

    The model and processor are loaded from ``model_id`` on every call.
    NOTE(review): reloading per request is slow; consider loading once if
    the hosting environment allows it.

    Args:
        image: The bird image to describe; ``None`` when nothing was uploaded.
        question (str): The text prompt passed to the processor.

    Returns:
        str: The generated description formatted by ``convert_to_markdown``,
        or a short instruction message when no image was supplied.
    """
    # Nothing to run the model on (e.g. "Run" clicked before an upload);
    # answer with a readable message instead of failing inside the processor.
    if image is None:
        return "Please upload an image of a bird first."

    model = PaliGemmaForConditionalGeneration.from_pretrained(
        model_id, quantization_config=bnb_config, token=access_token
    )
    processor = PaliGemmaProcessor.from_pretrained(model_id, token=access_token)

    # Put the inputs on whatever device the model was actually loaded on,
    # rather than guessing it separately.
    inputs = processor(images=image, text=question, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # Pure inference: no autograd bookkeeping needed.
    with torch.inference_mode():
        predictions = model.generate(**inputs, max_new_tokens=512)

    # Drop the prompt by token count. Slicing the decoded string by
    # len(question) breaks as soon as decoding does not reproduce the prompt
    # character-for-character.
    generated_tokens = predictions[0][prompt_length:]
    decoded_output = processor.decode(generated_tokens, skip_special_tokens=True).strip()

    # Ensure proper Markdown formatting
    return convert_to_markdown(decoded_output)


# Custom stylesheet passed to gr.Blocks(css=...): a fixed-height, scrollable,
# bordered box for the element with id "mkd", centered h1/h2/h3 headings, and
# a gray text color for span.gray-text.
css = """
    #mkd {
        height: 500px; 
        overflow: auto; 
        border: 1px solid #ccc; 
    }
    h1 {
        text-align: center;
    }
    h3 {
        text-align: center;
    }
    h2 {
        text-align: center;
    }
    span.gray-text {
        color: gray;
    }
"""

# Prompt pre-filled in the text box and used for every bundled example.
default_prompt = "Describe this bird species"

# Page layout: a header, then one tab holding the image input on the left and
# the prompt / run button / response stacked in a column on the right.
with gr.Blocks(css=css) as demo:
    gr.HTML("<h1>🦩  BirdWatcher  🦜</h1>")
    gr.HTML("<h3>[Powered by Fine-tuned PaliGemma]</h3>")
    gr.HTML("<h3>Upload an image of a bird, and the model will generate a detailed description of its species.</h3>")

    with gr.Tab(label="Bird Identification"):
        with gr.Row():
            input_img = gr.Image(label="Input Bird Image")
            with gr.Column():
                with gr.Row():
                    question = gr.Text(label="Default Prompt", value=default_prompt, elem_id="default-prompt", interactive=True)
                with gr.Row():
                    submit_btn = gr.Button(value="Run")
                with gr.Row():
                    # elem_id="mkd" ties this component to the "#mkd" rule in
                    # the stylesheet (fixed height, scrolling, border), which
                    # otherwise matches nothing on the page.
                    output = gr.Markdown(label="Response", elem_id="mkd")

        submit_btn.click(infer_fin_pali, [input_img, question], [output])

        # Bundled sample images 01.jpg .. 06.jpg, each paired with the
        # default prompt.
        gr.Examples(
            [[f"{index:02d}.jpg", default_prompt] for index in range(1, 7)],
            inputs=[input_img, question],
            outputs=[output],
            fn=infer_fin_pali,
            label='Examples 👇'
        )

demo.launch(debug=True)