Spaces:
Runtime error
Runtime error
File size: 8,828 Bytes
01ba43f 17e2f81 3f22c56 01ba43f 3f22c56 01ba43f 19067cd 3f2b2d5 19067cd aeaeda9 19067cd aeaeda9 da2e9d7 3f2b2d5 1e01463 19067cd aeaeda9 01ba43f 3f22c56 01ba43f 3f22c56 471e89f 3f22c56 d083d25 3f22c56 dc929f0 d083d25 dc929f0 d083d25 dc929f0 d083d25 dc929f0 d083d25 471e89f dc929f0 01ba43f 4d0bb56 01ba43f 849dd99 01ba43f 849dd99 027ee2f 01ba43f 9f4507b 01ba43f 63a76b3 01ba43f 9cdfe23 01ba43f 2f70e48 71cd062 01ba43f 3f22c56 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 |
import gradio as gr
from PIL import Image
from transformers import BitsAndBytesConfig, PaliGemmaForConditionalGeneration, PaliGemmaProcessor
import spaces
import torch
import os
from transformers import AutoProcessor, AutoModelForCausalLM
# Hugging Face access token, read from the HF_token environment variable
# (None when the variable is not set).
access_token = os.environ.get('HF_token')

# Hub repository id of the checkpoint loaded by infer_fin_pali.
model_id = "selamw/BirdWatcher2"

# Quantization settings handed to from_pretrained: load weights in 8-bit.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
def convert_to_markdown(input_text):
    """Format the model's bird description as Markdown.

    The text before the first ":" is treated as the title and rendered in
    bold; everything after it is the body. Every known section label found
    in the body ('Look:', 'Cool Fact!:', ...) is wrapped in bold markers and
    moved to the start of its own paragraph.

    Args:
        input_text (str): The input text containing bird information.

    Returns:
        str: The formatted Markdown text. Empty (or whitespace-only / None)
            input yields an empty string instead of a bare "****".
    """
    bold_words = ['Look:', 'Cool Fact!:', 'Habitat:', 'Food:', 'Birdie Behaviors:']

    # Split on the first ":" only; partition() leaves the content empty when
    # the text contains no colon at all.
    title, _, content = (input_text or "").partition(":")
    title = title.strip()
    content = content.strip()

    for word in bold_words:
        # Open AND close the bold span; an unclosed "**" is rendered as
        # literal asterisks by Markdown renderers.
        content = content.replace(word, f'\n\n**{word}**')

    # Skip the title markup entirely when there is no title text.
    heading = f"**{title}**" if title else ""
    return f"{heading}{content}".strip()
@spaces.GPU
def infer_fin_pali(image, question):
    """Generate a Markdown description for the bird shown in ``image``.

    Loads the checkpoint ``model_id`` and its processor, runs generation on
    the image/prompt pair and formats the decoded text with
    ``convert_to_markdown``.

    Args:
        image: The image supplied by the UI image input; ``None`` when
            nothing was uploaded.
        question (str): The text prompt passed to the processor with the image.

    Returns:
        str: Markdown text for the output component.
    """
    # Clicking "Run" without an upload passes None; answer with a hint
    # instead of failing inside the processor.
    if image is None:
        return "Please upload an image of a bird first."

    use_cuda = torch.cuda.is_available()
    device = "cuda:0" if use_cuda else "cpu"
    torch_dtype = torch.float16 if use_cuda else torch.float32

    # NOTE(review): the model and processor are re-created on every call;
    # consider loading them once if the hosting setup allows it.
    load_kwargs = {
        "torch_dtype": torch_dtype,
        "trust_remote_code": True,
        "token": access_token,
    }
    if use_cuda:
        # bitsandbytes-quantized models must be placed via device_map;
        # calling .to() on them is rejected by many transformers versions.
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            quantization_config=bnb_config,
            device_map={"": device},
            **load_kwargs,
        )
    else:
        # 8-bit bitsandbytes loading needs a GPU, so fall back to the
        # unquantized weights on CPU.
        model = AutoModelForCausalLM.from_pretrained(model_id, **load_kwargs).to(device)
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True, token=access_token)

    inputs = processor(text=question, images=image, return_tensors="pt").to(device, torch_dtype)
    predictions = model.generate(**inputs, max_new_tokens=512)

    # NOTE(review): this assumes the decoded text starts with the prompt and
    # drops exactly len(question) characters — confirm for this checkpoint.
    decoded_output = processor.decode(predictions[0], skip_special_tokens=True)[len(question):].lstrip("\n")

    # Ensure proper Markdown formatting
    return convert_to_markdown(decoded_output)
# Custom stylesheet passed to gr.Blocks(css=css): gives the element with id
# "mkd" a fixed-height, scrollable, bordered box, centers h1/h2/h3 headings
# and colors span.gray-text gray.
# NOTE(review): no component visible in this file sets elem_id="mkd" —
# confirm whether that rule is still needed.
css = """
#mkd {
height: 500px;
overflow: auto;
border: 1px solid #ccc;
}
h1 {
text-align: center;
}
h3 {
text-align: center;
}
h2 {
text-align: center;
}
span.gray-text {
color: gray;
}
"""
# Static HTML banners shown at the top of the page, in display order.
_HEADER_HTML = (
    "<h1>𦩠BirdWatcher π¦</h1>",
    "<h3>[Powered by Fine-tuned PaliGemma]</h3>",
    "<h3>Upload an image of a bird, and the model will generate a detailed description of its species.</h3>",
    "<p style='text-align: center;'>(There are over 11,000 bird species in the world, and this model was fine-tuned with over 500)</p>",
)

# Prompt pre-filled in the text box and paired with every example image.
_DEFAULT_PROMPT = "Describe this bird species"

# Example rows: image paths 01.jpg ... 06.jpg, each with the default prompt.
_EXAMPLE_ROWS = [[f"{index:02d}.jpg", _DEFAULT_PROMPT] for index in range(1, 7)]

# NOTE(review): the container nesting below was reconstructed from a source
# whose indentation was lost — confirm it matches the intended layout.
with gr.Blocks(css=css) as demo:
    for banner in _HEADER_HTML:
        gr.HTML(banner)

    with gr.Tab(label="Bird Identification"):
        with gr.Row():
            input_img = gr.Image(label="Input Bird Image")
            with gr.Column():
                with gr.Row():
                    question = gr.Text(
                        label="Default Prompt",
                        value=_DEFAULT_PROMPT,
                        elem_id="default-prompt",
                        interactive=True,
                    )
                with gr.Row():
                    submit_btn = gr.Button(value="Run")
                with gr.Row():
                    # Markdown component so the formatted answer is rendered.
                    output = gr.Markdown(label="Response")

        # Run inference on the current image/prompt when the button is clicked.
        submit_btn.click(
            fn=infer_fin_pali,
            inputs=[input_img, question],
            outputs=[output],
        )

        gr.Examples(
            examples=_EXAMPLE_ROWS,
            inputs=[input_img, question],
            outputs=[output],
            fn=infer_fin_pali,
            label='Examples π',
        )

demo.launch(debug=True, share=True)
# import gradio as gr
# from PIL import Image
# from transformers import BitsAndBytesConfig, PaliGemmaForConditionalGeneration, PaliGemmaProcessor
# import spaces
# import torch
# import os
# access_token = os.getenv('HF_token')
# model_id = "selamw/BirdWatcher"
# bnb_config = BitsAndBytesConfig(load_in_8bit=True)
# def convert_to_markdown(input_text):
# """Converts bird information text to Markdown format,
# making specific keywords bold and adding headings.
# Args:
# input_text (str): The input text containing bird information.
# Returns:
# str: The formatted Markdown text.
# """
# bold_words = ['Look:', 'Cool Fact!:', 'Habitat:', 'Food:', 'Birdie Behaviors:']
# # Split into title and content based on the first ":", handling extra whitespace
# if ":" in input_text:
# title, content = map(str.strip, input_text.split(":", 1))
# else:
# title = input_text
# content = ""
# # Bold the keywords
# for word in bold_words:
# content = content.replace(word, f'\n\n**{word}')
# # Construct the Markdown output with headings
# formatted_output = f"**{title}**{content}"
# return formatted_output.strip()
# @spaces.GPU
# def infer_fin_pali(image, question):
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, quantization_config=bnb_config, token=access_token)
# processor = PaliGemmaProcessor.from_pretrained(model_id, token=access_token)
# inputs = processor(images=image, text=question, return_tensors="pt").to(device)
# predictions = model.generate(**inputs, max_new_tokens=512)
# decoded_output = processor.decode(predictions[0], skip_special_tokens=True)[len(question):].lstrip("\n")
# # Ensure proper Markdown formatting
# formatted_output = convert_to_markdown(decoded_output)
# return formatted_output
# css = """
# #mkd {
# height: 500px;
# overflow: auto;
# border: 1px solid #ccc;
# }
# h1 {
# text-align: center;
# }
# h3 {
# text-align: center;
# }
# h2 {
# text-align: center;
# }
# span.gray-text {
# color: gray;
# }
# """
# with gr.Blocks(css=css) as demo:
# gr.HTML("<h1>𦩠BirdWatcher π¦</h1>")
# gr.HTML("<h3>[Powered by Fine-tuned PaliGemma]</h3>")
# gr.HTML("<h3>Upload an image of a bird, and the model will generate a detailed description of its species.</h3>")
# gr.HTML("<p style='text-align: center;'>(There are over 11,000 bird species in the world, and this model was fine-tuned with over 500)</p>")
# with gr.Tab(label="Bird Identification"):
# with gr.Row():
# input_img = gr.Image(label="Input Bird Image")
# with gr.Column():
# with gr.Row():
# question = gr.Text(label="Default Prompt", value="Describe this bird species", elem_id="default-prompt", interactive=True)
# with gr.Row():
# submit_btn = gr.Button(value="Run")
# with gr.Row():
# output = gr.Markdown(label="Response") # Use Markdown component to display output
# submit_btn.click(infer_fin_pali, [input_img, question], [output])
# gr.Examples(
# [["01.jpg", "Describe this bird species"],
# ["02.jpg", "Describe this bird species"],
# ["03.jpg", "Describe this bird species"],
# ["04.jpg", "Describe this bird species"],
# ["05.jpg", "Describe this bird species"],
# ["06.jpg", "Describe this bird species"]],
# inputs=[input_img, question],
# outputs=[output],
# fn=infer_fin_pali,
# label='Examples π'
# )
# demo.launch(debug=True, share=True) |